blob: efd5cd6cfd801538532bf384cc8f441f0a530052 [file] [log] [blame]
H. Peter Anvin9e6747c2009-06-28 17:13:04 -07001/* ----------------------------------------------------------------------- *
2 *
H. Peter Anvin53f15592016-03-01 22:43:51 -08003 * Copyright 1996-2016 The NASM Authors - All Rights Reserved
H. Peter Anvin9e6747c2009-06-28 17:13:04 -07004 * See the file AUTHORS included with the NASM distribution for
5 * the specific copyright holders.
H. Peter Anvin8cad14b2008-06-01 17:23:51 -07006 *
H. Peter Anvin9e6747c2009-06-28 17:13:04 -07007 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following
9 * conditions are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
19 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 *
32 * ----------------------------------------------------------------------- */
33
34/*
35 * quote.c
H. Peter Anvin8cad14b2008-06-01 17:23:51 -070036 */
37
38#include "compiler.h"
39
H. Peter Anvin8cad14b2008-06-01 17:23:51 -070040
41#include "nasmlib.h"
42#include "quote.h"
43
/*
 * Return the letter used after a backslash to escape the control
 * character c inside a `...` string (7 -> 'a', 10 -> 'n', 27 -> 'e', ...),
 * or 0 if c has no single-letter escape.
 */
static char quote_escape_letter(char c)
{
    switch (c) {
    case 7:
        return 'a';
    case 8:
        return 'b';
    case 9:
        return 't';
    case 10:
        return 'n';
    case 11:
        return 'v';
    case 12:
        return 'f';
    case 13:
        return 'r';
    case 27:
        return 'e';
    default:
        return 0;
    }
}

/*
 * Number of octal digits (1 to 3) used to write the byte at p as a
 * backslash-octal escape.  If the next input byte (before ep) is itself
 * an octal digit, all three digits are used so that the following
 * character cannot be read as part of the escape.
 */
static int quote_octal_digits(const char *p, const char *ep)
{
    unsigned char key;

    if (p + 1 < ep && p[1] >= '0' && p[1] <= '7')
        key = 0377;             /* Must use the full form */
    else
        key = *p;

    return 1 + (key > 07) + (key > 077);
}

/*
 * Build a NASM quoted string, in newly allocated memory, from the
 * *lenp bytes starting at str.  On return *lenp holds the length of
 * the quoted result, not counting the terminating NUL.
 *
 * '...' is used if the input contains no single quote and only
 * printable characters; otherwise "..." under the equivalent rule for
 * double quotes; otherwise the `...` form with backslash escapes.
 */
char *nasm_quote(const char *str, size_t *lenp)
{
    const size_t inlen = *lenp;
    const char * const end = str + inlen;
    const char *src;
    char *result, *dst;
    bool plain_sq = true;
    bool plain_dq = true;
    size_t bqlen = 0;           /* Payload length if `...` is needed */

    /* Pass 1: decide which quote styles are usable, and size `...` */
    for (src = str; src < end; src++) {
        const char ch = *src;

        if (ch == '\'') {
            plain_sq = false;
            bqlen += 1;
        } else if (ch == '\"') {
            plain_dq = false;
            bqlen += 1;
        } else if (ch == '`' || ch == '\\') {
            bqlen += 2;
        } else if (ch >= ' ' && ch <= '~') {
            bqlen += 1;
        } else {
            /* Non-printable: only the `...` form can represent it */
            plain_sq = plain_dq = false;
            if (quote_escape_letter(ch))
                bqlen += 2;
            else
                bqlen += 1 + quote_octal_digits(src, end);
        }
    }

    if (plain_sq || plain_dq) {
        /* Use '...' or "..." - the payload is copied verbatim */
        const char delim = plain_sq ? '\'' : '\"';

        result = nasm_malloc(inlen + 3);
        dst = result;
        *dst++ = delim;
        if (inlen > 0) {
            memcpy(dst, str, inlen);
            dst += inlen;
        }
        *dst++ = delim;
    } else {
        /* Pass 2: emit the `...` form with escapes */
        result = nasm_malloc(bqlen + 3);
        dst = result;
        *dst++ = '`';
        for (src = str; src < end; src++) {
            const char ch = *src;
            const char letter = quote_escape_letter(ch);

            if (ch == '`' || ch == '\\') {
                *dst++ = '\\';
                *dst++ = ch;
            } else if (letter) {
                *dst++ = '\\';
                *dst++ = letter;
            } else if (ch >= ' ' && ch <= '~') {
                *dst++ = ch;
            } else {
                const unsigned char byte = (unsigned char)ch;
                const int ndig = quote_octal_digits(src, end);

                *dst++ = '\\';
                if (ndig > 2)
                    *dst++ = (byte >> 6) + '0';
                if (ndig > 1)
                    *dst++ = ((byte >> 3) & 7) + '0';
                *dst++ = (byte & 7) + '0';
            }
        }
        *dst++ = '`';
        /* Both passes must agree on the encoded length */
        nasm_assert((size_t)(dst - result) == bqlen + 2);
    }

    *dst = '\0';
    *lenp = dst - result;
    return result;
}
189
/*
 * Store the value v at q using the UTF-8 scheme extended to sequences
 * of up to six bytes, and return the pointer just past the last byte
 * written.  Negative values produce no output.
 */
static char *emit_utf8(char *q, int32_t v)
{
    int tail;                   /* Number of continuation bytes */
    int lead;                   /* Marker bits of the leading byte */

    if (v < 0)
        return q;               /* Impossible - do nothing */

    if (v <= 0x7f) {
        *q++ = v;
        return q;
    }

    if (v <= 0x000007ff) {
        lead = 0xc0;
        tail = 1;
    } else if (v <= 0x0000ffff) {
        lead = 0xe0;
        tail = 2;
    } else if (v <= 0x001fffff) {
        lead = 0xf0;
        tail = 3;
    } else if (v <= 0x03ffffff) {
        lead = 0xf8;
        tail = 4;
    } else {
        lead = 0xfc;
        tail = 5;
    }

    /* Leading byte carries the topmost bits, then 6 bits per byte */
    *q++ = lead | (v >> (6 * tail));
    while (tail-- > 0)
        *q++ = 0x80 | ((v >> (6 * tail)) & 63);

    return q;
}
224
/*
 * Variant of nasm_quote() for NUL-terminated C strings: the input
 * length is obtained with strlen().  If lenp is non-NULL, the length
 * reported by nasm_quote() for the quoted result is stored in *lenp.
 */
char *nasm_quote_cstr(const char *str, size_t *lenp)
{
    size_t n = strlen(str);
    char *quoted = nasm_quote(str, &n);

    if (lenp != NULL)
        *lenp = n;

    return quoted;
}
237
/*
 * Emit the code point accumulated from a \u or \U escape.  Values
 * that do not fit in a non-negative int32_t cannot be encoded and
 * produce no output.
 */
static char *unquote_emit_ucs(char *q, uint32_t v)
{
    return (v <= 0x7fffffff) ? emit_utf8(q, (int32_t)v) : q;
}

/*
 * Do an *in-place* dequoting of the specified string, returning the
 * resulting length (the result may contain embedded NUL bytes.)
 *
 * In-place replacement is possible since the unquoted length is always
 * shorter than the quoted length: at least the opening quote is
 * dropped, which also leaves room for a terminating NUL.  The output
 * is NUL-terminated at str[<returned length>] for every quote style.
 *
 * If ep is non-NULL, *ep is set to point to the final quote, or to the
 * terminating null if the string is improperly quoted, empty, or does
 * not start with a quote character at all.  In the last case the
 * string is left untouched and its full length is returned.
 */
size_t nasm_unquote(char *str, char **ep)
{
    char bq;
    char *p, *q;
    char *escp = NULL;
    char c;
    enum unq_state {
        st_start,
        st_backslash,
        st_hex,
        st_oct,
        st_ucs
    } state;
    int ndig = 0;
    uint32_t nval = 0;          /* Unsigned: eight hex digits must not overflow */

    p = q = str;

    bq = *p++;
    if (!bq) {
        /* Empty input: nothing to do, but still report the end position */
        if (ep)
            *ep = str;
        return 0;
    }

    switch (bq) {
    case '\'':
    case '\"':
        /* '...' or "..." string: no escapes, copy up to the closing quote */
        while ((c = *p) && c != bq) {
            p++;
            *q++ = c;
        }
        *q = '\0';
        break;

    case '`':
        /* `...` string */
        state = st_start;

        while ((c = *p)) {
            p++;
            switch (state) {
            case st_start:
                switch (c) {
                case '\\':
                    state = st_backslash;
                    break;
                case '`':
                    p--;        /* Leave p on the closing quote */
                    goto out;
                default:
                    *q++ = c;
                    break;
                }
                break;

            case st_backslash:
                state = st_start;
                escp = p;       /* Beginning of argument sequence */
                nval = 0;
                switch (c) {
                case 'a':
                    *q++ = 7;
                    break;
                case 'b':
                    *q++ = 8;
                    break;
                case 'e':
                    *q++ = 27;
                    break;
                case 'f':
                    *q++ = 12;
                    break;
                case 'n':
                    *q++ = 10;
                    break;
                case 'r':
                    *q++ = 13;
                    break;
                case 't':
                    *q++ = 9;
                    break;
                case 'u':
                    state = st_ucs;
                    ndig = 4;
                    break;
                case 'U':
                    state = st_ucs;
                    ndig = 8;
                    break;
                case 'v':
                    *q++ = 11;
                    break;
                case 'x':
                case 'X':
                    state = st_hex;
                    ndig = 2;
                    break;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                    state = st_oct;
                    ndig = 2;   /* Up to two more digits */
                    nval = c - '0';
                    break;
                default:
                    /* Unknown escape: the character stands for itself */
                    *q++ = c;
                    break;
                }
                break;

            case st_oct:
                if (c >= '0' && c <= '7') {
                    nval = (nval << 3) + (c - '0');
                    if (!--ndig) {
                        *q++ = nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    *q++ = nval;
                    state = st_start;
                }
                break;

            case st_hex:
                if ((c >= '0' && c <= '9') ||
                    (c >= 'A' && c <= 'F') ||
                    (c >= 'a' && c <= 'f')) {
                    nval = (nval << 4) + numvalue(c);
                    if (!--ndig) {
                        *q++ = nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    /* No digits at all: emit the 'x'/'X' itself */
                    *q++ = (p > escp) ? (char)nval : escp[-1];
                    state = st_start;
                }
                break;

            case st_ucs:
                if ((c >= '0' && c <= '9') ||
                    (c >= 'A' && c <= 'F') ||
                    (c >= 'a' && c <= 'f')) {
                    nval = (nval << 4) + numvalue(c);
                    if (!--ndig) {
                        q = unquote_emit_ucs(q, nval);
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    /* No digits at all: emit the 'u'/'U' itself */
                    if (p > escp)
                        q = unquote_emit_ucs(q, nval);
                    else
                        *q++ = escp[-1];
                    state = st_start;
                }
                break;
            }
        }

        /* Input ended without a closing quote: flush any pending escape */
        switch (state) {
        case st_start:
        case st_backslash:
            break;
        case st_oct:
            *q++ = nval;
            break;
        case st_hex:
            *q++ = (p > escp) ? (char)nval : escp[-1];
            break;
        case st_ucs:
            if (p > escp)
                q = unquote_emit_ucs(q, nval);
            else
                *q++ = escp[-1];
            break;
        }
    out:
        /* q is always behind p here, so this never clobbers *p */
        *q = '\0';
        break;

    default:
        /* Not a quoted string, just return the input... */
        p = q = strchr(str, '\0');
        break;
    }

    if (ep)
        *ep = p;
    return q-str;
}
441
/*
 * Locate the end of the quoted string starting at str.  The result
 * points at the terminating character: the closing quote, or the null
 * character if the string is unterminated.  If str does not begin with
 * a quote character, str itself is returned.
 */
char *nasm_skip_string(char *str)
{
    const char open = *str;
    char *cur;

    if (open == '`') {
        /*
         * `...` string.  Only one bit of state is needed to find the
         * end: whether the previous character was an unescaped
         * backslash, in which case the current character can neither
         * close the string nor start another escape.
         */
        int escaped = 0;

        for (cur = str + 1; *cur; cur++) {
            if (escaped)
                escaped = 0;
            else if (*cur == '\\')
                escaped = 1;
            else if (*cur == '`')
                break;          /* Found the end */
        }
        return cur;             /* Closing quote, or null if unterminated */
    }

    if (open == '\'' || open == '\"') {
        /* '...' or "..." string: no escapes to worry about */
        cur = str + 1;
        while (*cur && *cur != open)
            cur++;
        return cur;
    }

    return str;                 /* Not a string... */
}