blob: 962ae9fdc2a39e0c05fdb7b283b0e5f0ab79f821 [file] [log] [blame]
H. Peter Anvin9e6747c2009-06-28 17:13:04 -07001/* ----------------------------------------------------------------------- *
2 *
H. Peter Anvin53f15592016-03-01 22:43:51 -08003 * Copyright 1996-2016 The NASM Authors - All Rights Reserved
H. Peter Anvin9e6747c2009-06-28 17:13:04 -07004 * See the file AUTHORS included with the NASM distribution for
5 * the specific copyright holders.
H. Peter Anvin8cad14b2008-06-01 17:23:51 -07006 *
H. Peter Anvin9e6747c2009-06-28 17:13:04 -07007 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following
9 * conditions are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
19 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 *
32 * ----------------------------------------------------------------------- */
33
34/*
35 * quote.c
H. Peter Anvin8cad14b2008-06-01 17:23:51 -070036 */
37
38#include "compiler.h"
39
H. Peter Anvin8cad14b2008-06-01 17:23:51 -070040
41#include "nasmlib.h"
42#include "quote.h"
43
/*
 * Map a control character that has a single-letter backslash escape
 * inside `...` strings to that letter; return 0 if there is none.
 */
static char quote_escape_letter(char c)
{
    switch (c) {
    case '\a':
        return 'a';
    case '\b':
        return 'b';
    case '\t':
        return 't';
    case '\n':
        return 'n';
    case '\v':
        return 'v';
    case '\f':
        return 'f';
    case '\r':
        return 'r';
    case 27:                    /* ESC */
        return 'e';
    default:
        return 0;
    }
}

/*
 * Number of octal digits (1..3) used when escaping byte c as \ooo.
 * If the following byte is itself an octal digit, the full three-digit
 * form is required so that the next byte is not absorbed into the escape.
 */
static int quote_octal_width(char c, char next)
{
    unsigned char v;

    if (next >= '0' && next <= '7')
        v = 0377;               /* Must use the full form */
    else
        v = c;

    if (v > 077)
        return 3;
    if (v > 07)
        return 2;
    return 1;
}

/*
 * Produce a quoted copy of the len bytes at str.
 *
 * '...' is used when the data holds no single quote and no byte outside
 * the printable range ' '..'~'; otherwise "..." is used when the data
 * holds no double quote and no such byte; otherwise `...` is used, with
 * backslash escapes for backquote, backslash, and non-printable bytes.
 *
 * The result is NUL-terminated and allocated with nasm_malloc().
 */
char *nasm_quote(const char *str, size_t len)
{
    const char *end = str + len;
    const char *s;
    char *out, *w;
    bool single_ok = true;
    bool double_ok = true;
    size_t bqlen = 0;           /* Payload length if `...` is needed */

    /* Pass 1: decide which quote styles are usable and size the `...` form */
    for (s = str; s < end; s++) {
        const char ch = *s;

        if (ch == '\'') {
            single_ok = false;
            bqlen += 1;
        } else if (ch == '\"') {
            double_ok = false;
            bqlen += 1;
        } else if (ch == '`' || ch == '\\') {
            bqlen += 2;         /* Backslash plus the character itself */
        } else if (ch >= ' ' && ch <= '~') {
            bqlen += 1;
        } else {
            /* Non-printable bytes can only be expressed inside `...` */
            single_ok = double_ok = false;
            if (quote_escape_letter(ch)) {
                bqlen += 2;
            } else {
                const char next = (s + 1 < end) ? s[1] : 0;

                bqlen += 1 + quote_octal_width(ch, next);
            }
        }
    }

    if (single_ok || double_ok) {
        /* Use '...' or "...": the payload is copied verbatim */
        const char delim = single_ok ? '\'' : '\"';

        out = nasm_malloc(len + 3);
        out[0] = delim;
        if (len > 0)
            memcpy(out + 1, str, len);
        out[len + 1] = delim;
        out[len + 2] = '\0';
        return out;
    }

    /* Pass 2: need to use the `...` quoted syntax */
    out = nasm_malloc(bqlen + 3);
    w = out;
    *w++ = '`';
    for (s = str; s < end; s++) {
        const char ch = *s;
        const char letter = quote_escape_letter(ch);

        if (ch == '`' || ch == '\\') {
            *w++ = '\\';
            *w++ = ch;
        } else if (letter) {
            *w++ = '\\';
            *w++ = letter;
        } else if (ch >= ' ' && ch <= '~') {
            *w++ = ch;
        } else {
            const unsigned char b = (unsigned char)ch;
            const char next = (s + 1 < end) ? s[1] : 0;
            const int width = quote_octal_width(ch, next);

            *w++ = '\\';
            if (width > 2)
                *w++ = (b >> 6) + '0';
            if (width > 1)
                *w++ = ((b >> 3) & 7) + '0';
            *w++ = (b & 7) + '0';
        }
    }
    *w++ = '`';
    *w++ = '\0';

    /* The two passes must agree on the output size */
    nasm_assert((size_t)(w - out) == bqlen + 3);
    return out;
}
182
/*
 * Store the code point v at q using the (original, up to six byte)
 * UTF-8 encoding and return the pointer just past the last byte written.
 * A negative v writes nothing and returns q unchanged.
 */
static char *emit_utf8(char *q, int32_t v)
{
    int trail;                  /* Number of continuation bytes */
    unsigned char lead;         /* Marker bits of the leading byte */

    if (v < 0)
        return q;               /* Impossible - do nothing */

    if (v <= 0x7f) {
        *q++ = (char)v;
        return q;
    }

    if (v <= 0x000007ff) {
        trail = 1;
        lead = 0xc0;
    } else if (v <= 0x0000ffff) {
        trail = 2;
        lead = 0xe0;
    } else if (v <= 0x001fffff) {
        trail = 3;
        lead = 0xf0;
    } else if (v <= 0x03ffffff) {
        trail = 4;
        lead = 0xf8;
    } else {
        trail = 5;
        lead = 0xfc;
    }

    /* Leading byte carries the topmost payload bits */
    *q++ = (char)(lead | (v >> (6 * trail)));

    /* Each continuation byte carries the next six bits, high to low */
    while (trail-- > 0)
        *q++ = (char)(0x80 | ((v >> (6 * trail)) & 63));

    return q;
}
217
/*
 * Quote a NUL-terminated C string: its length is measured with strlen()
 * and the work is delegated to nasm_quote().
 */
char *nasm_quote_cstr(const char *str)
{
    const size_t len = strlen(str);

    return nasm_quote(str, len);
}
225
/*
 * Do an *in-place* dequoting of the specified string, returning the
 * resulting length (the result may contain embedded NUL bytes, so the
 * returned length is authoritative.)
 *
 * In-place replacement is possible since the unquoted length is always
 * shorter than or equal to the quoted length; the write cursor therefore
 * always trails the read cursor.
 *
 * For '...', "..." and `...` input the output is NUL-terminated at the
 * returned length.  Input that does not start with a quote character is
 * left untouched and its full length is returned.
 *
 * If ep is non-NULL, *ep is set to point to the final quote, or to the
 * terminating NUL if the string is improperly quoted, unquoted or empty.
 */
size_t nasm_unquote(char *str, char **ep)
{
    char bq;
    char *p, *q;
    char *escp = NULL;
    char c;
    enum unq_state {
        st_start,
        st_backslash,
        st_hex,
        st_oct,
        st_ucs
    } state;
    int ndig = 0;
    /*
     * Unsigned accumulator: eight hex digits after \U fill all 32 bits,
     * and left-shifting a signed value into the sign bit is undefined.
     */
    uint32_t nval = 0;

    p = q = str;

    bq = *p++;
    if (!bq) {
        /* Empty input: still report where parsing stopped */
        if (ep)
            *ep = str;
        return 0;
    }

    switch (bq) {
    case '\'':
    case '\"':
        /* '...' or "..." string: no escapes, copy until the closing quote */
        while ((c = *p) && c != bq) {
            p++;
            *q++ = c;
        }
        *q = '\0';
        break;

    case '`':
        /* `...` string: C-like backslash escapes are interpreted */
        state = st_start;

        while ((c = *p)) {
            p++;
            switch (state) {
            case st_start:
                switch (c) {
                case '\\':
                    state = st_backslash;
                    break;
                case '`':
                    p--;        /* Leave p on the closing quote */
                    goto out;
                default:
                    *q++ = c;
                    break;
                }
                break;

            case st_backslash:
                state = st_start;
                escp = p;       /* Beginning of argument sequence */
                nval = 0;
                switch (c) {
                case 'a':
                    *q++ = 7;
                    break;
                case 'b':
                    *q++ = 8;
                    break;
                case 'e':
                    *q++ = 27;
                    break;
                case 'f':
                    *q++ = 12;
                    break;
                case 'n':
                    *q++ = 10;
                    break;
                case 'r':
                    *q++ = 13;
                    break;
                case 't':
                    *q++ = 9;
                    break;
                case 'u':
                    state = st_ucs;
                    ndig = 4;
                    break;
                case 'U':
                    state = st_ucs;
                    ndig = 8;
                    break;
                case 'v':
                    *q++ = 11;
                    break;
                case 'x':
                case 'X':
                    state = st_hex;
                    ndig = 2;
                    break;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                    state = st_oct;
                    ndig = 2;   /* Up to two more digits */
                    nval = c - '0';
                    break;
                default:
                    /* Unknown escape: the character stands for itself */
                    *q++ = c;
                    break;
                }
                break;

            case st_oct:
                if (c >= '0' && c <= '7') {
                    nval = (nval << 3) + (c - '0');
                    if (!--ndig) {
                        *q++ = (char)nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    *q++ = (char)nval;
                    state = st_start;
                }
                break;

            case st_hex:
                if ((c >= '0' && c <= '9') ||
                    (c >= 'A' && c <= 'F') ||
                    (c >= 'a' && c <= 'f')) {
                    nval = (nval << 4) + numvalue(c);
                    if (!--ndig) {
                        *q++ = (char)nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    /* No digits at all: emit the escape letter itself */
                    if (p > escp)
                        *q++ = (char)nval;
                    else
                        *q++ = escp[-1];
                    state = st_start;
                }
                break;

            case st_ucs:
                if ((c >= '0' && c <= '9') ||
                    (c >= 'A' && c <= 'F') ||
                    (c >= 'a' && c <= 'f')) {
                    nval = (nval << 4) + numvalue(c);
                    if (!--ndig) {
                        /* Values above 0x7fffffff cannot be encoded */
                        if (nval <= 0x7fffffffU)
                            q = emit_utf8(q, (int32_t)nval);
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    if (p > escp) {
                        if (nval <= 0x7fffffffU)
                            q = emit_utf8(q, (int32_t)nval);
                    } else {
                        /* No digits at all: emit the escape letter itself */
                        *q++ = escp[-1];
                    }
                    state = st_start;
                }
                break;
            }
        }

        /* Input ended without a closing quote: flush any pending escape */
        switch (state) {
        case st_start:
        case st_backslash:
            break;
        case st_oct:
            *q++ = (char)nval;
            break;
        case st_hex:
            if (p > escp)
                *q++ = (char)nval;
            else
                *q++ = escp[-1];
            break;
        case st_ucs:
            if (p > escp) {
                if (nval <= 0x7fffffffU)
                    q = emit_utf8(q, (int32_t)nval);
            } else {
                *q++ = escp[-1];
            }
            break;
        }
    out:
        /*
         * Terminate the output like the '...' case does.  q is strictly
         * behind p here, so this never overwrites the byte *ep refers to.
         */
        *q = '\0';
        break;

    default:
        /* Not a quoted string, just return the input... */
        p = q = strchr(str, '\0');
        break;
    }

    if (ep)
        *ep = p;
    return q - str;
}
429
/*
 * Locate the end of the quoted string starting at str.  Returns a pointer
 * to the terminating character: the closing quote, or the NUL byte if the
 * string is unterminated.  If str does not begin with a quote character,
 * str itself is returned.
 */
char *nasm_skip_string(char *str)
{
    const char quote = str[0];
    char *s;

    switch (quote) {
    case '\'':
    case '\"':
        /* '...' or "..." string: no escapes, scan for the same quote */
        s = str + 1;
        while (*s && *s != quote)
            s++;
        return s;

    case '`':
        /* `...` string */
        for (s = str + 1; *s; s++) {
            if (*s == '`')
                return s;       /* Found the end */
            /*
             * For the purpose of finding the end, every escape behaves
             * the same: the character after a backslash is never a
             * terminator, so just step over it.  A backslash directly
             * before the NUL is left for the loop to run into.
             */
            if (*s == '\\' && s[1])
                s++;
        }
        return s;               /* Unterminated string... */

    default:
        return str;             /* Not a string... */
    }
}