blob: 4cf4f25644dffbe0147f59d1adf955a79c324e58 [file] [log] [blame]
/* ----------------------------------------------------------------------- *
 *
 *   Copyright 1996-2009 The NASM Authors - All Rights Reserved
 *   See the file AUTHORS included with the NASM distribution for
 *   the specific copyright holders.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following
 *   conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above
 *     copyright notice, this list of conditions and the following
 *     disclaimer in the documentation and/or other materials provided
 *     with the distribution.
 *
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ----------------------------------------------------------------------- */
33
34/*
35 * quote.c
H. Peter Anvin8cad14b2008-06-01 17:23:51 -070036 */
37
38#include "compiler.h"
39
40#include <assert.h>
41#include <stdlib.h>
42
43#include "nasmlib.h"
44#include "quote.h"
45
/*
 * Numeric value of the hexadecimal digit character c.
 *
 * The result is only meaningful when c is already known to be one of
 * 0-9, A-F or a-f.  The argument is evaluated more than once, so it must
 * not have side effects.
 */
#define numvalue(c) \
    ((c) - ((c) >= 'a' ? 'a' - 10 : (c) >= 'A' ? 'A' - 10 : '0'))
47
/*
 * Encode a single input byte for use inside a `...` string.
 *
 * c is the byte to encode and next is the byte that follows it in the
 * input (0 when c is the last byte).  The encoded form, one to four bytes
 * long, is stored starting at out; its length is returned.
 *
 * Backquote and backslash are escaped with a backslash, the control
 * characters BEL, BS, TAB, LF, VT, FF, CR and ESC use their letter escapes,
 * other printable ASCII is copied unchanged, and everything else becomes an
 * octal escape.  The octal escape uses the shortest form unless the next
 * input byte is itself an octal digit, in which case all three digits are
 * written so that the following digit cannot be read as part of the escape.
 */
static size_t quote_bq_byte(char c, char next, char *out)
{
    const unsigned char uc = (unsigned char)c;
    char *o = out;
    char esc = 0;
    bool full;

    switch (c) {
    case '`':
    case '\\':
        esc = c;
        break;
    case 7:
        esc = 'a';
        break;
    case 8:
        esc = 'b';
        break;
    case 9:
        esc = 't';
        break;
    case 10:
        esc = 'n';
        break;
    case 11:
        esc = 'v';
        break;
    case 12:
        esc = 'f';
        break;
    case 13:
        esc = 'r';
        break;
    case 27:
        esc = 'e';
        break;
    default:
        break;
    }

    if (esc) {
        *o++ = '\\';
        *o++ = esc;
    } else if (c >= ' ' && c <= '~') {
        *o++ = c;
    } else {
        full = (next >= '0' && next <= '7');
        *o++ = '\\';
        if (full || uc > 077)
            *o++ = (uc >> 6) + '0';
        if (full || uc > 07)
            *o++ = ((uc >> 3) & 7) + '0';
        *o++ = (uc & 7) + '0';
    }

    return (size_t)(o - out);
}

/*
 * Return a newly allocated (nasm_malloc), NUL-terminated quoted copy of
 * the len bytes starting at str.
 *
 * '...' is used when the data contains no single quote, otherwise "..."
 * when it contains no double quote; both forms copy the data verbatim.
 * If the data contains both quote characters, or any byte outside the
 * printable ASCII range, the `...` form with backslash escapes is used.
 */
char *nasm_quote(char *str, size_t len)
{
    char *const end = str + len;
    char *src, *dst, *nstr;
    char scratch[4];            /* Largest escape is \ooo */
    char c, quote;
    bool sq_ok = true;
    bool dq_ok = true;
    size_t qlen = 0;            /* Payload length if `...` is needed */

    /* Pass 1: find out which quote styles are usable and size the output */
    for (src = str; src < end; src++) {
        c = *src;
        if (c == '\'')
            sq_ok = false;
        else if (c == '\"')
            dq_ok = false;
        else if (c < ' ' || c > '~')
            sq_ok = dq_ok = false;

        qlen += quote_bq_byte(c, (src + 1 < end) ? src[1] : 0, scratch);
    }

    if (sq_ok || dq_ok) {
        /* Plain '...' or "..." string: the data is copied as-is */
        quote = sq_ok ? '\'' : '\"';
        nstr = nasm_malloc(len + 3);
        nstr[0] = quote;
        memcpy(nstr + 1, str, len);
        nstr[len + 1] = quote;
        nstr[len + 2] = '\0';
        return nstr;
    }

    /* Pass 2: build the `...` form; two quotes plus the terminator */
    nstr = nasm_malloc(qlen + 3);
    dst = nstr;
    *dst++ = '`';
    for (src = str; src < end; src++)
        dst += quote_bq_byte(*src, (src + 1 < end) ? src[1] : 0, dst);
    *dst++ = '`';
    *dst++ = '\0';
    assert((size_t)(dst - nstr) == qlen + 3);

    return nstr;
}
184
/*
 * Value of the hexadecimal digit character c, or -1 if c is not a
 * hexadecimal digit.
 */
static int unq_hexval(char c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}

/*
 * Store the code point v at q using the original (up to six byte) UTF-8
 * scheme and return the pointer just past the last byte written.
 *
 * Values above 0x7fffffff cannot be represented; nothing is written for
 * them and q is returned unchanged.
 */
static char *emit_utf8(char *q, uint32_t v)
{
    if (v > 0x7fffffff) {
        /* Not representable - do nothing */
    } else if (v <= 0x7f) {
        *q++ = (char)v;
    } else if (v <= 0x000007ff) {
        *q++ = (char)(0xc0 | (v >> 6));
        *q++ = (char)(0x80 | (v & 63));
    } else if (v <= 0x0000ffff) {
        *q++ = (char)(0xe0 | (v >> 12));
        *q++ = (char)(0x80 | ((v >> 6) & 63));
        *q++ = (char)(0x80 | (v & 63));
    } else if (v <= 0x001fffff) {
        *q++ = (char)(0xf0 | (v >> 18));
        *q++ = (char)(0x80 | ((v >> 12) & 63));
        *q++ = (char)(0x80 | ((v >> 6) & 63));
        *q++ = (char)(0x80 | (v & 63));
    } else if (v <= 0x03ffffff) {
        *q++ = (char)(0xf8 | (v >> 24));
        *q++ = (char)(0x80 | ((v >> 18) & 63));
        *q++ = (char)(0x80 | ((v >> 12) & 63));
        *q++ = (char)(0x80 | ((v >> 6) & 63));
        *q++ = (char)(0x80 | (v & 63));
    } else {
        *q++ = (char)(0xfc | (v >> 30));
        *q++ = (char)(0x80 | ((v >> 24) & 63));
        *q++ = (char)(0x80 | ((v >> 18) & 63));
        *q++ = (char)(0x80 | ((v >> 12) & 63));
        *q++ = (char)(0x80 | ((v >> 6) & 63));
        *q++ = (char)(0x80 | (v & 63));
    }
    return q;
}

/*
 * Do an *in-place* dequoting of the specified string, returning the
 * resulting length.  The result may contain embedded NUL characters, so
 * the returned length, not strlen(), gives its size.
 *
 * In-place replacement is possible since the unquoted text is always
 * shorter than the quoted text: the opening quote is dropped and no escape
 * sequence expands to more bytes than it occupies.  For the same reason a
 * terminating NUL can always be stored after the result of a quoted string
 * without touching the closing quote or anything following it.
 *
 * str starts with ', " or ` for a quoted string.  '...' and "..." are
 * copied verbatim; `...` strings have their backslash escapes decoded
 * (\a \b \e \f \n \r \t \v, octal \ooo, hex \xhh, and \uXXXX / \UXXXXXXXX
 * emitted as UTF-8; any other escaped character stands for itself).  If
 * str does not start with a quote character it is left untouched and its
 * length is returned.
 *
 * If ep is not NULL, *ep is set to point to the final quote, or to the
 * terminating NUL if the string is improperly quoted, not quoted at all,
 * or empty.
 */
size_t nasm_unquote(char *str, char **ep)
{
    char bq;
    char *p, *q;
    char *escp = NULL;
    char c;
    enum unq_state {
        st_start,
        st_backslash,
        st_hex,
        st_oct,
        st_ucs
    } state;
    int ndig = 0;
    int digit;
    uint32_t nval = 0;          /* Unsigned: eight hex digits must not overflow */

    p = q = str;

    bq = *p++;
    if (!bq) {
        /* Empty input: the terminating NUL is the first character */
        if (ep)
            *ep = str;
        return 0;
    }

    switch (bq) {
    case '\'':
    case '\"':
        /* '...' or "..." string: no escapes, copy up to the closing quote */
        while ((c = *p) && c != bq) {
            p++;
            *q++ = c;
        }
        *q = '\0';
        break;

    case '`':
        /* `...` string */
        state = st_start;

        while ((c = *p)) {
            p++;
            switch (state) {
            case st_start:
                switch (c) {
                case '\\':
                    state = st_backslash;
                    break;
                case '`':
                    p--;        /* Leave p on the closing quote */
                    goto out;
                default:
                    *q++ = c;
                    break;
                }
                break;

            case st_backslash:
                state = st_start;
                escp = p;       /* Beginning of argument sequence */
                nval = 0;
                switch (c) {
                case 'a':
                    *q++ = 7;
                    break;
                case 'b':
                    *q++ = 8;
                    break;
                case 'e':
                    *q++ = 27;
                    break;
                case 'f':
                    *q++ = 12;
                    break;
                case 'n':
                    *q++ = 10;
                    break;
                case 'r':
                    *q++ = 13;
                    break;
                case 't':
                    *q++ = 9;
                    break;
                case 'u':
                    state = st_ucs;
                    ndig = 4;
                    break;
                case 'U':
                    state = st_ucs;
                    ndig = 8;
                    break;
                case 'v':
                    *q++ = 11;
                    break;
                case 'x':
                case 'X':
                    state = st_hex;
                    ndig = 2;
                    break;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                    state = st_oct;
                    ndig = 2;   /* Up to two more digits */
                    nval = (uint32_t)(c - '0');
                    break;
                default:
                    /* Unknown escape: the character stands for itself */
                    *q++ = c;
                    break;
                }
                break;

            case st_oct:
                if (c >= '0' && c <= '7') {
                    nval = (nval << 3) + (uint32_t)(c - '0');
                    if (!--ndig) {
                        *q++ = (char)nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    *q++ = (char)nval;
                    state = st_start;
                }
                break;

            case st_hex:
                digit = unq_hexval(c);
                if (digit >= 0) {
                    nval = (nval << 4) + (uint32_t)digit;
                    if (!--ndig) {
                        *q++ = (char)nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    /* No digits at all: emit the escape letter itself */
                    *q++ = (p > escp) ? (char)nval : escp[-1];
                    state = st_start;
                }
                break;

            case st_ucs:
                digit = unq_hexval(c);
                if (digit >= 0) {
                    nval = (nval << 4) + (uint32_t)digit;
                    if (!--ndig) {
                        q = emit_utf8(q, nval);
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    if (p > escp)
                        q = emit_utf8(q, nval);
                    else
                        *q++ = escp[-1];
                    state = st_start;
                }
                break;
            }
        }

        /* Input ended inside an escape: flush what was collected so far */
        switch (state) {
        case st_start:
        case st_backslash:
            break;
        case st_oct:
            *q++ = (char)nval;
            break;
        case st_hex:
            *q++ = (p > escp) ? (char)nval : escp[-1];
            break;
        case st_ucs:
            if (p > escp)
                q = emit_utf8(q, nval);
            else
                *q++ = escp[-1];
            break;
        }
    out:
        /* q is always behind p here, so this cannot clobber *p */
        *q = '\0';
        break;

    default:
        /* Not a quoted string, just return the input... */
        p = q = strchr(str, '\0');
        break;
    }

    if (ep)
        *ep = p;
    return q - str;
}
423
/*
 * Find the end of a quoted string.
 *
 * str must point at the opening quote.  The return value points to the
 * terminating character: the closing quote, or the NUL terminator if the
 * string is unterminated.  If str does not begin with ', " or `, it is
 * not a string and str itself is returned.
 *
 * '...' and "..." strings have no escapes, so they end at the next
 * matching quote.  In a `...` string a backslash escapes the character
 * that follows it, so an escaped backquote does not end the string.
 */
char *nasm_skip_string(char *str)
{
    const char bq = str[0];
    bool escaped = false;
    char *p;

    if (bq == '\'' || bq == '\"') {
        /* '...' or "..." string */
        for (p = str + 1; *p && *p != bq; p++)
            ;
        return p;
    }

    if (bq != '`')
        return str;             /* Not a string... */

    /*
     * `...` string.  For the purpose of finding the end of the string
     * only "just saw a backslash" matters: whatever kind of escape it
     * introduces, the one character after the backslash can never close
     * the string, and every later character is examined normally.
     */
    for (p = str + 1; *p; p++) {
        if (escaped)
            escaped = false;
        else if (*p == '\\')
            escaped = true;
        else if (*p == '`')
            break;              /* Found the end */
    }

    /*
     * p is either on the closing backquote or on the NUL of an
     * unterminated string - never past it.
     */
    return p;
}