blob: 2cb55b140c438c94563311572776faecd84a679c [file] [log] [blame]
H. Peter Anvin9e6747c2009-06-28 17:13:04 -07001/* ----------------------------------------------------------------------- *
2 *
3 * Copyright 1996-2009 The NASM Authors - All Rights Reserved
4 * See the file AUTHORS included with the NASM distribution for
5 * the specific copyright holders.
H. Peter Anvin8cad14b2008-06-01 17:23:51 -07006 *
H. Peter Anvin9e6747c2009-06-28 17:13:04 -07007 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following
9 * conditions are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
19 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 *
32 * ----------------------------------------------------------------------- */
33
34/*
35 * quote.c
H. Peter Anvin8cad14b2008-06-01 17:23:51 -070036 */
37
38#include "compiler.h"
39
H. Peter Anvin8cad14b2008-06-01 17:23:51 -070040#include <stdlib.h>
41
42#include "nasmlib.h"
43#include "quote.h"
44
/*
 * Numeric value of the hexadecimal digit character c.
 *
 * The argument must already be known to be one of 0-9, A-F or a-f; no
 * validation is done here.  Note that c is evaluated more than once,
 * so it must not have side effects.
 */
#define numvalue(c)                         \
    ((c) >= 'a' ? (c) - ('a' - 10) :        \
     (c) >= 'A' ? (c) - ('A' - 10) :        \
                  (c) - '0')
46
/*
 * Number of octal digits (1..3) used to escape the byte c as \ooo when
 * the byte following it in the input is next.
 *
 * If next is itself an octal digit the full three-digit form is forced,
 * since a shorter escape would otherwise absorb that digit when the
 * string is read back.
 */
static int quote_octal_digits(char c, char next)
{
    unsigned char key;

    key = (next >= '0' && next <= '7') ? 0377 : (unsigned char)c;
    return 1 + (key > 07) + (key > 077);
}

/*
 * Produce a quoted copy of the len bytes starting at str.
 *
 * The result is allocated with nasm_malloc() and is NUL-terminated.
 * Plain '...' quoting is used if the data contains no single quote and
 * no byte outside the range ' '..'~'; otherwise "..." is used under the
 * same condition for double quotes.  If neither is possible the string
 * is emitted in `...` form with backslash escapes.
 */
char *nasm_quote(char *str, size_t len)
{
    char *const end = str + len;
    char *src, *dst, *out;
    bool single_ok = true;
    bool double_ok = true;
    size_t esc_len = 0;         /* Payload length in `...` form */

    /* Pass 1: decide which quote styles are usable, and size `...` */
    for (src = str; src < end; src++) {
        const char ch = *src;

        if (ch == '\'') {
            single_ok = false;
            esc_len++;
        } else if (ch == '\"') {
            double_ok = false;
            esc_len++;
        } else if (ch == '`' || ch == '\\') {
            esc_len += 2;
        } else if (ch >= ' ' && ch <= '~') {
            esc_len++;
        } else {
            /* Not representable inside '...' or "..." */
            single_ok = double_ok = false;
            switch (ch) {
            case '\a':
            case '\b':
            case '\t':
            case '\n':
            case '\v':
            case '\f':
            case '\r':
            case 27:
                esc_len += 2;   /* Backslash plus one letter */
                break;
            default:
                /* Backslash plus one to three octal digits */
                esc_len += 1 + (size_t)quote_octal_digits(
                    ch, (src + 1 < end) ? src[1] : 0);
                break;
            }
        }
    }

    if (single_ok || double_ok) {
        /* The data can be copied verbatim between '...' or "..." */
        const char delim = single_ok ? '\'' : '\"';

        out = nasm_malloc(len + 3);
        out[0] = delim;
        memcpy(out + 1, str, len);
        out[len + 1] = delim;
        out[len + 2] = '\0';
        return out;
    }

    /* Pass 2: emit the `...` form */
    out = nasm_malloc(esc_len + 3);
    dst = out;
    *dst++ = '`';

    for (src = str; src < end; src++) {
        const char ch = *src;
        char letter = 0;        /* Character following the backslash */

        switch (ch) {
        case '`':
        case '\\':
            letter = ch;
            break;
        case 7:
            letter = 'a';
            break;
        case 8:
            letter = 'b';
            break;
        case 9:
            letter = 't';
            break;
        case 10:
            letter = 'n';
            break;
        case 11:
            letter = 'v';
            break;
        case 12:
            letter = 'f';
            break;
        case 13:
            letter = 'r';
            break;
        case 27:
            letter = 'e';
            break;
        default:
            break;
        }

        if (letter) {
            *dst++ = '\\';
            *dst++ = letter;
        } else if (ch < ' ' || ch > '~') {
            int ndig = quote_octal_digits(ch, (src + 1 < end) ? src[1] : 0);

            *dst++ = '\\';
            while (ndig-- > 0)
                *dst++ = (((unsigned char)ch >> (3 * ndig)) & 7) + '0';
        } else {
            *dst++ = ch;
        }
    }

    *dst++ = '`';
    *dst++ = '\0';
    /* The two passes must agree on the output size */
    nasm_assert((size_t)(dst - out) == esc_len + 3);
    return out;
}
183
/*
 * Store the UTF-8 style encoding of the code value v at q and return a
 * pointer just past the last byte written.
 *
 * Values up to 0x7fffffff are accepted and encoded in one to six bytes;
 * no check is made for surrogates or other invalid code points.  A
 * negative v writes nothing and returns q unchanged.
 */
static char *emit_utf8(char *q, int32_t v)
{
    int lead;                   /* Marker bits of the leading byte */
    int ntail;                  /* Number of continuation bytes */

    if (v < 0)
        return q;               /* Impossible - do nothing */

    if (v <= 0x7f) {
        *q++ = v;
        return q;
    }

    if (v <= 0x000007ff) {
        lead = 0xc0;
        ntail = 1;
    } else if (v <= 0x0000ffff) {
        lead = 0xe0;
        ntail = 2;
    } else if (v <= 0x001fffff) {
        lead = 0xf0;
        ntail = 3;
    } else if (v <= 0x03ffffff) {
        lead = 0xf8;
        ntail = 4;
    } else {
        lead = 0xfc;
        ntail = 5;
    }

    /* Leading byte carries the bits above all the 6-bit groups */
    *q++ = lead | (v >> (6 * ntail));

    /* Continuation bytes, most significant 6-bit group first */
    while (ntail-- > 0)
        *q++ = 0x80 | ((v >> (6 * ntail)) & 63);

    return q;
}
218
/*
 * Do an *in-place* dequoting of the specified string, returning the
 * resulting length (the result may contain embedded nulls.)
 *
 * In-place replacement is possible since the unquoted length is always
 * shorter than or equal to the quoted length.
 *
 * The first character of str selects the syntax:
 *
 *   '...' or "..."  the contents are copied verbatim up to the matching
 *                   quote and a terminating null is stored after them;
 *   `...`           backslash escapes are interpreted; no terminating
 *                   null is stored after the result in this case, so
 *                   the returned length must be used;
 *   anything else   the string is left untouched and its length is
 *                   returned.
 *
 * If ep is not NULL, *ep is set to point to the final quote, or to the
 * null if improperly quoted (for an empty or unquoted input it points
 * to the terminating null of str.)
 */
size_t nasm_unquote(char *str, char **ep)
{
    char bq;
    char *p, *q;
    char *escp = NULL;
    char c;
    enum unq_state {
        st_start,
        st_backslash,
        st_hex,
        st_oct,
        st_ucs
    } state;
    int ndig = 0;
    /*
     * Accumulated in an unsigned type: a \U escape with eight digits
     * can set bit 31, and left-shifting into the sign bit of a signed
     * integer is undefined behaviour.
     */
    uint32_t nval = 0;

    p = q = str;

    bq = *p++;
    if (!bq) {
        /* Empty input: still honour the *ep contract */
        if (ep)
            *ep = str;
        return 0;
    }

    switch (bq) {
    case '\'':
    case '\"':
        /* '...' or "..." string */
        while ((c = *p) && c != bq) {
            p++;
            *q++ = c;
        }
        *q = '\0';
        break;

    case '`':
        /* `...` string */
        state = st_start;

        while ((c = *p)) {
            p++;
            switch (state) {
            case st_start:
                switch (c) {
                case '\\':
                    state = st_backslash;
                    break;
                case '`':
                    p--;        /* Leave p on the closing quote */
                    goto out;
                default:
                    *q++ = c;
                    break;
                }
                break;

            case st_backslash:
                state = st_start;
                escp = p;       /* Beginning of argument sequence */
                nval = 0;
                switch (c) {
                case 'a':
                    *q++ = 7;
                    break;
                case 'b':
                    *q++ = 8;
                    break;
                case 'e':
                    *q++ = 27;
                    break;
                case 'f':
                    *q++ = 12;
                    break;
                case 'n':
                    *q++ = 10;
                    break;
                case 'r':
                    *q++ = 13;
                    break;
                case 't':
                    *q++ = 9;
                    break;
                case 'u':
                    state = st_ucs;
                    ndig = 4;
                    break;
                case 'U':
                    state = st_ucs;
                    ndig = 8;
                    break;
                case 'v':
                    *q++ = 11;
                    break;
                case 'x':
                case 'X':
                    state = st_hex;
                    ndig = 2;
                    break;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                    state = st_oct;
                    ndig = 2;   /* Up to two more digits */
                    nval = c - '0';
                    break;
                default:
                    /* Unknown escape: the character stands for itself */
                    *q++ = c;
                    break;
                }
                break;

            case st_oct:
                if (c >= '0' && c <= '7') {
                    nval = (nval << 3) + (c - '0');
                    if (!--ndig) {
                        *q++ = nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    *q++ = nval;
                    state = st_start;
                }
                break;

            case st_hex:
                if ((c >= '0' && c <= '9') ||
                    (c >= 'A' && c <= 'F') ||
                    (c >= 'a' && c <= 'f')) {
                    nval = (nval << 4) + numvalue(c);
                    if (!--ndig) {
                        *q++ = nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    /* With no digits at all, emit the x/X literally */
                    if (p > escp)
                        *q++ = nval;
                    else
                        *q++ = escp[-1];
                    state = st_start;
                }
                break;

            case st_ucs:
                if ((c >= '0' && c <= '9') ||
                    (c >= 'A' && c <= 'F') ||
                    (c >= 'a' && c <= 'f')) {
                    nval = (nval << 4) + numvalue(c);
                    if (!--ndig) {
                        /* Values above 0x7fffffff produce no output */
                        if (nval <= 0x7fffffff)
                            q = emit_utf8(q, (int32_t)nval);
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    if (p > escp) {
                        if (nval <= 0x7fffffff)
                            q = emit_utf8(q, (int32_t)nval);
                    } else {
                        /* No digits at all: emit the u/U literally */
                        *q++ = escp[-1];
                    }
                    state = st_start;
                }
                break;
            }
        }

        /* Input ended without a closing quote: flush a pending escape */
        switch (state) {
        case st_start:
        case st_backslash:
            break;
        case st_oct:
            *q++ = nval;
            break;
        case st_hex:
            if (p > escp)
                *q++ = nval;
            else
                *q++ = escp[-1];
            break;
        case st_ucs:
            if (p > escp) {
                if (nval <= 0x7fffffff)
                    q = emit_utf8(q, (int32_t)nval);
            } else {
                *q++ = escp[-1];
            }
            break;
        }
    out:
        break;

    default:
        /* Not a quoted string, just return the input... */
        p = q = strchr(str, '\0');
        break;
    }

    if (ep)
        *ep = p;
    return q-str;
}
422
/*
 * Find the end of a quoted string; returns the pointer to the terminating
 * character (either the ending quote or the null character, if
 * unterminated.)
 *
 * If str does not begin with one of the quote characters ' " or `, str
 * itself is returned.  Inside a `...` string a backslash causes the
 * character following it to be skipped, so an escaped backquote does
 * not end the string.
 */
char *nasm_skip_string(char *str)
{
    char bq;
    char *p;
    char c;
    enum unq_state {
        st_start,
        st_backslash
    } state;

    bq = str[0];
    if (bq == '\'' || bq == '\"') {
        /* '...' or "..." string */
        for (p = str+1; *p && *p != bq; p++)
            ;
        return p;
    } else if (bq == '`') {
        /* `...` string */
        state = st_start;

        /*
         * p is only advanced past characters known to be non-null, so
         * on an unterminated string it stops *on* the null rather than
         * one byte beyond it.
         */
        for (p = str+1; (c = *p) != '\0'; p++) {
            switch (state) {
            case st_start:
                switch (c) {
                case '\\':
                    state = st_backslash;
                    break;
                case '`':
                    return p;   /* Found the end */
                default:
                    break;
                }
                break;

            case st_backslash:
                /*
                 * Note: for the purpose of finding the end of the string,
                 * all successor states to st_backslash are functionally
                 * equivalent to st_start, since either a backslash or
                 * a backquote will force a return to the st_start state.
                 */
                state = st_start;
                break;
            }
        }
        return p;               /* Unterminated string... */
    } else {
        return str;             /* Not a string... */
    }
}
477}