blob: 58bb5a108e79d0fc1e9e504e9d1457b097659e7f [file] [log] [blame]
/* ----------------------------------------------------------------------- *
 *
 *   Copyright 1996-2019 The NASM Authors - All Rights Reserved
 *   See the file AUTHORS included with the NASM distribution for
 *   the specific copyright holders.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following
 *   conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above
 *     copyright notice, this list of conditions and the following
 *     disclaimer in the documentation and/or other materials provided
 *     with the distribution.
 *
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ----------------------------------------------------------------------- */
33
/*
 * quote.c
 */
37
38#include "compiler.h"
H. Peter Anvin8cad14b2008-06-01 17:23:51 -070039#include "nasmlib.h"
40#include "quote.h"
H. Peter Anvinbb42d302019-04-22 14:29:29 -070041#include "nctype.h"
42#include "error.h"
H. Peter Anvin8cad14b2008-06-01 17:23:51 -070043
/*
 * Return the letter that follows the backslash in the two-character
 * escape for control character c (e.g. 'n' for newline), or 0 if c
 * has no such short escape.
 */
static char quote_escape_letter(char c)
{
    switch (c) {
    case '\a':
        return 'a';
    case '\b':
        return 'b';
    case '\t':
        return 't';
    case '\n':
        return 'n';
    case '\v':
        return 'v';
    case '\f':
        return 'f';
    case '\r':
        return 'r';
    case 27:
        return 'e';
    default:
        return 0;
    }
}

/*
 * Number of octal digits (1 to 3) needed to write byte c as an octal
 * escape when the next input byte is next (0 if there is none).
 *
 * If the next byte is itself an octal digit, the full three-digit form
 * must be used; otherwise that digit would be read back as part of
 * this escape.
 */
static unsigned int quote_octal_digits(char c, char next)
{
    const unsigned char uc = (unsigned char)c;

    if (next >= '0' && next <= '7')
        return 3;

    return 1 + (uc > 07) + (uc > 077);
}

/*
 * Create a NASM quoted string in newly allocated memory.
 *
 * str  - input bytes; need not be NUL-terminated and may contain NULs
 * lenp - on entry, *lenp is the number of input bytes; on return it is
 *        updated with the output length (sans final NUL)
 *
 * Returns a NUL-terminated buffer obtained from nasm_malloc(); no
 * reference to it is kept here, so the caller owns it.
 *
 * '...' is used if the input has only printable ASCII and no single
 * quote, otherwise "..." if it has only printable ASCII and no double
 * quote, otherwise `...` with backslash escapes.
 */
char *nasm_quote(const char *str, size_t *lenp)
{
    const size_t len = *lenp;
    const char * const ep = str + len;
    const char *p;
    char *nstr, *q;
    size_t qlen = 0;            /* Length if we need `...` quotes */
    bool sq_ok = true;
    bool dq_ok = true;

    /* Pass 1: find which quote styles are usable and size the output */
    for (p = str; p < ep; p++) {
        const char c = *p;

        if (c == '\'') {
            sq_ok = false;
            qlen++;
        } else if (c == '\"') {
            dq_ok = false;
            qlen++;
        } else if (c == '`' || c == '\\') {
            qlen += 2;
        } else if (c >= ' ' && c <= '~') {
            qlen++;
        } else {
            /* Non-printable: only `...` can represent it */
            sq_ok = dq_ok = false;
            if (quote_escape_letter(c))
                qlen += 2;
            else
                qlen += 1 + quote_octal_digits(c, (p+1 < ep) ? p[1] : 0);
        }
    }

    if (sq_ok || dq_ok) {
        /* Use '...' or "..."; the contents are copied verbatim */
        const char quote = sq_ok ? '\'' : '\"';

        nstr = nasm_malloc(len+3);
        nstr[0] = quote;
        if (len > 0)
            memcpy(nstr+1, str, len);
        nstr[len+1] = quote;
        q = &nstr[len+2];
    } else {
        /* Need to use `...` quoted syntax */
        nstr = nasm_malloc(qlen+3);
        q = nstr;
        *q++ = '`';

        /* Pass 2: emit, mirroring the classification used in pass 1 */
        for (p = str; p < ep; p++) {
            const char c = *p;
            const char esc = quote_escape_letter(c);

            if (c == '`' || c == '\\') {
                *q++ = '\\';
                *q++ = c;
            } else if (esc) {
                *q++ = '\\';
                *q++ = esc;
            } else if (c >= ' ' && c <= '~') {
                *q++ = c;
            } else {
                const unsigned char uc = (unsigned char)c;
                const unsigned int ndig =
                    quote_octal_digits(c, (p+1 < ep) ? p[1] : 0);

                *q++ = '\\';
                if (ndig > 2)
                    *q++ = (uc >> 6) + '0';
                if (ndig > 1)
                    *q++ = ((uc >> 3) & 7) + '0';
                *q++ = (uc & 7) + '0';
            }
        }
        *q++ = '`';

        /* The two passes must agree on the output size */
        nasm_assert((size_t)(q-nstr) == qlen+2);
    }

    *q = '\0';
    *lenp = q - nstr;
    return nstr;
}
189
/*
 * Encode the value v as UTF-8 at q and return a pointer just past the
 * last byte written. Between 1 and 6 bytes are written; the caller
 * must provide the room.
 *
 * Values above 0x10ffff are written in the "classic" (pre-UTF16)
 * multi-byte form with 5 or 6 bytes.
 *
 * Note: this is invalid even for "classic" 31-bit UTF-8 if the value
 * is >= 0x80000000. This at least tries to do something vaguely
 * sensible with it: the lead byte becomes 0xfe or 0xff. Caveat
 * programmer. The __utf*__ string transform functions do reject these
 * as invalid input.
 */
static unsigned char *emit_utf8(unsigned char *q, uint32_t v)
{
    unsigned int ntrail;        /* Number of extension bytes */
    unsigned char lead;         /* Marker bits of the lead byte */

    if (v <= 0x7f) {
        *q++ = v;
        return q;
    }

    if (v <= 0x7ff) {
        ntrail = 1;
        lead = 0xc0;
    } else if (v <= 0xffff) {
        ntrail = 2;
        lead = 0xe0;
    } else if (v <= 0x1fffff) {
        ntrail = 3;
        lead = 0xf0;
    } else if (v <= 0x3ffffff) {
        ntrail = 4;
        lead = 0xf8;
    } else {
        /*
         * The shifted value cannot be more than 3, as a 32-bit value
         * is shifted right by 5*6 = 30 bits.
         */
        ntrail = 5;
        lead = 0xfc;
    }

    /* Lead byte carries the bits above the extension bytes */
    *q++ = lead + (v >> (6 * ntrail));

    /* Emit extension bytes, most significant first, 6 bits each */
    while (ntrail--)
        *q++ = 0x80 + ((v >> (6 * ntrail)) & 63);

    return q;
}
245
/*
 * Return the mask bit for control character v: bit number v for
 * values 0..31, and 0 for anything else (which also keeps the shift
 * count in range.)
 */
static inline uint32_t ctlbit(uint32_t v)
{
    return unlikely(v < 32) ? UINT32_C(1) << v : 0;
}
250
/*
 * Output helpers for nasm_unquote_common(). They expand in the scope
 * of that function and rely on its locals: badctl (mask of forbidden
 * control characters), ctlmask (mask of control characters seen so
 * far) and q (output cursor).
 *
 * CTL_ERR(c) records c in ctlmask and is nonzero if any forbidden
 * control character has been seen so far, c included. ctlmask only
 * ever gains bits, so once CTL_ERR() is nonzero it stays nonzero and
 * all later output is suppressed.
 */
#define CTL_ERR(c)                              \
    (badctl & (ctlmask |= ctlbit(c)))

/* Append the value c to the output as UTF-8, unless CTL_ERR() trips */
#define EMIT_UTF8(c)                            \
    do {                                        \
        uint32_t ec = (c);                      \
        if (!CTL_ERR(ec))                       \
            q = emit_utf8(q, ec);               \
    } while (0)

/* Append c, converted to one byte, to the output unless CTL_ERR() trips */
#define EMIT(c)                                 \
    do {                                        \
        unsigned char ec = (c);                 \
        if (!CTL_ERR(ec))                       \
            *q++ = ec;                          \
    } while (0)
267
/*
 * Same as nasm_quote, but the input is a NUL-terminated C string whose
 * length is taken with strlen().
 *
 * The lenp argument is optional: if non-NULL, *lenp receives the
 * output length (sans final NUL); its value on entry is not used.
 */
char *nasm_quote_cstr(const char *str, size_t *lenp)
{
    size_t len = strlen(str);
    char *qstr = nasm_quote(str, &len);

    if (lenp)
        *lenp = len;

    return qstr;
}
280
/*
 * Do an *in-place* dequoting of the specified string, returning the
 * resulting length (the result may contain embedded NULs.)
 *
 * In-place replacement is possible since the unquoted length is always
 * shorter than or equal to the quoted length.
 *
 * If ep is non-NULL, *ep is always set: it points to the final quote,
 * or to the terminating NUL if the string is improperly quoted, empty,
 * or not quoted at all.
 *
 * A string that does not start with a quote character is returned
 * unchanged (subject to the control character check below.)
 *
 * Issue an error if the string contains control characters
 * corresponding to bits set in badctl; in that case, the output
 * string, but not *ep, is truncated before the first invalid
 * character.
 */
static size_t nasm_unquote_common(char *str, char **ep,
                                  const uint32_t badctl)
{
    unsigned char bq;
    const unsigned char *p;
    const unsigned char *escp = NULL;
    unsigned char *q;
    unsigned char c;
    uint32_t ctlmask = 0;       /* Mask of control characters seen */
    enum unq_state {
        st_start,
        st_backslash,
        st_hex,
        st_oct,
        st_ucs,
        st_done
    } state;
    int ndig = 0;
    uint32_t nval = 0;

    p = q = (unsigned char *)str;

    bq = *p++;
    if (!bq) {
        /* Empty input: the terminating NUL is the string itself */
        if (ep)
            *ep = str;
        return 0;
    }

    switch (bq) {
    case '\'':
    case '\"':
        /* '...' or "..." string: no escapes, copy up to the end quote */
        while ((c = *p++) && (c != bq))
            EMIT(c);
        break;

    case '`':
        /* `...` string */
        state = st_start;

        while (state != st_done) {
            c = *p++;
            switch (state) {
            case st_start:
                switch (c) {
                case '\\':
                    state = st_backslash;
                    break;
                case '`':
                case '\0':
                    state = st_done;
                    break;
                default:
                    EMIT(c);
                    break;
                }
                break;

            case st_backslash:
                state = st_start;
                escp = p;       /* Beginning of argument sequence */
                nval = 0;
                switch (c) {
                case 'a':
                    nval = 7;
                    break;
                case 'b':
                    nval = 8;
                    break;
                case 'e':
                    nval = 27;
                    break;
                case 'f':
                    nval = 12;
                    break;
                case 'n':
                    nval = 10;
                    break;
                case 'r':
                    nval = 13;
                    break;
                case 't':
                    nval = 9;
                    break;
                case 'u':
                    state = st_ucs;
                    ndig = 4;
                    break;
                case 'U':
                    state = st_ucs;
                    ndig = 8;
                    break;
                case 'v':
                    nval = 11;
                    break;
                case 'x':
                case 'X':
                    state = st_hex;
                    ndig = 2;
                    break;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                    state = st_oct;
                    ndig = 2;   /* Up to two more digits */
                    nval = c - '0';
                    break;
                case '\0':
                    /* Trailing backslash: emit it literally */
                    nval = '\\';
                    p--;        /* Reprocess; terminates string */
                    break;
                default:
                    /* Unknown escape: the character stands for itself */
                    nval = c;
                    break;
                }
                /* Single-character escapes are complete at this point */
                if (state == st_start)
                    EMIT(nval);
                break;

            case st_oct:
                if (c >= '0' && c <= '7') {
                    nval = (nval << 3) + (c - '0');
                    if (--ndig)
                        break;  /* Might have more digits */
                } else {
                    p--;        /* Process this character again */
                }
                EMIT(nval);
                state = st_start;
                break;

            case st_hex:
            case st_ucs:
                if (nasm_isxdigit(c)) {
                    nval = (nval << 4) + numvalue(c);
                    if (--ndig)
                        break;  /* Might have more digits */
                } else {
                    p--;        /* Process this character again */
                }

                /*
                 * If no digit at all was consumed, emit the escape
                 * letter (x, X, u or U) itself instead of a value.
                 */
                if (unlikely(p <= escp))
                    EMIT(escp[-1]);
                else if (state == st_ucs)
                    EMIT_UTF8(nval);
                else
                    EMIT(nval);

                state = st_start;
                break;

            default:
                panic();
            }
        }
        break;

    default:
        /*
         * Not a quoted string, just return the input... The first
         * character is ordinary data here, not a quote, so step back
         * to include it rather than dropping it from the output.
         */
        p--;
        while ((c = *p++))
            EMIT(c);
        break;
    }

    /* Zero-terminate the output */
    *q = '\0';

    if (ctlmask & badctl)
        nasm_nonfatal("control character in string not allowed here");

    /* p is one past the terminating quote or NUL */
    if (ep)
        *ep = (char *)p - 1;
    return (char *)q - str;
}
#undef EMIT
474
/*
 * Dequote str in place with no restriction on control characters
 * (badctl is 0), so the result may contain any byte, embedded NULs
 * included. Returns the resulting length; see nasm_unquote_common()
 * for the meaning of ep.
 */
size_t nasm_unquote(char *str, char **ep)
{
    return nasm_unquote_common(str, ep, 0);
}
/*
 * Dequote str in place, for use as a C string: an error is issued and
 * the output truncated if the result would contain a control character
 * (0..31, NUL included) other than the ones permitted below. Returns
 * the resulting length; see nasm_unquote_common() for the meaning of
 * ep.
 */
size_t nasm_unquote_cstr(char *str, char **ep)
{
    /*
     * These are the only control characters permitted: BEL BS TAB ESC
     */
    const uint32_t okctl =
        (UINT32_C(1) << '\a') | (UINT32_C(1) << '\b') |
        (UINT32_C(1) << '\t') | (UINT32_C(1) << 27);

    return nasm_unquote_common(str, ep, ~okctl);
}
488
/*
 * Find the end of a quoted string; returns the pointer to the terminating
 * character (either the ending quote or the null character, if unterminated.)
 * If the input is not a quoted string, return NULL.
 *
 * The opening quote str[0] selects the syntax: '...' and "..." end at
 * the next matching quote; `...` ends at the next backquote that is
 * not preceded by a backslash escape.
 *
 * The result points into str; constness is dropped for the caller's
 * convenience only.
 */
char *nasm_skip_string(const char *str)
{
    const char bq = str[0];
    const char *p = str + 1;

    switch (bq) {
    case '\'':
    case '\"':
        /* '...' or "..." string: no escapes */
        while (*p && *p != bq)
            p++;
        break;

    case '`':
        /* `...` string */
        while (*p && *p != '`') {
            /*
             * For the purpose of finding the end of the string, all
             * that matters about an escape is that the character
             * right after the backslash cannot terminate the string,
             * unless it is the final null.
             */
            if (*p == '\\' && p[1])
                p++;
            p++;
        }
        break;

    default:
        /* Not a string at all... */
        return NULL;
    }

    return (char *)p;
}