blob: f031ec0e6a8268329cac67eadb9815654e3f72a6 [file] [log] [blame]
H. Peter Anvinea6e34d2002-04-30 20:51:32 +00001/* parser.c source line parser for the Netwide Assembler
2 *
3 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
4 * Julian Hall. All rights reserved. The software is
5 * redistributable under the licence given in the file "Licence"
6 * distributed in the NASM archive.
7 *
8 * initial version 27/iii/95 by Simon Tatham
9 */
10
11#include <stdio.h>
12#include <stdlib.h>
13#include <stddef.h>
14#include <string.h>
15#include <ctype.h>
16
17#include "nasm.h"
18#include "nasmlib.h"
19#include "parser.h"
20#include "float.h"
21
22#include "names.c"
23
/*
 * Operand-type flags for every register.  The table is indexed by the
 * register number that nexttoken() leaves in tokval.t_integer for a
 * TOKEN_REG token (the index returned by searching reg_names[]), and
 * by the `type' field of register terms in an expr vector, so the
 * order of the entries must stay in step with reg_names[].
 */
static long reg_flags[] = {            /* sizes and special flags */
    0, REG8, REG_AL, REG_AX, REG8, REG8, REG16, REG16, REG8, REG_CL,
    REG_CREG, REG_CREG, REG_CREG, REG_CR4, REG_CS, REG_CX, REG8,
    REG16, REG8, REG_DREG, REG_DREG, REG_DREG, REG_DREG, REG_DREG,
    REG_DREG, REG_DESS, REG_DX, REG_EAX, REG32, REG32, REG_ECX,
    REG32, REG32, REG_DESS, REG32, REG32, REG_FSGS, REG_FSGS,
    MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG,
    REG16, REG16, REG_DESS, FPU0, FPUREG, FPUREG, FPUREG, FPUREG,
    FPUREG, FPUREG, FPUREG, REG_TREG, REG_TREG, REG_TREG, REG_TREG,
    REG_TREG
};
35
/*
 * Size and distance keywords.  nexttoken() looks a word up in
 * special_names[] with bsi() and returns the resulting index as the
 * token value, which parse_line() then compares against the S_xxx
 * constants.  The enum and the string table must therefore list the
 * keywords in the same order, and that order must be sorted as
 * strcmp() sorts, because bsi() is a binary search.
 */
enum {                                 /* special tokens */
    S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_QWORD, S_SHORT, S_TO,
    S_TWORD, S_WORD
};

static char *special_names[] = {       /* and the actual text */
    "byte", "dword", "far", "long", "near", "qword", "short", "to",
    "tword", "word"
};
45
/*
 * Instruction prefix keywords, kept in strcmp() order for bsi().
 * nexttoken() adds PREFIX_ENUM_START to the index found here to form
 * the prefix token value, so the order presumably has to match the
 * prefix enumeration declared elsewhere (P_TIMES etc.) - verify
 * against that header before reordering.
 */
static char *prefix_names[] = {
    "a16", "a32", "lock", "o16", "o32", "rep", "repe", "repne",
    "repnz", "repz", "times"
};
50
51/*
52 * Evaluator datatype. Expressions, within the evaluator, are
53 * stored as an array of these beasts, terminated by a record with
54 * type==0. Mostly, it's a vector type: each type denotes some kind
55 * of a component, and the value denotes the multiple of that
56 * component present in the expression. The exception is the WRT
57 * type, whose `value' field denotes the segment to which the
58 * expression is relative. These segments will be segment-base
59 * types, i.e. either odd segment values or SEG_ABS types. So it is
60 * still valid to assume that anything with a `value' field of zero
61 * is insignificant.
62 */
/* One term of an evaluator vector; a term with type==0 ends the vector. */
typedef struct {
    long type;                         /* a register, or EXPR_xxx */
    long value;                        /* must be >= 32 bits */
} expr;
67
static void eval_reset(void);          /* rewind the expr scratch storage */
static expr *evaluate(int);            /* parse an expression; NULL on error */

/*
 * ASSUMPTION MADE HERE. The number of distinct register names
 * (i.e. possible "type" fields for an expr structure) does not
 * exceed 126.
 */
#define EXPR_SIMPLE 126                /* term is a plain number */
#define EXPR_WRT 127                   /* term's value is the WRT segment */
#define EXPR_SEGBASE 128               /* EXPR_SEGBASE+n: base of segment n */

/* Classification and decomposition helpers for expr vectors. */
static int is_reloc(expr *);
static int is_simple(expr *);
static int is_really_simple (expr *);
static long reloc_value(expr *);
static long reloc_seg(expr *);
static long reloc_wrt(expr *);
86
/*
 * Token codes returned by nexttoken().  Single-character tokens are
 * returned as the character itself, so the named codes start above
 * the unsigned char range; 0 means end of line.
 */
enum {                                 /* token types, other than chars */
    TOKEN_ID = 256, TOKEN_NUM, TOKEN_REG, TOKEN_INSN, TOKEN_ERRNUM,
    TOKEN_HERE, TOKEN_BASE, TOKEN_SPECIAL, TOKEN_PREFIX, TOKEN_SHL,
    TOKEN_SHR, TOKEN_SDIV, TOKEN_SMOD, TOKEN_SEG, TOKEN_WRT,
    TOKEN_FLOAT
};

/*
 * Attributes of the most recently scanned token, as filled in by
 * nexttoken():
 *   t_integer - table index (register, instruction, prefix, special
 *               keyword) or the value of a numeric/character constant
 *   t_inttwo  - condition-code index for conditional instructions, or
 *               the length of a quoted string
 *   t_charptr - identifier text, floating-point text or start of a
 *               quoted string; NULL for a plain number
 */
struct tokenval {
    long t_integer, t_inttwo;
    char *t_charptr;
};
98
/*
 * Scratch buffer for token text (identifiers, numbers, float images);
 * `q' is the next free byte.  parse_line() resets q at the start of
 * every line.
 */
static char tempstorage[1024], *q;
static int bsi (char *string, char **array, int size);/* binary search */

static int nexttoken (void);
static int is_comma_next (void);

static char *bufptr;                   /* read position in the current line */
static int i;                          /* current token, from nexttoken() */
static struct tokenval tokval;         /* attributes of the current token */
static lfunc labelfunc;                /* label lookup callback for this line */
static efunc error;                    /* error reporting callback */
static char *label;                    /* label on this line, or "" if none */
static struct ofmt *outfmt;            /* output format driver in use */

static long seg, ofs;                  /* location passed to parse_line() */

/*
 * Cleared by parse_line() at the start of each line and copied into
 * result->forw_ref after each operand is evaluated; presumably set by
 * the evaluator when it meets a forward reference (the code that sets
 * it is not in this part of the file).
 */
static int forward;
116
H. Peter Anvinea6e34d2002-04-30 20:51:32 +0000117insn *parse_line (long segment, long offset, lfunc lookup_label, int pass,
118 char *buffer, insn *result, struct ofmt *output,
119 efunc errfunc) {
120 int operand;
121 int critical;
122
H. Peter Anvinea838272002-04-30 20:51:53 +0000123 forward = result->forw_ref = FALSE;
H. Peter Anvinea6e34d2002-04-30 20:51:32 +0000124 q = tempstorage;
125 bufptr = buffer;
126 labelfunc = lookup_label;
127 outfmt = output;
128 error = errfunc;
129 seg = segment;
130 ofs = offset;
131 label = "";
132
133 i = nexttoken();
134
135 result->eops = NULL; /* must do this, whatever happens */
136
137 if (i==0) { /* blank line - ignore */
138 result->label = NULL; /* so, no label on it */
139 result->opcode = -1; /* and no instruction either */
140 return result;
141 }
142 if (i != TOKEN_ID && i != TOKEN_INSN && i != TOKEN_PREFIX &&
143 (i!=TOKEN_REG || (REG_SREG & ~reg_flags[tokval.t_integer]))) {
144 error (ERR_NONFATAL, "label or instruction expected"
145 " at start of line");
146 result->label = NULL;
147 result->opcode = -1;
148 return result;
149 }
150
151 if (i == TOKEN_ID) { /* there's a label here */
152 label = result->label = tokval.t_charptr;
153 i = nexttoken();
154 if (i == ':') { /* skip over the optional colon */
155 i = nexttoken();
H. Peter Anvin6768eb72002-04-30 20:52:26 +0000156 } else if (i == 0 && pass == 1) {
157 error (ERR_WARNING|ERR_WARN_OL,
158 "label alone on a line without a colon might be in error");
H. Peter Anvinea6e34d2002-04-30 20:51:32 +0000159 }
160 } else /* no label; so, moving swiftly on */
161 result->label = NULL;
162
163 if (i==0) {
164 result->opcode = -1; /* this line contains just a label */
165 return result;
166 }
167
168 result->nprefix = 0;
H. Peter Anvind7ed89e2002-04-30 20:52:08 +0000169 result->times = 1L;
H. Peter Anvinea6e34d2002-04-30 20:51:32 +0000170
171 while (i == TOKEN_PREFIX ||
172 (i==TOKEN_REG && !(REG_SREG & ~reg_flags[tokval.t_integer]))) {
173 /*
174 * Handle special case: the TIMES prefix.
175 */
176 if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) {
177 expr *value;
178
179 i = nexttoken();
180 eval_reset();
181 value = evaluate (pass);
182 if (!value) { /* but, error in evaluator */
183 result->opcode = -1; /* unrecoverable parse error: */
184 return result; /* ignore this instruction */
185 }
186 if (!is_simple (value)) {
187 error (ERR_NONFATAL,
188 "non-constant argument supplied to TIMES");
H. Peter Anvind7ed89e2002-04-30 20:52:08 +0000189 result->times = 1L;
190 } else {
H. Peter Anvinea6e34d2002-04-30 20:51:32 +0000191 result->times = value->value;
H. Peter Anvind7ed89e2002-04-30 20:52:08 +0000192 if (value->value < 0)
H. Peter Anvin6768eb72002-04-30 20:52:26 +0000193 error(ERR_NONFATAL, "TIMES value %d is negative",
H. Peter Anvind7ed89e2002-04-30 20:52:08 +0000194 value->value);
195 }
H. Peter Anvinea6e34d2002-04-30 20:51:32 +0000196 } else {
197 if (result->nprefix == MAXPREFIX)
198 error (ERR_NONFATAL,
199 "instruction has more than %d prefixes", MAXPREFIX);
200 else
201 result->prefixes[result->nprefix++] = tokval.t_integer;
202 i = nexttoken();
203 }
204 }
205
206 if (i != TOKEN_INSN) {
207 error (ERR_NONFATAL, "parser: instruction expected");
208 result->opcode = -1;
209 return result;
210 }
211
212 result->opcode = tokval.t_integer;
213 result->condition = tokval.t_inttwo;
214
215 /*
216 * RESB, RESW and RESD cannot be satisfied with incorrectly
217 * evaluated operands, since the correct values _must_ be known
218 * on the first pass. Hence, even in pass one, we set the
219 * `critical' flag on calling evaluate(), so that it will bomb
220 * out on undefined symbols. Nasty, but there's nothing we can
221 * do about it.
222 *
223 * For the moment, EQU has the same difficulty, so we'll
224 * include that.
225 */
226 if (result->opcode == I_RESB ||
227 result->opcode == I_RESW ||
228 result->opcode == I_RESD ||
229 result->opcode == I_RESQ ||
230 result->opcode == I_REST ||
231 result->opcode == I_EQU)
232 critical = pass;
233 else
234 critical = (pass==2 ? 2 : 0);
235
236 if (result->opcode == I_DB ||
237 result->opcode == I_DW ||
238 result->opcode == I_DD ||
239 result->opcode == I_DQ ||
H. Peter Anvind7ed89e2002-04-30 20:52:08 +0000240 result->opcode == I_DT ||
241 result->opcode == I_INCBIN) {
H. Peter Anvinea6e34d2002-04-30 20:51:32 +0000242 extop *eop, **tail = &result->eops;
243 int oper_num = 0;
244
245 /*
246 * Begin to read the DB/DW/DD/DQ/DT operands.
247 */
248 while (1) {
249 i = nexttoken();
250 if (i == 0)
251 break;
252 eop = *tail = nasm_malloc(sizeof(extop));
253 tail = &eop->next;
254 eop->next = NULL;
255 eop->type = EOT_NOTHING;
256 oper_num++;
257
258 if (i == TOKEN_NUM && tokval.t_charptr && is_comma_next()) {
259 eop->type = EOT_DB_STRING;
260 eop->stringval = tokval.t_charptr;
261 eop->stringlen = tokval.t_inttwo;
262 i = nexttoken(); /* eat the comma */
263 continue;
264 }
265
266 if (i == TOKEN_FLOAT || i == '-') {
267 long sign = +1L;
268
269 if (i == '-') {
270 char *save = bufptr;
271 i = nexttoken();
272 sign = -1L;
273 if (i != TOKEN_FLOAT) {
274 bufptr = save;
275 i = '-';
276 }
277 }
278
279 if (i == TOKEN_FLOAT) {
280 eop->type = EOT_DB_STRING;
281 eop->stringval = q;
282 if (result->opcode == I_DD)
283 eop->stringlen = 4;
284 else if (result->opcode == I_DQ)
285 eop->stringlen = 8;
286 else if (result->opcode == I_DT)
287 eop->stringlen = 10;
288 else {
289 error(ERR_NONFATAL, "floating-point constant"
290 " encountered in `D%c' instruction",
291 result->opcode == I_DW ? 'W' : 'B');
292 eop->type = EOT_NOTHING;
293 }
294 q += eop->stringlen;
295 if (!float_const (tokval.t_charptr, sign,
296 (unsigned char *)eop->stringval,
297 eop->stringlen, error))
298 eop->type = EOT_NOTHING;
299 i = nexttoken(); /* eat the comma */
300 continue;
301 }
302 }
303
304 /* anything else */ {
305 expr *value;
306 eval_reset();
307 value = evaluate (critical);
308 if (!value) { /* but, error in evaluator */
309 result->opcode = -1;/* unrecoverable parse error: */
310 return result; /* ignore this instruction */
311 }
312 if (is_reloc(value)) {
313 eop->type = EOT_DB_NUMBER;
314 eop->offset = reloc_value(value);
315 eop->segment = reloc_seg(value);
316 eop->wrt = reloc_wrt(value);
317 } else {
318 error (ERR_NONFATAL,
319 "`%s' operand %d: expression is not simple"
320 " or relocatable",
321 insn_names[result->opcode], oper_num);
322 }
323 }
H. Peter Anvin6768eb72002-04-30 20:52:26 +0000324
325 /*
326 * We're about to call nexttoken(), which will eat the
327 * comma that we're currently sitting on between
328 * arguments. However, we'd better check first that it
329 * _is_ a comma.
330 */
331 if (i == 0) /* also could be EOL */
332 break;
333 if (i != ',') {
334 error (ERR_NONFATAL, "comma expected after `%s' operand %d",
335 insn_names[result->opcode], oper_num);
336 result->opcode = -1;/* unrecoverable parse error: */
337 return result; /* ignore this instruction */
338 }
H. Peter Anvinea6e34d2002-04-30 20:51:32 +0000339 }
H. Peter Anvind7ed89e2002-04-30 20:52:08 +0000340
341 if (result->opcode == I_INCBIN) {
342 /*
343 * Correct syntax for INCBIN is that there should be
344 * one string operand, followed by one or two numeric
345 * operands.
346 */
347 if (!result->eops || result->eops->type != EOT_DB_STRING)
348 error (ERR_NONFATAL, "`incbin' expects a file name");
349 else if (result->eops->next &&
350 result->eops->next->type != EOT_DB_NUMBER)
351 error (ERR_NONFATAL, "`incbin': second parameter is",
352 " non-numeric");
353 else if (result->eops->next && result->eops->next->next &&
354 result->eops->next->next->type != EOT_DB_NUMBER)
355 error (ERR_NONFATAL, "`incbin': third parameter is",
356 " non-numeric");
357 else if (result->eops->next && result->eops->next->next &&
358 result->eops->next->next->next)
359 error (ERR_NONFATAL, "`incbin': more than three parameters");
360 else
361 return result;
362 /*
363 * If we reach here, one of the above errors happened.
364 * Throw the instruction away.
365 */
366 result->opcode = -1;
367 return result;
368 }
369
H. Peter Anvinea6e34d2002-04-30 20:51:32 +0000370 return result;
371 }
372
373 /* right. Now we begin to parse the operands. There may be up to three
374 * of these, separated by commas, and terminated by a zero token. */
375
376 for (operand = 0; operand < 3; operand++) {
377 expr *seg, *value; /* used most of the time */
378 int mref; /* is this going to be a memory ref? */
H. Peter Anvin6768eb72002-04-30 20:52:26 +0000379 int bracket; /* is it a [] mref, or a & mref? */
H. Peter Anvinea6e34d2002-04-30 20:51:32 +0000380
381 result->oprs[operand].addr_size = 0;/* have to zero this whatever */
382 i = nexttoken();
383 if (i == 0) break; /* end of operands: get out of here */
384 result->oprs[operand].type = 0; /* so far, no override */
385 while (i == TOKEN_SPECIAL) {/* size specifiers */
386 switch ((int)tokval.t_integer) {
387 case S_BYTE:
388 result->oprs[operand].type |= BITS8;
389 break;
390 case S_WORD:
391 result->oprs[operand].type |= BITS16;
392 break;
393 case S_DWORD:
394 case S_LONG:
395 result->oprs[operand].type |= BITS32;
396 break;
397 case S_QWORD:
398 result->oprs[operand].type |= BITS64;
399 break;
400 case S_TWORD:
401 result->oprs[operand].type |= BITS80;
402 break;
403 case S_TO:
404 result->oprs[operand].type |= TO;
405 break;
406 case S_FAR:
407 result->oprs[operand].type |= FAR;
408 break;
409 case S_NEAR:
410 result->oprs[operand].type |= NEAR;
411 break;
412 case S_SHORT:
413 result->oprs[operand].type |= SHORT;
414 break;
415 }
416 i = nexttoken();
417 }
418
H. Peter Anvin6768eb72002-04-30 20:52:26 +0000419 if (i == '[' || i == '&') { /* memory reference */
H. Peter Anvinea6e34d2002-04-30 20:51:32 +0000420 mref = TRUE;
H. Peter Anvin6768eb72002-04-30 20:52:26 +0000421 bracket = (i == '[');
422 i = nexttoken();
H. Peter Anvinea6e34d2002-04-30 20:51:32 +0000423 if (i == TOKEN_SPECIAL) { /* check for address size override */
424 switch ((int)tokval.t_integer) {
425 case S_WORD:
426 result->oprs[operand].addr_size = 16;
427 break;
428 case S_DWORD:
429 case S_LONG:
430 result->oprs[operand].addr_size = 32;
431 break;
432 default:
433 error (ERR_NONFATAL, "invalid size specification in"
434 " effective address");
435 }
436 i = nexttoken();
437 }
H. Peter Anvin6768eb72002-04-30 20:52:26 +0000438 } else { /* immediate operand, or register */
H. Peter Anvinea6e34d2002-04-30 20:51:32 +0000439 mref = FALSE;
H. Peter Anvin6768eb72002-04-30 20:52:26 +0000440 bracket = FALSE; /* placate optimisers */
441 }
H. Peter Anvinea6e34d2002-04-30 20:51:32 +0000442
443 eval_reset();
444
445 value = evaluate (critical);
H. Peter Anvinea838272002-04-30 20:51:53 +0000446 if (forward)
447 result->forw_ref = TRUE;
H. Peter Anvinea6e34d2002-04-30 20:51:32 +0000448 if (!value) { /* error in evaluator */
449 result->opcode = -1; /* unrecoverable parse error: */
450 return result; /* ignore this instruction */
451 }
452 if (i == ':' && mref) { /* it was seg:offset */
453 seg = value; /* so shift this into the segment */
454 i = nexttoken(); /* then skip the colon */
455 if (i == TOKEN_SPECIAL) { /* another check for size override */
456 switch ((int)tokval.t_integer) {
457 case S_WORD:
458 result->oprs[operand].addr_size = 16;
459 break;
460 case S_DWORD:
461 case S_LONG:
462 result->oprs[operand].addr_size = 32;
463 break;
464 default:
465 error (ERR_NONFATAL, "invalid size specification in"
466 " effective address");
467 }
468 i = nexttoken();
469 }
470 value = evaluate (critical);
H. Peter Anvinea838272002-04-30 20:51:53 +0000471 if (forward)
472 result->forw_ref = TRUE;
H. Peter Anvinea6e34d2002-04-30 20:51:32 +0000473 /* and get the offset */
474 if (!value) { /* but, error in evaluator */
475 result->opcode = -1; /* unrecoverable parse error: */
476 return result; /* ignore this instruction */
477 }
478 } else seg = NULL;
H. Peter Anvin6768eb72002-04-30 20:52:26 +0000479 if (mref && bracket) { /* find ] at the end */
H. Peter Anvinea6e34d2002-04-30 20:51:32 +0000480 if (i != ']') {
481 error (ERR_NONFATAL, "parser: expecting ]");
482 do { /* error recovery again */
483 i = nexttoken();
484 } while (i != 0 && i != ',');
485 } else /* we got the required ] */
486 i = nexttoken();
487 } else { /* immediate operand */
488 if (i != 0 && i != ',' && i != ':') {
489 error (ERR_NONFATAL, "comma or end of line expected");
490 do { /* error recovery */
491 i = nexttoken();
492 } while (i != 0 && i != ',');
493 } else if (i == ':') {
494 result->oprs[operand].type |= COLON;
495 }
496 }
497
498 /* now convert the exprs returned from evaluate() into operand
499 * descriptions... */
500
501 if (mref) { /* it's a memory reference */
502 expr *e = value;
503 int b, i, s; /* basereg, indexreg, scale */
504 long o; /* offset */
505
506 if (seg) { /* segment override */
507 if (seg[1].type!=0 || seg->value!=1 ||
508 REG_SREG & ~reg_flags[seg->type])
509 error (ERR_NONFATAL, "invalid segment override");
510 else if (result->nprefix == MAXPREFIX)
511 error (ERR_NONFATAL,
512 "instruction has more than %d prefixes",
513 MAXPREFIX);
514 else
515 result->prefixes[result->nprefix++] = seg->type;
516 }
517
518 b = i = -1, o = s = 0;
519
520 if (e->type < EXPR_SIMPLE) { /* this bit's a register */
521 if (e->value == 1) /* in fact it can be basereg */
522 b = e->type;
523 else /* no, it has to be indexreg */
524 i = e->type, s = e->value;
525 e++;
526 }
527 if (e->type && e->type < EXPR_SIMPLE) {/* it's a second register */
528 if (e->value != 1) { /* it has to be indexreg */
529 if (i != -1) { /* but it can't be */
530 error(ERR_NONFATAL, "invalid effective address");
531 result->opcode = -1;
532 return result;
533 } else
534 i = e->type, s = e->value;
535 } else { /* it can be basereg */
536 if (b != -1) /* or can it? */
537 i = e->type, s = 1;
538 else
539 b = e->type;
540 }
541 e++;
542 }
543 if (e->type != 0) { /* is there an offset? */
544 if (e->type < EXPR_SIMPLE) {/* in fact, is there an error? */
545 error (ERR_NONFATAL, "invalid effective address");
546 result->opcode = -1;
547 return result;
548 } else {
549 if (e->type == EXPR_SIMPLE) {
550 o = e->value;
551 e++;
552 }
553 if (e->type == EXPR_WRT) {
554 result->oprs[operand].wrt = e->value;
555 e++;
556 } else
557 result->oprs[operand].wrt = NO_SEG;
H. Peter Anvinea838272002-04-30 20:51:53 +0000558 /*
559 * Look for a segment base type.
560 */
561 if (e->type && e->type < EXPR_SEGBASE) {
562 error (ERR_NONFATAL, "invalid effective address");
563 result->opcode = -1;
564 return result;
565 }
566 while (e->type && e->value == 0)
567 e++;
568 if (e->type && e->value != 1) {
569 error (ERR_NONFATAL, "invalid effective address");
570 result->opcode = -1;
571 return result;
572 }
573 if (e->type) {
574 result->oprs[operand].segment = e->type-EXPR_SEGBASE;
H. Peter Anvinea6e34d2002-04-30 20:51:32 +0000575 e++;
576 } else
577 result->oprs[operand].segment = NO_SEG;
H. Peter Anvinea838272002-04-30 20:51:53 +0000578 while (e->type && e->value == 0)
579 e++;
580 if (e->type) {
581 error (ERR_NONFATAL, "invalid effective address");
582 result->opcode = -1;
583 return result;
584 }
H. Peter Anvinea6e34d2002-04-30 20:51:32 +0000585 }
586 } else {
587 o = 0;
588 result->oprs[operand].wrt = NO_SEG;
589 result->oprs[operand].segment = NO_SEG;
590 }
591
592 if (e->type != 0) { /* there'd better be nothing left! */
593 error (ERR_NONFATAL, "invalid effective address");
594 result->opcode = -1;
595 return result;
596 }
597
598 result->oprs[operand].type |= MEMORY;
599 if (b==-1 && (i==-1 || s==0))
600 result->oprs[operand].type |= MEM_OFFS;
601 result->oprs[operand].basereg = b;
602 result->oprs[operand].indexreg = i;
603 result->oprs[operand].scale = s;
604 result->oprs[operand].offset = o;
605 } else { /* it's not a memory reference */
606 if (is_reloc(value)) { /* it's immediate */
607 result->oprs[operand].type |= IMMEDIATE;
608 result->oprs[operand].offset = reloc_value(value);
609 result->oprs[operand].segment = reloc_seg(value);
610 result->oprs[operand].wrt = reloc_wrt(value);
611 if (is_simple(value) && reloc_value(value)==1)
612 result->oprs[operand].type |= UNITY;
613 } else { /* it's a register */
614 if (value->type>=EXPR_SIMPLE || value->value!=1) {
615 error (ERR_NONFATAL, "invalid operand type");
616 result->opcode = -1;
617 return result;
618 }
619 /* clear overrides, except TO which applies to FPU regs */
620 result->oprs[operand].type &= TO;
621 result->oprs[operand].type |= REGISTER;
622 result->oprs[operand].type |= reg_flags[value->type];
623 result->oprs[operand].basereg = value->type;
624 }
625 }
626 }
627
628 result->operands = operand; /* set operand count */
629
630 while (operand<3) /* clear remaining operands */
631 result->oprs[operand++].type = 0;
632
633 /*
634 * Transform RESW, RESD, RESQ, REST into RESB.
635 */
636 switch (result->opcode) {
637 case I_RESW: result->opcode=I_RESB; result->oprs[0].offset*=2; break;
638 case I_RESD: result->opcode=I_RESB; result->oprs[0].offset*=4; break;
639 case I_RESQ: result->opcode=I_RESB; result->oprs[0].offset*=8; break;
640 case I_REST: result->opcode=I_RESB; result->oprs[0].offset*=10; break;
641 }
642
643 return result;
644}
645
646static int is_comma_next (void) {
647 char *p;
648
649 p = bufptr;
650 while (isspace(*p)) p++;
651 return (*p == ',' || *p == ';' || !*p);
652}
653
H. Peter Anvinea6e34d2002-04-30 20:51:32 +0000654/*
655 * This tokeniser routine has only one side effect, that of
656 * updating `bufptr'. Hence by saving `bufptr', lookahead may be
657 * performed.
658 */
659
660static int nexttoken (void) {
661 char ourcopy[256], *r, *s;
662
663 while (isspace(*bufptr)) bufptr++;
664 if (!*bufptr) return 0;
665
666 /* we have a token; either an id, a number or a char */
667 if (isidstart(*bufptr) ||
668 (*bufptr == '$' && isidstart(bufptr[1]))) {
669 /* now we've got an identifier */
670 int i;
671 int is_sym = FALSE;
672
673 if (*bufptr == '$') {
674 is_sym = TRUE;
675 bufptr++;
676 }
677
678 tokval.t_charptr = q;
679 *q++ = *bufptr++;
680 while (isidchar(*bufptr)) *q++ = *bufptr++;
681 *q++ = '\0';
682 for (s=tokval.t_charptr, r=ourcopy; *s; s++)
683 *r++ = tolower (*s);
684 *r = '\0';
685 if (is_sym)
686 return TOKEN_ID; /* bypass all other checks */
687 /* right, so we have an identifier sitting in temp storage. now,
688 * is it actually a register or instruction name, or what? */
689 if ((tokval.t_integer=bsi(ourcopy, reg_names,
690 elements(reg_names)))>=0)
691 return TOKEN_REG;
692 if ((tokval.t_integer=bsi(ourcopy, insn_names,
693 elements(insn_names)))>=0)
694 return TOKEN_INSN;
695 for (i=0; i<elements(icn); i++)
696 if (!strncmp(ourcopy, icn[i], strlen(icn[i]))) {
697 char *p = ourcopy + strlen(icn[i]);
698 tokval.t_integer = ico[i];
699 if ((tokval.t_inttwo=bsi(p, conditions,
700 elements(conditions)))>=0)
701 return TOKEN_INSN;
702 }
703 if ((tokval.t_integer=bsi(ourcopy, prefix_names,
704 elements(prefix_names)))>=0) {
705 tokval.t_integer += PREFIX_ENUM_START;
706 return TOKEN_PREFIX;
707 }
708 if ((tokval.t_integer=bsi(ourcopy, special_names,
709 elements(special_names)))>=0)
710 return TOKEN_SPECIAL;
711 if (!strcmp(ourcopy, "seg"))
712 return TOKEN_SEG;
713 if (!strcmp(ourcopy, "wrt"))
714 return TOKEN_WRT;
715 return TOKEN_ID;
716 } else if (*bufptr == '$' && !isnumchar(bufptr[1])) {
717 /*
718 * It's a $ sign with no following hex number; this must
719 * mean it's a Here token ($), evaluating to the current
720 * assembly location, or a Base token ($$), evaluating to
721 * the base of the current segment.
722 */
723 bufptr++;
724 if (*bufptr == '$') {
725 bufptr++;
726 return TOKEN_BASE;
727 }
728 return TOKEN_HERE;
729 } else if (isnumstart(*bufptr)) { /* now we've got a number */
730 char *r = q;
731 int rn_error;
732
733 *q++ = *bufptr++;
734 while (isnumchar(*bufptr)) {
735 *q++ = *bufptr++;
736 }
737 if (*bufptr == '.') {
738 /*
739 * a floating point constant
740 */
741 *q++ = *bufptr++;
742 while (isnumchar(*bufptr)) {
743 *q++ = *bufptr++;
744 }
745 *q++ = '\0';
746 tokval.t_charptr = r;
747 return TOKEN_FLOAT;
748 }
749 *q++ = '\0';
750 tokval.t_integer = readnum(r, &rn_error);
751 if (rn_error)
752 return TOKEN_ERRNUM; /* some malformation occurred */
753 tokval.t_charptr = NULL;
754 return TOKEN_NUM;
755 } else if (*bufptr == '\'' || *bufptr == '"') {/* a char constant */
756 char quote = *bufptr++, *r;
757 r = tokval.t_charptr = bufptr;
758 while (*bufptr && *bufptr != quote) bufptr++;
759 tokval.t_inttwo = bufptr - r; /* store full version */
760 if (!*bufptr)
761 return TOKEN_ERRNUM; /* unmatched quotes */
762 tokval.t_integer = 0;
763 r = bufptr++; /* skip over final quote */
764 while (quote != *--r) {
765 tokval.t_integer = (tokval.t_integer<<8) + (unsigned char) *r;
766 }
767 return TOKEN_NUM;
768 } else if (*bufptr == ';') { /* a comment has happened - stay */
769 return 0;
770 } else if ((*bufptr == '>' || *bufptr == '<' ||
771 *bufptr == '/' || *bufptr == '%') && bufptr[1] == *bufptr) {
772 bufptr += 2;
773 return (bufptr[-2] == '>' ? TOKEN_SHR :
774 bufptr[-2] == '<' ? TOKEN_SHL :
775 bufptr[-2] == '/' ? TOKEN_SDIV :
776 TOKEN_SMOD);
777 } else /* just an ordinary char */
778 return (unsigned char) (*bufptr++);
779}
780
/*
 * Binary search for `string' in `array', which holds `size' strings
 * in ascending strcmp() order.  Returns the index of the matching
 * entry, or -1 if there is none.
 */
static int bsi (char *string, char **array, int size) {
    int below = -1;                    /* highest index known to be too small */
    int above = size;                  /* lowest index known to be too large */

    while (above - below > 1) {
        int mid = (below + above) / 2;
        int cmp = strcmp(string, array[mid]);

        if (cmp == 0)
            return mid;
        if (cmp < 0)
            above = mid;
        else
            below = mid;
    }
    return -1;
}
796
797void cleanup_insn (insn *i) {
798 extop *e;
799
800 while (i->eops) {
801 e = i->eops;
802 i->eops = i->eops->next;
803 nasm_free (e);
804 }
805}
806
807/* ------------- Evaluator begins here ------------------ */
808
/*
 * Scratch pool from which evaluator vectors are carved; `tempptr' is
 * the next unused element and is rewound by eval_reset().
 */
static expr exprtempstorage[1024], *tempptr;   /* store exprs in here */
810
811/*
812 * Add two vector datatypes. We have some bizarre behaviour on far-
813 * absolute segment types: we preserve them during addition _only_
814 * if one of the segments is a truly pure scalar.
815 */
816static expr *add_vectors(expr *p, expr *q) {
817 expr *r = tempptr;
818 int preserve;
819
820 preserve = is_really_simple(p) || is_really_simple(q);
821
822 while (p->type && q->type &&
823 p->type < EXPR_SEGBASE+SEG_ABS &&
824 q->type < EXPR_SEGBASE+SEG_ABS)
825 if (p->type > q->type) {
826 tempptr->type = q->type;
827 tempptr->value = q->value;
828 tempptr++, q++;
829 } else if (p->type < q->type) {
830 tempptr->type = p->type;
831 tempptr->value = p->value;
832 tempptr++, p++;
833 } else { /* *p and *q have same type */
834 tempptr->type = p->type;
835 tempptr->value = p->value + q->value;
836 tempptr++, p++, q++;
837 }
838 while (p->type &&
839 (preserve || p->type < EXPR_SEGBASE+SEG_ABS)) {
840 tempptr->type = p->type;
841 tempptr->value = p->value;
842 tempptr++, p++;
843 }
844 while (q->type &&
845 (preserve || q->type < EXPR_SEGBASE+SEG_ABS)) {
846 tempptr->type = q->type;
847 tempptr->value = q->value;
848 tempptr++, q++;
849 }
850 (tempptr++)->type = 0;
851
852 return r;
853}
854
855/*
856 * Multiply a vector by a scalar. Strip far-absolute segment part
857 * if present.
858 */
859static expr *scalar_mult(expr *vect, long scalar) {
860 expr *p = vect;
861
862 while (p->type && p->type < EXPR_SEGBASE+SEG_ABS) {
863 p->value = scalar * (p->value);
864 p++;
865 }
866 p->type = 0;
867
868 return vect;
869}
870
871static expr *scalarvect (long scalar) {
872 expr *p = tempptr;
873 tempptr->type = EXPR_SIMPLE;
874 tempptr->value = scalar;
875 tempptr++;
876 tempptr->type = 0;
877 tempptr++;
878 return p;
879}
880
881/*
882 * Return TRUE if the argument is a simple scalar. (Or a far-
883 * absolute, which counts.)
884 */
885static int is_simple (expr *vect) {
886 while (vect->type && !vect->value)
887 vect++;
888 if (!vect->type)
889 return 1;
890 if (vect->type != EXPR_SIMPLE)
891 return 0;
892 do {
893 vect++;
894 } while (vect->type && !vect->value);
895 if (vect->type && vect->type < EXPR_SEGBASE+SEG_ABS) return 0;
896 return 1;
897}
898
899/*
900 * Return TRUE if the argument is a simple scalar, _NOT_ a far-
901 * absolute.
902 */
903static int is_really_simple (expr *vect) {
904 while (vect->type && !vect->value)
905 vect++;
906 if (!vect->type)
907 return 1;
908 if (vect->type != EXPR_SIMPLE)
909 return 0;
910 do {
911 vect++;
912 } while (vect->type && !vect->value);
913 if (vect->type) return 0;
914 return 1;
915}
916
917/*
918 * Return TRUE if the argument is relocatable (i.e. a simple
919 * scalar, plus at most one segment-base, plus possibly a WRT).
920 */
921static int is_reloc (expr *vect) {
922 while (vect->type && !vect->value)
923 vect++;
924 if (!vect->type)
925 return 1;
926 if (vect->type < EXPR_SIMPLE)
927 return 0;
928 if (vect->type == EXPR_SIMPLE) {
929 do {
930 vect++;
931 } while (vect->type && !vect->value);
932 if (!vect->type)
933 return 1;
934 }
H. Peter Anvin6768eb72002-04-30 20:52:26 +0000935 if (vect->type != EXPR_WRT && vect->value != 0 && vect->value != 1)
936 return 0; /* segment base multiplier non-unity */
H. Peter Anvinea6e34d2002-04-30 20:51:32 +0000937 do {
938 vect++;
939 } while (vect->type && (vect->type == EXPR_WRT || !vect->value));
940 if (!vect->type)
941 return 1;
942 return 1;
943}
944
945/*
946 * Return the scalar part of a relocatable vector. (Including
947 * simple scalar vectors - those qualify as relocatable.)
948 */
949static long reloc_value (expr *vect) {
950 while (vect->type && !vect->value)
951 vect++;
952 if (!vect->type) return 0;
953 if (vect->type == EXPR_SIMPLE)
954 return vect->value;
955 else
956 return 0;
957}
958
959/*
960 * Return the segment number of a relocatable vector, or NO_SEG for
961 * simple scalars.
962 */
963static long reloc_seg (expr *vect) {
964 while (vect->type && (vect->type == EXPR_WRT || !vect->value))
965 vect++;
966 if (vect->type == EXPR_SIMPLE) {
967 do {
968 vect++;
969 } while (vect->type && (vect->type == EXPR_WRT || !vect->value));
970 }
971 if (!vect->type)
972 return NO_SEG;
973 else
974 return vect->type - EXPR_SEGBASE;
975}
976
977/*
978 * Return the WRT segment number of a relocatable vector, or NO_SEG
979 * if no WRT part is present.
980 */
981static long reloc_wrt (expr *vect) {
982 while (vect->type && vect->type < EXPR_WRT)
983 vect++;
984 if (vect->type == EXPR_WRT) {
985 return vect->value;
986 } else
987 return NO_SEG;
988}
989
990static void eval_reset(void) {
991 tempptr = exprtempstorage; /* initialise temporary storage */
992}
993
994/*
995 * The SEG operator: calculate the segment part of a relocatable
996 * value. Return NULL, as usual, if an error occurs. Report the
997 * error too.
998 */
999static expr *segment_part (expr *e) {
1000 long seg;
1001
1002 if (!is_reloc(e)) {
1003 error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value");
1004 return NULL;
1005 }
1006
1007 seg = reloc_seg(e);
1008 if (seg == NO_SEG) {
1009 error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value");
1010 return NULL;
1011 } else if (seg & SEG_ABS)
1012 return scalarvect(seg & ~SEG_ABS);
1013 else {
1014 expr *f = tempptr++;
1015 tempptr++->type = 0;
1016 f->type = EXPR_SEGBASE+outfmt->segbase(seg+1);
1017 f->value = 1;
1018 return f;
1019 }
1020}
1021
1022/*
1023 * Recursive-descent parser. Called with a single boolean operand,
1024 * which is TRUE if the evaluation is critical (i.e. unresolved
1025 * symbols are an error condition). Must update the global `i' to
1026 * reflect the token after the parsed string. May return NULL.
1027 *
1028 * evaluate() should report its own errors: on return it is assumed
1029 * that if NULL has been returned, the error has already been
1030 * reported.
1031 */
1032
1033/*
1034 * Grammar parsed is:
1035 *
1036 * expr : expr0 [ WRT expr6 ]
1037 * expr0 : expr1 [ {|} expr1]
1038 * expr1 : expr2 [ {^} expr2]
1039 * expr2 : expr3 [ {&} expr3]
1040 * expr3 : expr4 [ {<<,>>} expr4...]
1041 * expr4 : expr5 [ {+,-} expr5...]
1042 * expr5 : expr6 [ {*,/,%,//,%%} expr6...]
1043 * expr6 : { ~,+,-,SEG } expr6
1044 * | (expr0)
1045 * | symbol
1046 * | $
1047 * | number
1048 */
1049
/*
 * One function per precedence level of the grammar above.  Each takes
 * the `critical' flag and returns the resulting vector, or NULL on
 * error.
 */
static expr *expr0(int), *expr1(int), *expr2(int), *expr3(int);
static expr *expr4(int), *expr5(int), *expr6(int);
1052
1053static expr *expr0(int critical) {
1054 expr *e, *f;
1055
1056 e = expr1(critical);
1057 if (!e)
1058 return NULL;
1059 while (i == '|') {
1060 i = nexttoken();
1061 f = expr1(critical);
1062 if (!f)
1063 return NULL;
1064 if (!is_simple(e) || !is_simple(f)) {
1065 error(ERR_NONFATAL, "`|' operator may only be applied to"
1066 " scalar values");
1067 }
1068 e = scalarvect (reloc_value(e) | reloc_value(f));
1069 }
1070 return e;
1071}
1072
1073static expr *expr1(int critical) {
1074 expr *e, *f;
1075
1076 e = expr2(critical);
1077 if (!e)
1078 return NULL;
1079 while (i == '^') {
1080 i = nexttoken();
1081 f = expr2(critical);
1082 if (!f)
1083 return NULL;
1084 if (!is_simple(e) || !is_simple(f)) {
1085 error(ERR_NONFATAL, "`^' operator may only be applied to"
1086 " scalar values");
1087 }
1088 e = scalarvect (reloc_value(e) ^ reloc_value(f));
1089 }
1090 return e;
1091}
1092
1093static expr *expr2(int critical) {
1094 expr *e, *f;
1095
1096 e = expr3(critical);
1097 if (!e)
1098 return NULL;
1099 while (i == '&') {
1100 i = nexttoken();
1101 f = expr3(critical);
1102 if (!f)
1103 return NULL;
1104 if (!is_simple(e) || !is_simple(f)) {
1105 error(ERR_NONFATAL, "`&' operator may only be applied to"
1106 " scalar values");
1107 }
1108 e = scalarvect (reloc_value(e) & reloc_value(f));
1109 }
1110 return e;
1111}
1112
1113static expr *expr3(int critical) {
1114 expr *e, *f;
1115
1116 e = expr4(critical);
1117 if (!e)
1118 return NULL;
1119 while (i == TOKEN_SHL || i == TOKEN_SHR) {
1120 int j = i;
1121 i = nexttoken();
1122 f = expr4(critical);
1123 if (!f)
1124 return NULL;
1125 if (!is_simple(e) || !is_simple(f)) {
1126 error(ERR_NONFATAL, "shift operator may only be applied to"
1127 " scalar values");
1128 }
1129 switch (j) {
1130 case TOKEN_SHL:
1131 e = scalarvect (reloc_value(e) << reloc_value(f));
1132 break;
1133 case TOKEN_SHR:
1134 e = scalarvect (((unsigned long)reloc_value(e)) >>
1135 reloc_value(f));
1136 break;
1137 }
1138 }
1139 return e;
1140}
1141
1142static expr *expr4(int critical) {
1143 expr *e, *f;
1144
1145 e = expr5(critical);
1146 if (!e)
1147 return NULL;
1148 while (i == '+' || i == '-') {
1149 int j = i;
1150 i = nexttoken();
1151 f = expr5(critical);
1152 if (!f)
1153 return NULL;
1154 switch (j) {
1155 case '+':
1156 e = add_vectors (e, f);
1157 break;
1158 case '-':
1159 e = add_vectors (e, scalar_mult(f, -1L));
1160 break;
1161 }
1162 }
1163 return e;
1164}
1165
1166static expr *expr5(int critical) {
1167 expr *e, *f;
1168
1169 e = expr6(critical);
1170 if (!e)
1171 return NULL;
1172 while (i == '*' || i == '/' || i == '*' ||
1173 i == TOKEN_SDIV || i == TOKEN_SMOD) {
1174 int j = i;
1175 i = nexttoken();
1176 f = expr6(critical);
1177 if (!f)
1178 return NULL;
1179 if (j != '*' && (!is_simple(e) || !is_simple(f))) {
1180 error(ERR_NONFATAL, "division operator may only be applied to"
1181 " scalar values");
1182 return NULL;
1183 }
1184 if (j != '*' && reloc_value(f) == 0) {
1185 error(ERR_NONFATAL, "division by zero");
1186 return NULL;
1187 }
1188 switch (j) {
1189 case '*':
1190 if (is_simple(e))
1191 e = scalar_mult (f, reloc_value(e));
1192 else if (is_simple(f))
1193 e = scalar_mult (e, reloc_value(f));
1194 else {
1195 error(ERR_NONFATAL, "unable to multiply two "
1196 "non-scalar objects");
1197 return NULL;
1198 }
1199 break;
1200 case '/':
1201 e = scalarvect (((unsigned long)reloc_value(e)) /
1202 ((unsigned long)reloc_value(f)));
1203 break;
1204 case '%':
1205 e = scalarvect (((unsigned long)reloc_value(e)) %
1206 ((unsigned long)reloc_value(f)));
1207 break;
1208 case TOKEN_SDIV:
1209 e = scalarvect (((signed long)reloc_value(e)) /
1210 ((signed long)reloc_value(f)));
1211 break;
1212 case TOKEN_SMOD:
1213 e = scalarvect (((signed long)reloc_value(e)) %
1214 ((signed long)reloc_value(f)));
1215 break;
1216 }
1217 }
1218 return e;
1219}
1220
1221static expr *expr6(int critical) {
1222 expr *e;
1223 long label_seg, label_ofs;
1224
1225 if (i == '-') {
1226 i = nexttoken();
1227 e = expr6(critical);
1228 if (!e)
1229 return NULL;
1230 return scalar_mult (e, -1L);
1231 } else if (i == '+') {
1232 i = nexttoken();
1233 return expr6(critical);
1234 } else if (i == '~') {
1235 i = nexttoken();
1236 e = expr6(critical);
1237 if (!e)
1238 return NULL;
1239 if (!is_simple(e)) {
1240 error(ERR_NONFATAL, "`~' operator may only be applied to"
1241 " scalar values");
1242 return NULL;
1243 }
1244 return scalarvect(~reloc_value(e));
1245 } else if (i == TOKEN_SEG) {
1246 i = nexttoken();
1247 e = expr6(critical);
1248 if (!e)
1249 return NULL;
1250 return segment_part(e);
1251 } else if (i == '(') {
1252 i = nexttoken();
1253 e = expr0(critical);
1254 if (!e)
1255 return NULL;
1256 if (i != ')') {
1257 error(ERR_NONFATAL, "expecting `)'");
1258 return NULL;
1259 }
1260 i = nexttoken();
1261 return e;
1262 } else if (i == TOKEN_NUM || i == TOKEN_REG || i == TOKEN_ID ||
1263 i == TOKEN_HERE || i == TOKEN_BASE) {
1264 e = tempptr;
1265 switch (i) {
1266 case TOKEN_NUM:
1267 e->type = EXPR_SIMPLE;
1268 e->value = tokval.t_integer;
1269 break;
1270 case TOKEN_REG:
1271 e->type = tokval.t_integer;
1272 e->value = 1;
1273 break;
1274 case TOKEN_ID:
1275 case TOKEN_HERE:
1276 case TOKEN_BASE:
1277 /*
1278 * Since the whole line is parsed before the label it
1279 * defines is given to the label manager, we have
1280 * problems with lines such as
1281 *
1282 * end: TIMES 512-(end-start) DB 0
1283 *
1284 * where `end' is not known on pass one, despite not
1285 * really being a forward reference, and due to
1286 * criticality it is _needed_. Hence we check our label
1287 * against the currently defined one, and do our own
1288 * resolution of it if we have to.
1289 */
1290 if (i == TOKEN_BASE) {
1291 label_seg = seg;
1292 label_ofs = 0;
1293 } else if (i == TOKEN_HERE || !strcmp(tokval.t_charptr, label)) {
1294 label_seg = seg;
1295 label_ofs = ofs;
1296 } else if (!labelfunc(tokval.t_charptr, &label_seg, &label_ofs)) {
1297 if (critical == 2) {
1298 error (ERR_NONFATAL, "symbol `%s' undefined",
1299 tokval.t_charptr);
1300 return NULL;
1301 } else if (critical == 1) {
1302 error (ERR_NONFATAL, "symbol `%s' not defined before use",
1303 tokval.t_charptr);
1304 return NULL;
1305 } else {
H. Peter Anvinea838272002-04-30 20:51:53 +00001306 forward = TRUE;
H. Peter Anvinea6e34d2002-04-30 20:51:32 +00001307 label_seg = seg;
1308 label_ofs = ofs;
1309 }
1310 }
1311 e->type = EXPR_SIMPLE;
1312 e->value = label_ofs;
1313 if (label_seg!=NO_SEG) {
1314 tempptr++;
1315 tempptr->type = EXPR_SEGBASE + label_seg;
1316 tempptr->value = 1;
1317 }
1318 break;
1319 }
1320 tempptr++;
1321 tempptr->type = 0;
1322 tempptr++;
1323 i = nexttoken();
1324 return e;
1325 } else {
1326 error(ERR_NONFATAL, "expression syntax error");
1327 return NULL;
1328 }
1329}
1330
1331static expr *evaluate (int critical) {
1332 expr *e;
1333 expr *f = NULL;
1334
1335 e = expr0 (critical);
1336 if (!e)
1337 return NULL;
1338
1339 if (i == TOKEN_WRT) {
H. Peter Anvinea6e34d2002-04-30 20:51:32 +00001340 i = nexttoken(); /* eat the WRT */
1341 f = expr6 (critical);
1342 if (!f)
1343 return NULL;
1344 }
1345 e = scalar_mult (e, 1L); /* strip far-absolute segment part */
1346 if (f) {
1347 expr *g = tempptr++;
1348 tempptr++->type = 0;
1349 g->type = EXPR_WRT;
1350 if (!is_reloc(f)) {
1351 error(ERR_NONFATAL, "invalid right-hand operand to WRT");
1352 return NULL;
1353 }
1354 g->value = reloc_seg(f);
1355 if (g->value == NO_SEG)
1356 g->value = reloc_value(f) | SEG_ABS;
1357 else if (!(g->value & SEG_ABS) && !(g->value % 2) && critical) {
1358 error(ERR_NONFATAL, "invalid right-hand operand to WRT");
1359 return NULL;
1360 }
1361 e = add_vectors (e, g);
1362 }
1363 return e;
1364}