H. Peter Anvin | ea6e34d | 2002-04-30 20:51:32 +0000 | [diff] [blame] | 1 | /* parser.c source line parser for the Netwide Assembler |
| 2 | * |
| 3 | * The Netwide Assembler is copyright (C) 1996 Simon Tatham and |
| 4 | * Julian Hall. All rights reserved. The software is |
| 5 | * redistributable under the licence given in the file "Licence" |
| 6 | * distributed in the NASM archive. |
| 7 | * |
| 8 | * initial version 27/iii/95 by Simon Tatham |
| 9 | */ |
| 10 | |
| 11 | #include <stdio.h> |
| 12 | #include <stdlib.h> |
| 13 | #include <stddef.h> |
| 14 | #include <string.h> |
| 15 | #include <ctype.h> |
| 16 | |
| 17 | #include "nasm.h" |
| 18 | #include "nasmlib.h" |
| 19 | #include "parser.h" |
| 20 | #include "float.h" |
| 21 | |
| 22 | #include "names.c" |
| 23 | |
| 24 | |
| 25 | static long reg_flags[] = { /* sizes and special flags */ |
| 26 | 0, REG8, REG_AL, REG_AX, REG8, REG8, REG16, REG16, REG8, REG_CL, |
| 27 | REG_CREG, REG_CREG, REG_CREG, REG_CR4, REG_CS, REG_CX, REG8, |
| 28 | REG16, REG8, REG_DREG, REG_DREG, REG_DREG, REG_DREG, REG_DREG, |
| 29 | REG_DREG, REG_DESS, REG_DX, REG_EAX, REG32, REG32, REG_ECX, |
| 30 | REG32, REG32, REG_DESS, REG32, REG32, REG_FSGS, REG_FSGS, |
| 31 | MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, |
| 32 | REG16, REG16, REG_DESS, FPU0, FPUREG, FPUREG, FPUREG, FPUREG, |
| 33 | FPUREG, FPUREG, FPUREG, REG_TREG, REG_TREG, REG_TREG, REG_TREG, |
| 34 | REG_TREG |
| 35 | }; |
| 36 | |
/*
 * Values carried in tokval.t_integer for a TOKEN_SPECIAL token.  The
 * numbering must match the index of the same word in special_names[].
 */
enum {
    S_BYTE,
    S_DWORD,
    S_FAR,
    S_LONG,
    S_NEAR,
    S_QWORD,
    S_SHORT,
    S_TO,
    S_TWORD,
    S_WORD
};
| 41 | |
/*
 * Spellings of the special tokens, in lower case.  The list is kept in
 * ascending strcmp() order because it is searched with bsi(), and the
 * index of each word is its S_xxx value.
 */
static char *special_names[] = {
    "byte",
    "dword",
    "far",
    "long",
    "near",
    "qword",
    "short",
    "to",
    "tword",
    "word"
};
| 46 | |
/*
 * Spellings of the instruction prefixes, in lower case and in ascending
 * strcmp() order (the list is searched with bsi()).  nexttoken() adds
 * PREFIX_ENUM_START to the index found here to form the prefix code.
 */
static char *prefix_names[] = {
    "a16",
    "a32",
    "lock",
    "o16",
    "o32",
    "rep",
    "repe",
    "repne",
    "repnz",
    "repz",
    "times"
};
| 51 | |
| 52 | /* |
| 53 | * Evaluator datatype. Expressions, within the evaluator, are |
| 54 | * stored as an array of these beasts, terminated by a record with |
| 55 | * type==0. Mostly, it's a vector type: each type denotes some kind |
| 56 | * of a component, and the value denotes the multiple of that |
| 57 | * component present in the expression. The exception is the WRT |
| 58 | * type, whose `value' field denotes the segment to which the |
| 59 | * expression is relative. These segments will be segment-base |
| 60 | * types, i.e. either odd segment values or SEG_ABS types. So it is |
| 61 | * still valid to assume that anything with a `value' field of zero |
| 62 | * is insignificant. |
| 63 | */ |
/* One term of an expression vector; a term with type==0 ends the vector. */
typedef struct {
    /* which component this term is: a register number, or EXPR_xxx */
    long type;
    /* multiple of that component (segment for EXPR_WRT); >= 32 bits */
    long value;
} expr;
| 68 | |
| 69 | static void eval_reset(void); |
| 70 | static expr *evaluate(int); |
| 71 | |
| 72 | /* |
| 73 | * ASSUMPTION MADE HERE. The number of distinct register names |
| 74 | * (i.e. possible "type" fields for an expr structure) does not |
| 75 | * exceed 126. |
| 76 | */ |
| 77 | #define EXPR_SIMPLE 126 |
| 78 | #define EXPR_WRT 127 |
| 79 | #define EXPR_SEGBASE 128 |
| 80 | |
| 81 | static int is_reloc(expr *); |
| 82 | static int is_simple(expr *); |
| 83 | static int is_really_simple (expr *); |
| 84 | static long reloc_value(expr *); |
| 85 | static long reloc_seg(expr *); |
| 86 | static long reloc_wrt(expr *); |
| 87 | |
/*
 * Token types returned by nexttoken() in addition to plain characters,
 * which are returned as their own (unsigned char) value; hence the
 * numbering starts at 256.
 */
enum {
    TOKEN_ID = 256,             /* identifier */
    TOKEN_NUM,                  /* number or quoted character constant */
    TOKEN_REG,                  /* register name */
    TOKEN_INSN,                 /* instruction mnemonic */
    TOKEN_ERRNUM,               /* malformed number / unmatched quote */
    TOKEN_HERE,                 /* $ */
    TOKEN_BASE,                 /* $$ */
    TOKEN_SPECIAL,              /* one of special_names[] */
    TOKEN_PREFIX,               /* one of prefix_names[] */
    TOKEN_SHL,                  /* << */
    TOKEN_SHR,                  /* >> */
    TOKEN_SDIV,                 /* // */
    TOKEN_SMOD,                 /* %% */
    TOKEN_SEG,                  /* the word SEG */
    TOKEN_WRT,                  /* the word WRT */
    TOKEN_FLOAT                 /* number containing a `.' */
};
| 94 | |
/* Attributes of the token most recently returned by nexttoken(). */
struct tokenval {
    long t_integer;             /* numeric value, or table index/code */
    long t_inttwo;              /* condition code, or string length */
    char *t_charptr;            /* text of the token, where it has one */
};
| 99 | |
| 100 | static char tempstorage[1024], *q; |
| 101 | static int bsi (char *string, char **array, int size);/* binary search */ |
| 102 | |
| 103 | static int nexttoken (void); |
| 104 | static int is_comma_next (void); |
| 105 | |
| 106 | static char *bufptr; |
| 107 | static int i; |
| 108 | static struct tokenval tokval; |
| 109 | static lfunc labelfunc; |
| 110 | static efunc error; |
| 111 | static char *label; |
| 112 | static struct ofmt *outfmt; |
| 113 | |
| 114 | static long seg, ofs; |
| 115 | |
H. Peter Anvin | ea83827 | 2002-04-30 20:51:53 +0000 | [diff] [blame^] | 116 | static int forward; |
| 117 | |
H. Peter Anvin | ea6e34d | 2002-04-30 20:51:32 +0000 | [diff] [blame] | 118 | insn *parse_line (long segment, long offset, lfunc lookup_label, int pass, |
| 119 | char *buffer, insn *result, struct ofmt *output, |
| 120 | efunc errfunc) { |
| 121 | int operand; |
| 122 | int critical; |
| 123 | |
H. Peter Anvin | ea83827 | 2002-04-30 20:51:53 +0000 | [diff] [blame^] | 124 | forward = result->forw_ref = FALSE; |
H. Peter Anvin | ea6e34d | 2002-04-30 20:51:32 +0000 | [diff] [blame] | 125 | q = tempstorage; |
| 126 | bufptr = buffer; |
| 127 | labelfunc = lookup_label; |
| 128 | outfmt = output; |
| 129 | error = errfunc; |
| 130 | seg = segment; |
| 131 | ofs = offset; |
| 132 | label = ""; |
| 133 | |
| 134 | i = nexttoken(); |
| 135 | |
| 136 | result->eops = NULL; /* must do this, whatever happens */ |
| 137 | |
| 138 | if (i==0) { /* blank line - ignore */ |
| 139 | result->label = NULL; /* so, no label on it */ |
| 140 | result->opcode = -1; /* and no instruction either */ |
| 141 | return result; |
| 142 | } |
| 143 | if (i != TOKEN_ID && i != TOKEN_INSN && i != TOKEN_PREFIX && |
| 144 | (i!=TOKEN_REG || (REG_SREG & ~reg_flags[tokval.t_integer]))) { |
| 145 | error (ERR_NONFATAL, "label or instruction expected" |
| 146 | " at start of line"); |
| 147 | result->label = NULL; |
| 148 | result->opcode = -1; |
| 149 | return result; |
| 150 | } |
| 151 | |
| 152 | if (i == TOKEN_ID) { /* there's a label here */ |
| 153 | label = result->label = tokval.t_charptr; |
| 154 | i = nexttoken(); |
| 155 | if (i == ':') { /* skip over the optional colon */ |
| 156 | i = nexttoken(); |
| 157 | } |
| 158 | } else /* no label; so, moving swiftly on */ |
| 159 | result->label = NULL; |
| 160 | |
| 161 | if (i==0) { |
| 162 | result->opcode = -1; /* this line contains just a label */ |
| 163 | return result; |
| 164 | } |
| 165 | |
| 166 | result->nprefix = 0; |
| 167 | result->times = 1; |
| 168 | |
| 169 | while (i == TOKEN_PREFIX || |
| 170 | (i==TOKEN_REG && !(REG_SREG & ~reg_flags[tokval.t_integer]))) { |
| 171 | /* |
| 172 | * Handle special case: the TIMES prefix. |
| 173 | */ |
| 174 | if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) { |
| 175 | expr *value; |
| 176 | |
| 177 | i = nexttoken(); |
| 178 | eval_reset(); |
| 179 | value = evaluate (pass); |
| 180 | if (!value) { /* but, error in evaluator */ |
| 181 | result->opcode = -1; /* unrecoverable parse error: */ |
| 182 | return result; /* ignore this instruction */ |
| 183 | } |
| 184 | if (!is_simple (value)) { |
| 185 | error (ERR_NONFATAL, |
| 186 | "non-constant argument supplied to TIMES"); |
| 187 | result->times = 1; |
| 188 | } else |
| 189 | result->times = value->value; |
| 190 | } else { |
| 191 | if (result->nprefix == MAXPREFIX) |
| 192 | error (ERR_NONFATAL, |
| 193 | "instruction has more than %d prefixes", MAXPREFIX); |
| 194 | else |
| 195 | result->prefixes[result->nprefix++] = tokval.t_integer; |
| 196 | i = nexttoken(); |
| 197 | } |
| 198 | } |
| 199 | |
| 200 | if (i != TOKEN_INSN) { |
| 201 | error (ERR_NONFATAL, "parser: instruction expected"); |
| 202 | result->opcode = -1; |
| 203 | return result; |
| 204 | } |
| 205 | |
| 206 | result->opcode = tokval.t_integer; |
| 207 | result->condition = tokval.t_inttwo; |
| 208 | |
| 209 | /* |
| 210 | * RESB, RESW and RESD cannot be satisfied with incorrectly |
| 211 | * evaluated operands, since the correct values _must_ be known |
| 212 | * on the first pass. Hence, even in pass one, we set the |
| 213 | * `critical' flag on calling evaluate(), so that it will bomb |
| 214 | * out on undefined symbols. Nasty, but there's nothing we can |
| 215 | * do about it. |
| 216 | * |
| 217 | * For the moment, EQU has the same difficulty, so we'll |
| 218 | * include that. |
| 219 | */ |
| 220 | if (result->opcode == I_RESB || |
| 221 | result->opcode == I_RESW || |
| 222 | result->opcode == I_RESD || |
| 223 | result->opcode == I_RESQ || |
| 224 | result->opcode == I_REST || |
| 225 | result->opcode == I_EQU) |
| 226 | critical = pass; |
| 227 | else |
| 228 | critical = (pass==2 ? 2 : 0); |
| 229 | |
| 230 | if (result->opcode == I_DB || |
| 231 | result->opcode == I_DW || |
| 232 | result->opcode == I_DD || |
| 233 | result->opcode == I_DQ || |
| 234 | result->opcode == I_DT) { |
| 235 | extop *eop, **tail = &result->eops; |
| 236 | int oper_num = 0; |
| 237 | |
| 238 | /* |
| 239 | * Begin to read the DB/DW/DD/DQ/DT operands. |
| 240 | */ |
| 241 | while (1) { |
| 242 | i = nexttoken(); |
| 243 | if (i == 0) |
| 244 | break; |
| 245 | eop = *tail = nasm_malloc(sizeof(extop)); |
| 246 | tail = &eop->next; |
| 247 | eop->next = NULL; |
| 248 | eop->type = EOT_NOTHING; |
| 249 | oper_num++; |
| 250 | |
| 251 | if (i == TOKEN_NUM && tokval.t_charptr && is_comma_next()) { |
| 252 | eop->type = EOT_DB_STRING; |
| 253 | eop->stringval = tokval.t_charptr; |
| 254 | eop->stringlen = tokval.t_inttwo; |
| 255 | i = nexttoken(); /* eat the comma */ |
| 256 | continue; |
| 257 | } |
| 258 | |
| 259 | if (i == TOKEN_FLOAT || i == '-') { |
| 260 | long sign = +1L; |
| 261 | |
| 262 | if (i == '-') { |
| 263 | char *save = bufptr; |
| 264 | i = nexttoken(); |
| 265 | sign = -1L; |
| 266 | if (i != TOKEN_FLOAT) { |
| 267 | bufptr = save; |
| 268 | i = '-'; |
| 269 | } |
| 270 | } |
| 271 | |
| 272 | if (i == TOKEN_FLOAT) { |
| 273 | eop->type = EOT_DB_STRING; |
| 274 | eop->stringval = q; |
| 275 | if (result->opcode == I_DD) |
| 276 | eop->stringlen = 4; |
| 277 | else if (result->opcode == I_DQ) |
| 278 | eop->stringlen = 8; |
| 279 | else if (result->opcode == I_DT) |
| 280 | eop->stringlen = 10; |
| 281 | else { |
| 282 | error(ERR_NONFATAL, "floating-point constant" |
| 283 | " encountered in `D%c' instruction", |
| 284 | result->opcode == I_DW ? 'W' : 'B'); |
| 285 | eop->type = EOT_NOTHING; |
| 286 | } |
| 287 | q += eop->stringlen; |
| 288 | if (!float_const (tokval.t_charptr, sign, |
| 289 | (unsigned char *)eop->stringval, |
| 290 | eop->stringlen, error)) |
| 291 | eop->type = EOT_NOTHING; |
| 292 | i = nexttoken(); /* eat the comma */ |
| 293 | continue; |
| 294 | } |
| 295 | } |
| 296 | |
| 297 | /* anything else */ { |
| 298 | expr *value; |
| 299 | eval_reset(); |
| 300 | value = evaluate (critical); |
| 301 | if (!value) { /* but, error in evaluator */ |
| 302 | result->opcode = -1;/* unrecoverable parse error: */ |
| 303 | return result; /* ignore this instruction */ |
| 304 | } |
| 305 | if (is_reloc(value)) { |
| 306 | eop->type = EOT_DB_NUMBER; |
| 307 | eop->offset = reloc_value(value); |
| 308 | eop->segment = reloc_seg(value); |
| 309 | eop->wrt = reloc_wrt(value); |
| 310 | } else { |
| 311 | error (ERR_NONFATAL, |
| 312 | "`%s' operand %d: expression is not simple" |
| 313 | " or relocatable", |
| 314 | insn_names[result->opcode], oper_num); |
| 315 | } |
| 316 | } |
| 317 | } |
| 318 | return result; |
| 319 | } |
| 320 | |
| 321 | /* right. Now we begin to parse the operands. There may be up to three |
| 322 | * of these, separated by commas, and terminated by a zero token. */ |
| 323 | |
| 324 | for (operand = 0; operand < 3; operand++) { |
| 325 | expr *seg, *value; /* used most of the time */ |
| 326 | int mref; /* is this going to be a memory ref? */ |
| 327 | |
| 328 | result->oprs[operand].addr_size = 0;/* have to zero this whatever */ |
| 329 | i = nexttoken(); |
| 330 | if (i == 0) break; /* end of operands: get out of here */ |
| 331 | result->oprs[operand].type = 0; /* so far, no override */ |
| 332 | while (i == TOKEN_SPECIAL) {/* size specifiers */ |
| 333 | switch ((int)tokval.t_integer) { |
| 334 | case S_BYTE: |
| 335 | result->oprs[operand].type |= BITS8; |
| 336 | break; |
| 337 | case S_WORD: |
| 338 | result->oprs[operand].type |= BITS16; |
| 339 | break; |
| 340 | case S_DWORD: |
| 341 | case S_LONG: |
| 342 | result->oprs[operand].type |= BITS32; |
| 343 | break; |
| 344 | case S_QWORD: |
| 345 | result->oprs[operand].type |= BITS64; |
| 346 | break; |
| 347 | case S_TWORD: |
| 348 | result->oprs[operand].type |= BITS80; |
| 349 | break; |
| 350 | case S_TO: |
| 351 | result->oprs[operand].type |= TO; |
| 352 | break; |
| 353 | case S_FAR: |
| 354 | result->oprs[operand].type |= FAR; |
| 355 | break; |
| 356 | case S_NEAR: |
| 357 | result->oprs[operand].type |= NEAR; |
| 358 | break; |
| 359 | case S_SHORT: |
| 360 | result->oprs[operand].type |= SHORT; |
| 361 | break; |
| 362 | } |
| 363 | i = nexttoken(); |
| 364 | } |
| 365 | |
| 366 | if (i == '[') { /* memory reference */ |
| 367 | i = nexttoken(); |
| 368 | mref = TRUE; |
| 369 | if (i == TOKEN_SPECIAL) { /* check for address size override */ |
| 370 | switch ((int)tokval.t_integer) { |
| 371 | case S_WORD: |
| 372 | result->oprs[operand].addr_size = 16; |
| 373 | break; |
| 374 | case S_DWORD: |
| 375 | case S_LONG: |
| 376 | result->oprs[operand].addr_size = 32; |
| 377 | break; |
| 378 | default: |
| 379 | error (ERR_NONFATAL, "invalid size specification in" |
| 380 | " effective address"); |
| 381 | } |
| 382 | i = nexttoken(); |
| 383 | } |
| 384 | } else /* immediate operand, or register */ |
| 385 | mref = FALSE; |
| 386 | |
| 387 | eval_reset(); |
| 388 | |
| 389 | value = evaluate (critical); |
H. Peter Anvin | ea83827 | 2002-04-30 20:51:53 +0000 | [diff] [blame^] | 390 | if (forward) |
| 391 | result->forw_ref = TRUE; |
H. Peter Anvin | ea6e34d | 2002-04-30 20:51:32 +0000 | [diff] [blame] | 392 | if (!value) { /* error in evaluator */ |
| 393 | result->opcode = -1; /* unrecoverable parse error: */ |
| 394 | return result; /* ignore this instruction */ |
| 395 | } |
| 396 | if (i == ':' && mref) { /* it was seg:offset */ |
| 397 | seg = value; /* so shift this into the segment */ |
| 398 | i = nexttoken(); /* then skip the colon */ |
| 399 | if (i == TOKEN_SPECIAL) { /* another check for size override */ |
| 400 | switch ((int)tokval.t_integer) { |
| 401 | case S_WORD: |
| 402 | result->oprs[operand].addr_size = 16; |
| 403 | break; |
| 404 | case S_DWORD: |
| 405 | case S_LONG: |
| 406 | result->oprs[operand].addr_size = 32; |
| 407 | break; |
| 408 | default: |
| 409 | error (ERR_NONFATAL, "invalid size specification in" |
| 410 | " effective address"); |
| 411 | } |
| 412 | i = nexttoken(); |
| 413 | } |
| 414 | value = evaluate (critical); |
H. Peter Anvin | ea83827 | 2002-04-30 20:51:53 +0000 | [diff] [blame^] | 415 | if (forward) |
| 416 | result->forw_ref = TRUE; |
H. Peter Anvin | ea6e34d | 2002-04-30 20:51:32 +0000 | [diff] [blame] | 417 | /* and get the offset */ |
| 418 | if (!value) { /* but, error in evaluator */ |
| 419 | result->opcode = -1; /* unrecoverable parse error: */ |
| 420 | return result; /* ignore this instruction */ |
| 421 | } |
| 422 | } else seg = NULL; |
| 423 | if (mref) { /* find ] at the end */ |
| 424 | if (i != ']') { |
| 425 | error (ERR_NONFATAL, "parser: expecting ]"); |
| 426 | do { /* error recovery again */ |
| 427 | i = nexttoken(); |
| 428 | } while (i != 0 && i != ','); |
| 429 | } else /* we got the required ] */ |
| 430 | i = nexttoken(); |
| 431 | } else { /* immediate operand */ |
| 432 | if (i != 0 && i != ',' && i != ':') { |
| 433 | error (ERR_NONFATAL, "comma or end of line expected"); |
| 434 | do { /* error recovery */ |
| 435 | i = nexttoken(); |
| 436 | } while (i != 0 && i != ','); |
| 437 | } else if (i == ':') { |
| 438 | result->oprs[operand].type |= COLON; |
| 439 | } |
| 440 | } |
| 441 | |
| 442 | /* now convert the exprs returned from evaluate() into operand |
| 443 | * descriptions... */ |
| 444 | |
| 445 | if (mref) { /* it's a memory reference */ |
| 446 | expr *e = value; |
| 447 | int b, i, s; /* basereg, indexreg, scale */ |
| 448 | long o; /* offset */ |
| 449 | |
| 450 | if (seg) { /* segment override */ |
| 451 | if (seg[1].type!=0 || seg->value!=1 || |
| 452 | REG_SREG & ~reg_flags[seg->type]) |
| 453 | error (ERR_NONFATAL, "invalid segment override"); |
| 454 | else if (result->nprefix == MAXPREFIX) |
| 455 | error (ERR_NONFATAL, |
| 456 | "instruction has more than %d prefixes", |
| 457 | MAXPREFIX); |
| 458 | else |
| 459 | result->prefixes[result->nprefix++] = seg->type; |
| 460 | } |
| 461 | |
| 462 | b = i = -1, o = s = 0; |
| 463 | |
| 464 | if (e->type < EXPR_SIMPLE) { /* this bit's a register */ |
| 465 | if (e->value == 1) /* in fact it can be basereg */ |
| 466 | b = e->type; |
| 467 | else /* no, it has to be indexreg */ |
| 468 | i = e->type, s = e->value; |
| 469 | e++; |
| 470 | } |
| 471 | if (e->type && e->type < EXPR_SIMPLE) {/* it's a second register */ |
| 472 | if (e->value != 1) { /* it has to be indexreg */ |
| 473 | if (i != -1) { /* but it can't be */ |
| 474 | error(ERR_NONFATAL, "invalid effective address"); |
| 475 | result->opcode = -1; |
| 476 | return result; |
| 477 | } else |
| 478 | i = e->type, s = e->value; |
| 479 | } else { /* it can be basereg */ |
| 480 | if (b != -1) /* or can it? */ |
| 481 | i = e->type, s = 1; |
| 482 | else |
| 483 | b = e->type; |
| 484 | } |
| 485 | e++; |
| 486 | } |
| 487 | if (e->type != 0) { /* is there an offset? */ |
| 488 | if (e->type < EXPR_SIMPLE) {/* in fact, is there an error? */ |
| 489 | error (ERR_NONFATAL, "invalid effective address"); |
| 490 | result->opcode = -1; |
| 491 | return result; |
| 492 | } else { |
| 493 | if (e->type == EXPR_SIMPLE) { |
| 494 | o = e->value; |
| 495 | e++; |
| 496 | } |
| 497 | if (e->type == EXPR_WRT) { |
| 498 | result->oprs[operand].wrt = e->value; |
| 499 | e++; |
| 500 | } else |
| 501 | result->oprs[operand].wrt = NO_SEG; |
H. Peter Anvin | ea83827 | 2002-04-30 20:51:53 +0000 | [diff] [blame^] | 502 | /* |
| 503 | * Look for a segment base type. |
| 504 | */ |
| 505 | if (e->type && e->type < EXPR_SEGBASE) { |
| 506 | error (ERR_NONFATAL, "invalid effective address"); |
| 507 | result->opcode = -1; |
| 508 | return result; |
| 509 | } |
| 510 | while (e->type && e->value == 0) |
| 511 | e++; |
| 512 | if (e->type && e->value != 1) { |
| 513 | error (ERR_NONFATAL, "invalid effective address"); |
| 514 | result->opcode = -1; |
| 515 | return result; |
| 516 | } |
| 517 | if (e->type) { |
| 518 | result->oprs[operand].segment = e->type-EXPR_SEGBASE; |
H. Peter Anvin | ea6e34d | 2002-04-30 20:51:32 +0000 | [diff] [blame] | 519 | e++; |
| 520 | } else |
| 521 | result->oprs[operand].segment = NO_SEG; |
H. Peter Anvin | ea83827 | 2002-04-30 20:51:53 +0000 | [diff] [blame^] | 522 | while (e->type && e->value == 0) |
| 523 | e++; |
| 524 | if (e->type) { |
| 525 | error (ERR_NONFATAL, "invalid effective address"); |
| 526 | result->opcode = -1; |
| 527 | return result; |
| 528 | } |
H. Peter Anvin | ea6e34d | 2002-04-30 20:51:32 +0000 | [diff] [blame] | 529 | } |
| 530 | } else { |
| 531 | o = 0; |
| 532 | result->oprs[operand].wrt = NO_SEG; |
| 533 | result->oprs[operand].segment = NO_SEG; |
| 534 | } |
| 535 | |
| 536 | if (e->type != 0) { /* there'd better be nothing left! */ |
| 537 | error (ERR_NONFATAL, "invalid effective address"); |
| 538 | result->opcode = -1; |
| 539 | return result; |
| 540 | } |
| 541 | |
| 542 | result->oprs[operand].type |= MEMORY; |
| 543 | if (b==-1 && (i==-1 || s==0)) |
| 544 | result->oprs[operand].type |= MEM_OFFS; |
| 545 | result->oprs[operand].basereg = b; |
| 546 | result->oprs[operand].indexreg = i; |
| 547 | result->oprs[operand].scale = s; |
| 548 | result->oprs[operand].offset = o; |
| 549 | } else { /* it's not a memory reference */ |
| 550 | if (is_reloc(value)) { /* it's immediate */ |
| 551 | result->oprs[operand].type |= IMMEDIATE; |
| 552 | result->oprs[operand].offset = reloc_value(value); |
| 553 | result->oprs[operand].segment = reloc_seg(value); |
| 554 | result->oprs[operand].wrt = reloc_wrt(value); |
| 555 | if (is_simple(value) && reloc_value(value)==1) |
| 556 | result->oprs[operand].type |= UNITY; |
| 557 | } else { /* it's a register */ |
| 558 | if (value->type>=EXPR_SIMPLE || value->value!=1) { |
| 559 | error (ERR_NONFATAL, "invalid operand type"); |
| 560 | result->opcode = -1; |
| 561 | return result; |
| 562 | } |
| 563 | /* clear overrides, except TO which applies to FPU regs */ |
| 564 | result->oprs[operand].type &= TO; |
| 565 | result->oprs[operand].type |= REGISTER; |
| 566 | result->oprs[operand].type |= reg_flags[value->type]; |
| 567 | result->oprs[operand].basereg = value->type; |
| 568 | } |
| 569 | } |
| 570 | } |
| 571 | |
| 572 | result->operands = operand; /* set operand count */ |
| 573 | |
| 574 | while (operand<3) /* clear remaining operands */ |
| 575 | result->oprs[operand++].type = 0; |
| 576 | |
| 577 | /* |
| 578 | * Transform RESW, RESD, RESQ, REST into RESB. |
| 579 | */ |
| 580 | switch (result->opcode) { |
| 581 | case I_RESW: result->opcode=I_RESB; result->oprs[0].offset*=2; break; |
| 582 | case I_RESD: result->opcode=I_RESB; result->oprs[0].offset*=4; break; |
| 583 | case I_RESQ: result->opcode=I_RESB; result->oprs[0].offset*=8; break; |
| 584 | case I_REST: result->opcode=I_RESB; result->oprs[0].offset*=10; break; |
| 585 | } |
| 586 | |
| 587 | return result; |
| 588 | } |
| 589 | |
| 590 | static int is_comma_next (void) { |
| 591 | char *p; |
| 592 | |
| 593 | p = bufptr; |
| 594 | while (isspace(*p)) p++; |
| 595 | return (*p == ',' || *p == ';' || !*p); |
| 596 | } |
| 597 | |
/* isidstart matches any character that may start an identifier, and isidchar
 * matches any character that may appear at places other than the start of an
 * identifier. E.g. a period may only appear at the start of an identifier
 * (for local labels), whereas a number may appear anywhere *but* at the
 * start.
 *
 * The <ctype.h> functions are only defined for arguments representable as
 * unsigned char (or EOF), so the argument is converted before it is passed
 * on; this makes the macros safe to apply directly to a plain `char' taken
 * from the source line. Note that the argument is evaluated more than once. */

#define isidstart(c) ( isalpha((unsigned char)(c)) || (c)=='_' || \
                       (c)=='.' || (c)=='?' )
#define isidchar(c) ( isidstart(c) || isdigit((unsigned char)(c)) || \
                      (c)=='$' || (c)=='#' || (c)=='@' || (c)=='~' )

/* Ditto for numeric constants. */

#define isnumstart(c) ( isdigit((unsigned char)(c)) || (c)=='$' )
#define isnumchar(c) ( isalnum((unsigned char)(c)) )

/* This returns the numeric value of a given 'digit'. */

#define numvalue(c) ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? (c)-'A'+10 : (c)-'0')
| 616 | |
| 617 | /* |
| 618 | * This tokeniser routine has only one side effect, that of |
| 619 | * updating `bufptr'. Hence by saving `bufptr', lookahead may be |
| 620 | * performed. |
| 621 | */ |
| 622 | |
| 623 | static int nexttoken (void) { |
| 624 | char ourcopy[256], *r, *s; |
| 625 | |
| 626 | while (isspace(*bufptr)) bufptr++; |
| 627 | if (!*bufptr) return 0; |
| 628 | |
| 629 | /* we have a token; either an id, a number or a char */ |
| 630 | if (isidstart(*bufptr) || |
| 631 | (*bufptr == '$' && isidstart(bufptr[1]))) { |
| 632 | /* now we've got an identifier */ |
| 633 | int i; |
| 634 | int is_sym = FALSE; |
| 635 | |
| 636 | if (*bufptr == '$') { |
| 637 | is_sym = TRUE; |
| 638 | bufptr++; |
| 639 | } |
| 640 | |
| 641 | tokval.t_charptr = q; |
| 642 | *q++ = *bufptr++; |
| 643 | while (isidchar(*bufptr)) *q++ = *bufptr++; |
| 644 | *q++ = '\0'; |
| 645 | for (s=tokval.t_charptr, r=ourcopy; *s; s++) |
| 646 | *r++ = tolower (*s); |
| 647 | *r = '\0'; |
| 648 | if (is_sym) |
| 649 | return TOKEN_ID; /* bypass all other checks */ |
| 650 | /* right, so we have an identifier sitting in temp storage. now, |
| 651 | * is it actually a register or instruction name, or what? */ |
| 652 | if ((tokval.t_integer=bsi(ourcopy, reg_names, |
| 653 | elements(reg_names)))>=0) |
| 654 | return TOKEN_REG; |
| 655 | if ((tokval.t_integer=bsi(ourcopy, insn_names, |
| 656 | elements(insn_names)))>=0) |
| 657 | return TOKEN_INSN; |
| 658 | for (i=0; i<elements(icn); i++) |
| 659 | if (!strncmp(ourcopy, icn[i], strlen(icn[i]))) { |
| 660 | char *p = ourcopy + strlen(icn[i]); |
| 661 | tokval.t_integer = ico[i]; |
| 662 | if ((tokval.t_inttwo=bsi(p, conditions, |
| 663 | elements(conditions)))>=0) |
| 664 | return TOKEN_INSN; |
| 665 | } |
| 666 | if ((tokval.t_integer=bsi(ourcopy, prefix_names, |
| 667 | elements(prefix_names)))>=0) { |
| 668 | tokval.t_integer += PREFIX_ENUM_START; |
| 669 | return TOKEN_PREFIX; |
| 670 | } |
| 671 | if ((tokval.t_integer=bsi(ourcopy, special_names, |
| 672 | elements(special_names)))>=0) |
| 673 | return TOKEN_SPECIAL; |
| 674 | if (!strcmp(ourcopy, "seg")) |
| 675 | return TOKEN_SEG; |
| 676 | if (!strcmp(ourcopy, "wrt")) |
| 677 | return TOKEN_WRT; |
| 678 | return TOKEN_ID; |
| 679 | } else if (*bufptr == '$' && !isnumchar(bufptr[1])) { |
| 680 | /* |
| 681 | * It's a $ sign with no following hex number; this must |
| 682 | * mean it's a Here token ($), evaluating to the current |
| 683 | * assembly location, or a Base token ($$), evaluating to |
| 684 | * the base of the current segment. |
| 685 | */ |
| 686 | bufptr++; |
| 687 | if (*bufptr == '$') { |
| 688 | bufptr++; |
| 689 | return TOKEN_BASE; |
| 690 | } |
| 691 | return TOKEN_HERE; |
| 692 | } else if (isnumstart(*bufptr)) { /* now we've got a number */ |
| 693 | char *r = q; |
| 694 | int rn_error; |
| 695 | |
| 696 | *q++ = *bufptr++; |
| 697 | while (isnumchar(*bufptr)) { |
| 698 | *q++ = *bufptr++; |
| 699 | } |
| 700 | if (*bufptr == '.') { |
| 701 | /* |
| 702 | * a floating point constant |
| 703 | */ |
| 704 | *q++ = *bufptr++; |
| 705 | while (isnumchar(*bufptr)) { |
| 706 | *q++ = *bufptr++; |
| 707 | } |
| 708 | *q++ = '\0'; |
| 709 | tokval.t_charptr = r; |
| 710 | return TOKEN_FLOAT; |
| 711 | } |
| 712 | *q++ = '\0'; |
| 713 | tokval.t_integer = readnum(r, &rn_error); |
| 714 | if (rn_error) |
| 715 | return TOKEN_ERRNUM; /* some malformation occurred */ |
| 716 | tokval.t_charptr = NULL; |
| 717 | return TOKEN_NUM; |
| 718 | } else if (*bufptr == '\'' || *bufptr == '"') {/* a char constant */ |
| 719 | char quote = *bufptr++, *r; |
| 720 | r = tokval.t_charptr = bufptr; |
| 721 | while (*bufptr && *bufptr != quote) bufptr++; |
| 722 | tokval.t_inttwo = bufptr - r; /* store full version */ |
| 723 | if (!*bufptr) |
| 724 | return TOKEN_ERRNUM; /* unmatched quotes */ |
| 725 | tokval.t_integer = 0; |
| 726 | r = bufptr++; /* skip over final quote */ |
| 727 | while (quote != *--r) { |
| 728 | tokval.t_integer = (tokval.t_integer<<8) + (unsigned char) *r; |
| 729 | } |
| 730 | return TOKEN_NUM; |
| 731 | } else if (*bufptr == ';') { /* a comment has happened - stay */ |
| 732 | return 0; |
| 733 | } else if ((*bufptr == '>' || *bufptr == '<' || |
| 734 | *bufptr == '/' || *bufptr == '%') && bufptr[1] == *bufptr) { |
| 735 | bufptr += 2; |
| 736 | return (bufptr[-2] == '>' ? TOKEN_SHR : |
| 737 | bufptr[-2] == '<' ? TOKEN_SHL : |
| 738 | bufptr[-2] == '/' ? TOKEN_SDIV : |
| 739 | TOKEN_SMOD); |
| 740 | } else /* just an ordinary char */ |
| 741 | return (unsigned char) (*bufptr++); |
| 742 | } |
| 743 | |
/*
 * Binary search: look `string' up in `array', which holds `size'
 * strings in ascending strcmp() order.  Returns the index of the
 * matching entry, or -1 if there is none.
 */
static int bsi (char *string, char **array, int size) {
    int lo = 0;                 /* first index still in play */
    int hi = size;              /* one past the last index in play */

    while (lo < hi) {
        int mid = (lo + hi - 1) / 2;
        int cmp = strcmp(string, array[mid]);

        if (cmp == 0)
            return mid;         /* found it */
        if (cmp < 0)
            hi = mid;           /* carry on in the lower part */
        else
            lo = mid + 1;       /* carry on in the upper part */
    }
    return -1;                  /* not present */
}
| 759 | |
| 760 | void cleanup_insn (insn *i) { |
| 761 | extop *e; |
| 762 | |
| 763 | while (i->eops) { |
| 764 | e = i->eops; |
| 765 | i->eops = i->eops->next; |
| 766 | nasm_free (e); |
| 767 | } |
| 768 | } |
| 769 | |
| 770 | /* ------------- Evaluator begins here ------------------ */ |
| 771 | |
| 772 | static expr exprtempstorage[1024], *tempptr; /* store exprs in here */ |
| 773 | |
| 774 | /* |
| 775 | * Add two vector datatypes. We have some bizarre behaviour on far- |
| 776 | * absolute segment types: we preserve them during addition _only_ |
| 777 | * if one of the segments is a truly pure scalar. |
| 778 | */ |
| 779 | static expr *add_vectors(expr *p, expr *q) { |
| 780 | expr *r = tempptr; |
| 781 | int preserve; |
| 782 | |
| 783 | preserve = is_really_simple(p) || is_really_simple(q); |
| 784 | |
| 785 | while (p->type && q->type && |
| 786 | p->type < EXPR_SEGBASE+SEG_ABS && |
| 787 | q->type < EXPR_SEGBASE+SEG_ABS) |
| 788 | if (p->type > q->type) { |
| 789 | tempptr->type = q->type; |
| 790 | tempptr->value = q->value; |
| 791 | tempptr++, q++; |
| 792 | } else if (p->type < q->type) { |
| 793 | tempptr->type = p->type; |
| 794 | tempptr->value = p->value; |
| 795 | tempptr++, p++; |
| 796 | } else { /* *p and *q have same type */ |
| 797 | tempptr->type = p->type; |
| 798 | tempptr->value = p->value + q->value; |
| 799 | tempptr++, p++, q++; |
| 800 | } |
| 801 | while (p->type && |
| 802 | (preserve || p->type < EXPR_SEGBASE+SEG_ABS)) { |
| 803 | tempptr->type = p->type; |
| 804 | tempptr->value = p->value; |
| 805 | tempptr++, p++; |
| 806 | } |
| 807 | while (q->type && |
| 808 | (preserve || q->type < EXPR_SEGBASE+SEG_ABS)) { |
| 809 | tempptr->type = q->type; |
| 810 | tempptr->value = q->value; |
| 811 | tempptr++, q++; |
| 812 | } |
| 813 | (tempptr++)->type = 0; |
| 814 | |
| 815 | return r; |
| 816 | } |
| 817 | |
| 818 | /* |
| 819 | * Multiply a vector by a scalar. Strip far-absolute segment part |
| 820 | * if present. |
| 821 | */ |
| 822 | static expr *scalar_mult(expr *vect, long scalar) { |
| 823 | expr *p = vect; |
| 824 | |
| 825 | while (p->type && p->type < EXPR_SEGBASE+SEG_ABS) { |
| 826 | p->value = scalar * (p->value); |
| 827 | p++; |
| 828 | } |
| 829 | p->type = 0; |
| 830 | |
| 831 | return vect; |
| 832 | } |
| 833 | |
| 834 | static expr *scalarvect (long scalar) { |
| 835 | expr *p = tempptr; |
| 836 | tempptr->type = EXPR_SIMPLE; |
| 837 | tempptr->value = scalar; |
| 838 | tempptr++; |
| 839 | tempptr->type = 0; |
| 840 | tempptr++; |
| 841 | return p; |
| 842 | } |
| 843 | |
| 844 | /* |
| 845 | * Return TRUE if the argument is a simple scalar. (Or a far- |
| 846 | * absolute, which counts.) |
| 847 | */ |
| 848 | static int is_simple (expr *vect) { |
| 849 | while (vect->type && !vect->value) |
| 850 | vect++; |
| 851 | if (!vect->type) |
| 852 | return 1; |
| 853 | if (vect->type != EXPR_SIMPLE) |
| 854 | return 0; |
| 855 | do { |
| 856 | vect++; |
| 857 | } while (vect->type && !vect->value); |
| 858 | if (vect->type && vect->type < EXPR_SEGBASE+SEG_ABS) return 0; |
| 859 | return 1; |
| 860 | } |
| 861 | |
| 862 | /* |
| 863 | * Return TRUE if the argument is a simple scalar, _NOT_ a far- |
| 864 | * absolute. |
| 865 | */ |
| 866 | static int is_really_simple (expr *vect) { |
| 867 | while (vect->type && !vect->value) |
| 868 | vect++; |
| 869 | if (!vect->type) |
| 870 | return 1; |
| 871 | if (vect->type != EXPR_SIMPLE) |
| 872 | return 0; |
| 873 | do { |
| 874 | vect++; |
| 875 | } while (vect->type && !vect->value); |
| 876 | if (vect->type) return 0; |
| 877 | return 1; |
| 878 | } |
| 879 | |
| 880 | /* |
| 881 | * Return TRUE if the argument is relocatable (i.e. a simple |
| 882 | * scalar, plus at most one segment-base, plus possibly a WRT). |
| 883 | */ |
| 884 | static int is_reloc (expr *vect) { |
| 885 | while (vect->type && !vect->value) |
| 886 | vect++; |
| 887 | if (!vect->type) |
| 888 | return 1; |
| 889 | if (vect->type < EXPR_SIMPLE) |
| 890 | return 0; |
| 891 | if (vect->type == EXPR_SIMPLE) { |
| 892 | do { |
| 893 | vect++; |
| 894 | } while (vect->type && !vect->value); |
| 895 | if (!vect->type) |
| 896 | return 1; |
| 897 | } |
| 898 | do { |
| 899 | vect++; |
| 900 | } while (vect->type && (vect->type == EXPR_WRT || !vect->value)); |
| 901 | if (!vect->type) |
| 902 | return 1; |
| 903 | return 1; |
| 904 | } |
| 905 | |
| 906 | /* |
| 907 | * Return the scalar part of a relocatable vector. (Including |
| 908 | * simple scalar vectors - those qualify as relocatable.) |
| 909 | */ |
| 910 | static long reloc_value (expr *vect) { |
| 911 | while (vect->type && !vect->value) |
| 912 | vect++; |
| 913 | if (!vect->type) return 0; |
| 914 | if (vect->type == EXPR_SIMPLE) |
| 915 | return vect->value; |
| 916 | else |
| 917 | return 0; |
| 918 | } |
| 919 | |
| 920 | /* |
| 921 | * Return the segment number of a relocatable vector, or NO_SEG for |
| 922 | * simple scalars. |
| 923 | */ |
| 924 | static long reloc_seg (expr *vect) { |
| 925 | while (vect->type && (vect->type == EXPR_WRT || !vect->value)) |
| 926 | vect++; |
| 927 | if (vect->type == EXPR_SIMPLE) { |
| 928 | do { |
| 929 | vect++; |
| 930 | } while (vect->type && (vect->type == EXPR_WRT || !vect->value)); |
| 931 | } |
| 932 | if (!vect->type) |
| 933 | return NO_SEG; |
| 934 | else |
| 935 | return vect->type - EXPR_SEGBASE; |
| 936 | } |
| 937 | |
| 938 | /* |
| 939 | * Return the WRT segment number of a relocatable vector, or NO_SEG |
| 940 | * if no WRT part is present. |
| 941 | */ |
| 942 | static long reloc_wrt (expr *vect) { |
| 943 | while (vect->type && vect->type < EXPR_WRT) |
| 944 | vect++; |
| 945 | if (vect->type == EXPR_WRT) { |
| 946 | return vect->value; |
| 947 | } else |
| 948 | return NO_SEG; |
| 949 | } |
| 950 | |
| 951 | static void eval_reset(void) { |
| 952 | tempptr = exprtempstorage; /* initialise temporary storage */ |
| 953 | } |
| 954 | |
| 955 | /* |
| 956 | * The SEG operator: calculate the segment part of a relocatable |
| 957 | * value. Return NULL, as usual, if an error occurs. Report the |
| 958 | * error too. |
| 959 | */ |
| 960 | static expr *segment_part (expr *e) { |
| 961 | long seg; |
| 962 | |
| 963 | if (!is_reloc(e)) { |
| 964 | error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value"); |
| 965 | return NULL; |
| 966 | } |
| 967 | |
| 968 | seg = reloc_seg(e); |
| 969 | if (seg == NO_SEG) { |
| 970 | error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value"); |
| 971 | return NULL; |
| 972 | } else if (seg & SEG_ABS) |
| 973 | return scalarvect(seg & ~SEG_ABS); |
| 974 | else { |
| 975 | expr *f = tempptr++; |
| 976 | tempptr++->type = 0; |
| 977 | f->type = EXPR_SEGBASE+outfmt->segbase(seg+1); |
| 978 | f->value = 1; |
| 979 | return f; |
| 980 | } |
| 981 | } |
| 982 | |
| 983 | /* |
| 984 | * Recursive-descent parser. Called with a single boolean operand, |
| 985 | * which is TRUE if the evaluation is critical (i.e. unresolved |
| 986 | * symbols are an error condition). Must update the global `i' to |
| 987 | * reflect the token after the parsed string. May return NULL. |
| 988 | * |
| 989 | * evaluate() should report its own errors: on return it is assumed |
| 990 | * that if NULL has been returned, the error has already been |
| 991 | * reported. |
| 992 | */ |
| 993 | |
| 994 | /* |
| 995 | * Grammar parsed is: |
| 996 | * |
| 997 | * expr : expr0 [ WRT expr6 ] |
| 998 | * expr0 : expr1 [ {|} expr1] |
| 999 | * expr1 : expr2 [ {^} expr2] |
| 1000 | * expr2 : expr3 [ {&} expr3] |
| 1001 | * expr3 : expr4 [ {<<,>>} expr4...] |
| 1002 | * expr4 : expr5 [ {+,-} expr5...] |
| 1003 | * expr5 : expr6 [ {*,/,%,//,%%} expr6...] |
| 1004 | * expr6 : { ~,+,-,SEG } expr6 |
| 1005 | * | (expr0) |
| 1006 | * | symbol |
| 1007 | * | $ |
| 1008 | * | number |
| 1009 | */ |
| 1010 | |
| 1011 | static expr *expr0(int), *expr1(int), *expr2(int), *expr3(int); |
| 1012 | static expr *expr4(int), *expr5(int), *expr6(int); |
| 1013 | |
| 1014 | static expr *expr0(int critical) { |
| 1015 | expr *e, *f; |
| 1016 | |
| 1017 | e = expr1(critical); |
| 1018 | if (!e) |
| 1019 | return NULL; |
| 1020 | while (i == '|') { |
| 1021 | i = nexttoken(); |
| 1022 | f = expr1(critical); |
| 1023 | if (!f) |
| 1024 | return NULL; |
| 1025 | if (!is_simple(e) || !is_simple(f)) { |
| 1026 | error(ERR_NONFATAL, "`|' operator may only be applied to" |
| 1027 | " scalar values"); |
| 1028 | } |
| 1029 | e = scalarvect (reloc_value(e) | reloc_value(f)); |
| 1030 | } |
| 1031 | return e; |
| 1032 | } |
| 1033 | |
| 1034 | static expr *expr1(int critical) { |
| 1035 | expr *e, *f; |
| 1036 | |
| 1037 | e = expr2(critical); |
| 1038 | if (!e) |
| 1039 | return NULL; |
| 1040 | while (i == '^') { |
| 1041 | i = nexttoken(); |
| 1042 | f = expr2(critical); |
| 1043 | if (!f) |
| 1044 | return NULL; |
| 1045 | if (!is_simple(e) || !is_simple(f)) { |
| 1046 | error(ERR_NONFATAL, "`^' operator may only be applied to" |
| 1047 | " scalar values"); |
| 1048 | } |
| 1049 | e = scalarvect (reloc_value(e) ^ reloc_value(f)); |
| 1050 | } |
| 1051 | return e; |
| 1052 | } |
| 1053 | |
| 1054 | static expr *expr2(int critical) { |
| 1055 | expr *e, *f; |
| 1056 | |
| 1057 | e = expr3(critical); |
| 1058 | if (!e) |
| 1059 | return NULL; |
| 1060 | while (i == '&') { |
| 1061 | i = nexttoken(); |
| 1062 | f = expr3(critical); |
| 1063 | if (!f) |
| 1064 | return NULL; |
| 1065 | if (!is_simple(e) || !is_simple(f)) { |
| 1066 | error(ERR_NONFATAL, "`&' operator may only be applied to" |
| 1067 | " scalar values"); |
| 1068 | } |
| 1069 | e = scalarvect (reloc_value(e) & reloc_value(f)); |
| 1070 | } |
| 1071 | return e; |
| 1072 | } |
| 1073 | |
| 1074 | static expr *expr3(int critical) { |
| 1075 | expr *e, *f; |
| 1076 | |
| 1077 | e = expr4(critical); |
| 1078 | if (!e) |
| 1079 | return NULL; |
| 1080 | while (i == TOKEN_SHL || i == TOKEN_SHR) { |
| 1081 | int j = i; |
| 1082 | i = nexttoken(); |
| 1083 | f = expr4(critical); |
| 1084 | if (!f) |
| 1085 | return NULL; |
| 1086 | if (!is_simple(e) || !is_simple(f)) { |
| 1087 | error(ERR_NONFATAL, "shift operator may only be applied to" |
| 1088 | " scalar values"); |
| 1089 | } |
| 1090 | switch (j) { |
| 1091 | case TOKEN_SHL: |
| 1092 | e = scalarvect (reloc_value(e) << reloc_value(f)); |
| 1093 | break; |
| 1094 | case TOKEN_SHR: |
| 1095 | e = scalarvect (((unsigned long)reloc_value(e)) >> |
| 1096 | reloc_value(f)); |
| 1097 | break; |
| 1098 | } |
| 1099 | } |
| 1100 | return e; |
| 1101 | } |
| 1102 | |
| 1103 | static expr *expr4(int critical) { |
| 1104 | expr *e, *f; |
| 1105 | |
| 1106 | e = expr5(critical); |
| 1107 | if (!e) |
| 1108 | return NULL; |
| 1109 | while (i == '+' || i == '-') { |
| 1110 | int j = i; |
| 1111 | i = nexttoken(); |
| 1112 | f = expr5(critical); |
| 1113 | if (!f) |
| 1114 | return NULL; |
| 1115 | switch (j) { |
| 1116 | case '+': |
| 1117 | e = add_vectors (e, f); |
| 1118 | break; |
| 1119 | case '-': |
| 1120 | e = add_vectors (e, scalar_mult(f, -1L)); |
| 1121 | break; |
| 1122 | } |
| 1123 | } |
| 1124 | return e; |
| 1125 | } |
| 1126 | |
| 1127 | static expr *expr5(int critical) { |
| 1128 | expr *e, *f; |
| 1129 | |
| 1130 | e = expr6(critical); |
| 1131 | if (!e) |
| 1132 | return NULL; |
| 1133 | while (i == '*' || i == '/' || i == '*' || |
| 1134 | i == TOKEN_SDIV || i == TOKEN_SMOD) { |
| 1135 | int j = i; |
| 1136 | i = nexttoken(); |
| 1137 | f = expr6(critical); |
| 1138 | if (!f) |
| 1139 | return NULL; |
| 1140 | if (j != '*' && (!is_simple(e) || !is_simple(f))) { |
| 1141 | error(ERR_NONFATAL, "division operator may only be applied to" |
| 1142 | " scalar values"); |
| 1143 | return NULL; |
| 1144 | } |
| 1145 | if (j != '*' && reloc_value(f) == 0) { |
| 1146 | error(ERR_NONFATAL, "division by zero"); |
| 1147 | return NULL; |
| 1148 | } |
| 1149 | switch (j) { |
| 1150 | case '*': |
| 1151 | if (is_simple(e)) |
| 1152 | e = scalar_mult (f, reloc_value(e)); |
| 1153 | else if (is_simple(f)) |
| 1154 | e = scalar_mult (e, reloc_value(f)); |
| 1155 | else { |
| 1156 | error(ERR_NONFATAL, "unable to multiply two " |
| 1157 | "non-scalar objects"); |
| 1158 | return NULL; |
| 1159 | } |
| 1160 | break; |
| 1161 | case '/': |
| 1162 | e = scalarvect (((unsigned long)reloc_value(e)) / |
| 1163 | ((unsigned long)reloc_value(f))); |
| 1164 | break; |
| 1165 | case '%': |
| 1166 | e = scalarvect (((unsigned long)reloc_value(e)) % |
| 1167 | ((unsigned long)reloc_value(f))); |
| 1168 | break; |
| 1169 | case TOKEN_SDIV: |
| 1170 | e = scalarvect (((signed long)reloc_value(e)) / |
| 1171 | ((signed long)reloc_value(f))); |
| 1172 | break; |
| 1173 | case TOKEN_SMOD: |
| 1174 | e = scalarvect (((signed long)reloc_value(e)) % |
| 1175 | ((signed long)reloc_value(f))); |
| 1176 | break; |
| 1177 | } |
| 1178 | } |
| 1179 | return e; |
| 1180 | } |
| 1181 | |
| 1182 | static expr *expr6(int critical) { |
| 1183 | expr *e; |
| 1184 | long label_seg, label_ofs; |
| 1185 | |
| 1186 | if (i == '-') { |
| 1187 | i = nexttoken(); |
| 1188 | e = expr6(critical); |
| 1189 | if (!e) |
| 1190 | return NULL; |
| 1191 | return scalar_mult (e, -1L); |
| 1192 | } else if (i == '+') { |
| 1193 | i = nexttoken(); |
| 1194 | return expr6(critical); |
| 1195 | } else if (i == '~') { |
| 1196 | i = nexttoken(); |
| 1197 | e = expr6(critical); |
| 1198 | if (!e) |
| 1199 | return NULL; |
| 1200 | if (!is_simple(e)) { |
| 1201 | error(ERR_NONFATAL, "`~' operator may only be applied to" |
| 1202 | " scalar values"); |
| 1203 | return NULL; |
| 1204 | } |
| 1205 | return scalarvect(~reloc_value(e)); |
| 1206 | } else if (i == TOKEN_SEG) { |
| 1207 | i = nexttoken(); |
| 1208 | e = expr6(critical); |
| 1209 | if (!e) |
| 1210 | return NULL; |
| 1211 | return segment_part(e); |
| 1212 | } else if (i == '(') { |
| 1213 | i = nexttoken(); |
| 1214 | e = expr0(critical); |
| 1215 | if (!e) |
| 1216 | return NULL; |
| 1217 | if (i != ')') { |
| 1218 | error(ERR_NONFATAL, "expecting `)'"); |
| 1219 | return NULL; |
| 1220 | } |
| 1221 | i = nexttoken(); |
| 1222 | return e; |
| 1223 | } else if (i == TOKEN_NUM || i == TOKEN_REG || i == TOKEN_ID || |
| 1224 | i == TOKEN_HERE || i == TOKEN_BASE) { |
| 1225 | e = tempptr; |
| 1226 | switch (i) { |
| 1227 | case TOKEN_NUM: |
| 1228 | e->type = EXPR_SIMPLE; |
| 1229 | e->value = tokval.t_integer; |
| 1230 | break; |
| 1231 | case TOKEN_REG: |
| 1232 | e->type = tokval.t_integer; |
| 1233 | e->value = 1; |
| 1234 | break; |
| 1235 | case TOKEN_ID: |
| 1236 | case TOKEN_HERE: |
| 1237 | case TOKEN_BASE: |
| 1238 | /* |
| 1239 | * Since the whole line is parsed before the label it |
| 1240 | * defines is given to the label manager, we have |
| 1241 | * problems with lines such as |
| 1242 | * |
| 1243 | * end: TIMES 512-(end-start) DB 0 |
| 1244 | * |
| 1245 | * where `end' is not known on pass one, despite not |
| 1246 | * really being a forward reference, and due to |
| 1247 | * criticality it is _needed_. Hence we check our label |
| 1248 | * against the currently defined one, and do our own |
| 1249 | * resolution of it if we have to. |
| 1250 | */ |
| 1251 | if (i == TOKEN_BASE) { |
| 1252 | label_seg = seg; |
| 1253 | label_ofs = 0; |
| 1254 | } else if (i == TOKEN_HERE || !strcmp(tokval.t_charptr, label)) { |
| 1255 | label_seg = seg; |
| 1256 | label_ofs = ofs; |
| 1257 | } else if (!labelfunc(tokval.t_charptr, &label_seg, &label_ofs)) { |
| 1258 | if (critical == 2) { |
| 1259 | error (ERR_NONFATAL, "symbol `%s' undefined", |
| 1260 | tokval.t_charptr); |
| 1261 | return NULL; |
| 1262 | } else if (critical == 1) { |
| 1263 | error (ERR_NONFATAL, "symbol `%s' not defined before use", |
| 1264 | tokval.t_charptr); |
| 1265 | return NULL; |
| 1266 | } else { |
H. Peter Anvin | ea83827 | 2002-04-30 20:51:53 +0000 | [diff] [blame^] | 1267 | forward = TRUE; |
H. Peter Anvin | ea6e34d | 2002-04-30 20:51:32 +0000 | [diff] [blame] | 1268 | label_seg = seg; |
| 1269 | label_ofs = ofs; |
| 1270 | } |
| 1271 | } |
| 1272 | e->type = EXPR_SIMPLE; |
| 1273 | e->value = label_ofs; |
| 1274 | if (label_seg!=NO_SEG) { |
| 1275 | tempptr++; |
| 1276 | tempptr->type = EXPR_SEGBASE + label_seg; |
| 1277 | tempptr->value = 1; |
| 1278 | } |
| 1279 | break; |
| 1280 | } |
| 1281 | tempptr++; |
| 1282 | tempptr->type = 0; |
| 1283 | tempptr++; |
| 1284 | i = nexttoken(); |
| 1285 | return e; |
| 1286 | } else { |
| 1287 | error(ERR_NONFATAL, "expression syntax error"); |
| 1288 | return NULL; |
| 1289 | } |
| 1290 | } |
| 1291 | |
| 1292 | static expr *evaluate (int critical) { |
| 1293 | expr *e; |
| 1294 | expr *f = NULL; |
| 1295 | |
| 1296 | e = expr0 (critical); |
| 1297 | if (!e) |
| 1298 | return NULL; |
| 1299 | |
| 1300 | if (i == TOKEN_WRT) { |
| 1301 | if (!is_reloc(e)) { |
| 1302 | error(ERR_NONFATAL, "invalid left-hand operand to WRT"); |
| 1303 | return NULL; |
| 1304 | } |
| 1305 | i = nexttoken(); /* eat the WRT */ |
| 1306 | f = expr6 (critical); |
| 1307 | if (!f) |
| 1308 | return NULL; |
| 1309 | } |
| 1310 | e = scalar_mult (e, 1L); /* strip far-absolute segment part */ |
| 1311 | if (f) { |
| 1312 | expr *g = tempptr++; |
| 1313 | tempptr++->type = 0; |
| 1314 | g->type = EXPR_WRT; |
| 1315 | if (!is_reloc(f)) { |
| 1316 | error(ERR_NONFATAL, "invalid right-hand operand to WRT"); |
| 1317 | return NULL; |
| 1318 | } |
| 1319 | g->value = reloc_seg(f); |
| 1320 | if (g->value == NO_SEG) |
| 1321 | g->value = reloc_value(f) | SEG_ABS; |
| 1322 | else if (!(g->value & SEG_ABS) && !(g->value % 2) && critical) { |
| 1323 | error(ERR_NONFATAL, "invalid right-hand operand to WRT"); |
| 1324 | return NULL; |
| 1325 | } |
| 1326 | e = add_vectors (e, g); |
| 1327 | } |
| 1328 | return e; |
| 1329 | } |