/* parser.c   source line parser for the Netwide Assembler
 *
 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
 * Julian Hall. All rights reserved. The software is
 * redistributable under the licence given in the file "Licence"
 * distributed in the NASM archive.
 *
 * initial version 27/iii/95 by Simon Tatham
 */
| 10 | |
| 11 | #include <stdio.h> |
| 12 | #include <stdlib.h> |
| 13 | #include <stddef.h> |
| 14 | #include <string.h> |
| 15 | #include <ctype.h> |
| 16 | |
| 17 | #include "nasm.h" |
| 18 | #include "nasmlib.h" |
| 19 | #include "parser.h" |
| 20 | #include "float.h" |
| 21 | |
| 22 | #include "names.c" |
| 23 | |
H. Peter Anvin | ea6e34d | 2002-04-30 20:51:32 +0000 | [diff] [blame] | 24 | static long reg_flags[] = { /* sizes and special flags */ |
| 25 | 0, REG8, REG_AL, REG_AX, REG8, REG8, REG16, REG16, REG8, REG_CL, |
| 26 | REG_CREG, REG_CREG, REG_CREG, REG_CR4, REG_CS, REG_CX, REG8, |
| 27 | REG16, REG8, REG_DREG, REG_DREG, REG_DREG, REG_DREG, REG_DREG, |
| 28 | REG_DREG, REG_DESS, REG_DX, REG_EAX, REG32, REG32, REG_ECX, |
| 29 | REG32, REG32, REG_DESS, REG32, REG32, REG_FSGS, REG_FSGS, |
| 30 | MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, |
| 31 | REG16, REG16, REG_DESS, FPU0, FPUREG, FPUREG, FPUREG, FPUREG, |
| 32 | FPUREG, FPUREG, FPUREG, REG_TREG, REG_TREG, REG_TREG, REG_TREG, |
| 33 | REG_TREG |
| 34 | }; |
| 35 | |
/*
 * Special tokens: size specifiers and jump-distance keywords.  The
 * enumeration values are indices into special_names[], which is
 * searched with bsi() and therefore has to stay in strcmp() order.
 */
enum {
    S_BYTE,
    S_DWORD,
    S_FAR,
    S_LONG,
    S_NEAR,
    S_QWORD,
    S_SHORT,
    S_TO,
    S_TWORD,
    S_WORD
};

/* Source spelling of each special token, in the same order as above. */
static char *special_names[] = {
    "byte",
    "dword",
    "far",
    "long",
    "near",
    "qword",
    "short",
    "to",
    "tword",
    "word"
};
| 45 | |
/*
 * Instruction prefix keywords.  Searched with bsi(), so the list must
 * stay in strcmp() order; the tokeniser adds PREFIX_ENUM_START to the
 * index it finds here.
 */
static char *prefix_names[] = {
    "a16",
    "a32",
    "lock",
    "o16",
    "o32",
    "rep",
    "repe",
    "repne",
    "repnz",
    "repz",
    "times"
};
| 50 | |
/*
 * Evaluator datatype.  Inside the evaluator an expression is an array
 * of these records, ended by a record whose type is 0.  It behaves
 * mostly like a vector: `type' names a component and `value' is the
 * multiple of that component present in the expression.
 *
 * The one exception is the WRT component, whose `value' holds the
 * segment the expression is relative to.  Such segments are always
 * segment-base types (odd segment values or SEG_ABS types), so a
 * record whose `value' is zero can still be treated as insignificant.
 */
typedef struct {
    long type;                  /* a register number, or EXPR_xxx */
    long value;                 /* needs at least 32 bits */
} expr;
| 67 | |
| 68 | static void eval_reset(void); |
| 69 | static expr *evaluate(int); |
| 70 | |
/*
 * Component types for an expr record that are not registers.
 *
 * ASSUMPTION: register numbers (the other possible values of the
 * "type" field) never reach 126, so anything below EXPR_SIMPLE can be
 * taken to be a register.
 */
#define EXPR_SIMPLE  126        /* plain scalar component */
#define EXPR_WRT     127        /* WRT component; value is a segment */
#define EXPR_SEGBASE 128        /* segment bases start from here */
| 79 | |
| 80 | static int is_reloc(expr *); |
| 81 | static int is_simple(expr *); |
| 82 | static int is_really_simple (expr *); |
| 83 | static long reloc_value(expr *); |
| 84 | static long reloc_seg(expr *); |
| 85 | static long reloc_wrt(expr *); |
| 86 | |
/*
 * Token types returned by nexttoken() for anything that is not a
 * single character.  Numbering starts at 256 so these codes cannot
 * collide with character values, which are returned as themselves.
 */
enum {
    TOKEN_ID = 256,
    TOKEN_NUM,
    TOKEN_REG,
    TOKEN_INSN,
    TOKEN_ERRNUM,
    TOKEN_HERE,
    TOKEN_BASE,
    TOKEN_SPECIAL,
    TOKEN_PREFIX,
    TOKEN_SHL,
    TOKEN_SHR,
    TOKEN_SDIV,
    TOKEN_SMOD,
    TOKEN_SEG,
    TOKEN_WRT,
    TOKEN_FLOAT
};
| 93 | |
/*
 * Value attached to the token most recently returned by nexttoken().
 * Which fields are meaningful depends on the token type.
 */
struct tokenval {
    long t_integer;             /* number value, or table index */
    long t_inttwo;              /* condition code, or string length */
    char *t_charptr;            /* identifier, number or string text */
};
| 98 | |
| 99 | static char tempstorage[1024], *q; |
| 100 | static int bsi (char *string, char **array, int size);/* binary search */ |
| 101 | |
| 102 | static int nexttoken (void); |
| 103 | static int is_comma_next (void); |
| 104 | |
| 105 | static char *bufptr; |
| 106 | static int i; |
| 107 | static struct tokenval tokval; |
| 108 | static lfunc labelfunc; |
| 109 | static efunc error; |
| 110 | static char *label; |
| 111 | static struct ofmt *outfmt; |
| 112 | |
| 113 | static long seg, ofs; |
| 114 | |
H. Peter Anvin | ea83827 | 2002-04-30 20:51:53 +0000 | [diff] [blame] | 115 | static int forward; |
| 116 | |
H. Peter Anvin | ea6e34d | 2002-04-30 20:51:32 +0000 | [diff] [blame] | 117 | insn *parse_line (long segment, long offset, lfunc lookup_label, int pass, |
| 118 | char *buffer, insn *result, struct ofmt *output, |
| 119 | efunc errfunc) { |
| 120 | int operand; |
| 121 | int critical; |
| 122 | |
H. Peter Anvin | ea83827 | 2002-04-30 20:51:53 +0000 | [diff] [blame] | 123 | forward = result->forw_ref = FALSE; |
H. Peter Anvin | ea6e34d | 2002-04-30 20:51:32 +0000 | [diff] [blame] | 124 | q = tempstorage; |
| 125 | bufptr = buffer; |
| 126 | labelfunc = lookup_label; |
| 127 | outfmt = output; |
| 128 | error = errfunc; |
| 129 | seg = segment; |
| 130 | ofs = offset; |
| 131 | label = ""; |
| 132 | |
| 133 | i = nexttoken(); |
| 134 | |
| 135 | result->eops = NULL; /* must do this, whatever happens */ |
| 136 | |
| 137 | if (i==0) { /* blank line - ignore */ |
| 138 | result->label = NULL; /* so, no label on it */ |
| 139 | result->opcode = -1; /* and no instruction either */ |
| 140 | return result; |
| 141 | } |
| 142 | if (i != TOKEN_ID && i != TOKEN_INSN && i != TOKEN_PREFIX && |
| 143 | (i!=TOKEN_REG || (REG_SREG & ~reg_flags[tokval.t_integer]))) { |
| 144 | error (ERR_NONFATAL, "label or instruction expected" |
| 145 | " at start of line"); |
| 146 | result->label = NULL; |
| 147 | result->opcode = -1; |
| 148 | return result; |
| 149 | } |
| 150 | |
| 151 | if (i == TOKEN_ID) { /* there's a label here */ |
| 152 | label = result->label = tokval.t_charptr; |
| 153 | i = nexttoken(); |
| 154 | if (i == ':') { /* skip over the optional colon */ |
| 155 | i = nexttoken(); |
| 156 | } |
| 157 | } else /* no label; so, moving swiftly on */ |
| 158 | result->label = NULL; |
| 159 | |
| 160 | if (i==0) { |
| 161 | result->opcode = -1; /* this line contains just a label */ |
| 162 | return result; |
| 163 | } |
| 164 | |
| 165 | result->nprefix = 0; |
H. Peter Anvin | d7ed89e | 2002-04-30 20:52:08 +0000 | [diff] [blame^] | 166 | result->times = 1L; |
H. Peter Anvin | ea6e34d | 2002-04-30 20:51:32 +0000 | [diff] [blame] | 167 | |
| 168 | while (i == TOKEN_PREFIX || |
| 169 | (i==TOKEN_REG && !(REG_SREG & ~reg_flags[tokval.t_integer]))) { |
| 170 | /* |
| 171 | * Handle special case: the TIMES prefix. |
| 172 | */ |
| 173 | if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) { |
| 174 | expr *value; |
| 175 | |
| 176 | i = nexttoken(); |
| 177 | eval_reset(); |
| 178 | value = evaluate (pass); |
| 179 | if (!value) { /* but, error in evaluator */ |
| 180 | result->opcode = -1; /* unrecoverable parse error: */ |
| 181 | return result; /* ignore this instruction */ |
| 182 | } |
| 183 | if (!is_simple (value)) { |
| 184 | error (ERR_NONFATAL, |
| 185 | "non-constant argument supplied to TIMES"); |
H. Peter Anvin | d7ed89e | 2002-04-30 20:52:08 +0000 | [diff] [blame^] | 186 | result->times = 1L; |
| 187 | } else { |
H. Peter Anvin | ea6e34d | 2002-04-30 20:51:32 +0000 | [diff] [blame] | 188 | result->times = value->value; |
H. Peter Anvin | d7ed89e | 2002-04-30 20:52:08 +0000 | [diff] [blame^] | 189 | if (value->value < 0) |
| 190 | error(ERR_WARNING, "TIMES value %d is negative", |
| 191 | value->value); |
| 192 | } |
H. Peter Anvin | ea6e34d | 2002-04-30 20:51:32 +0000 | [diff] [blame] | 193 | } else { |
| 194 | if (result->nprefix == MAXPREFIX) |
| 195 | error (ERR_NONFATAL, |
| 196 | "instruction has more than %d prefixes", MAXPREFIX); |
| 197 | else |
| 198 | result->prefixes[result->nprefix++] = tokval.t_integer; |
| 199 | i = nexttoken(); |
| 200 | } |
| 201 | } |
| 202 | |
| 203 | if (i != TOKEN_INSN) { |
| 204 | error (ERR_NONFATAL, "parser: instruction expected"); |
| 205 | result->opcode = -1; |
| 206 | return result; |
| 207 | } |
| 208 | |
| 209 | result->opcode = tokval.t_integer; |
| 210 | result->condition = tokval.t_inttwo; |
| 211 | |
| 212 | /* |
| 213 | * RESB, RESW and RESD cannot be satisfied with incorrectly |
| 214 | * evaluated operands, since the correct values _must_ be known |
| 215 | * on the first pass. Hence, even in pass one, we set the |
| 216 | * `critical' flag on calling evaluate(), so that it will bomb |
| 217 | * out on undefined symbols. Nasty, but there's nothing we can |
| 218 | * do about it. |
| 219 | * |
| 220 | * For the moment, EQU has the same difficulty, so we'll |
| 221 | * include that. |
| 222 | */ |
| 223 | if (result->opcode == I_RESB || |
| 224 | result->opcode == I_RESW || |
| 225 | result->opcode == I_RESD || |
| 226 | result->opcode == I_RESQ || |
| 227 | result->opcode == I_REST || |
| 228 | result->opcode == I_EQU) |
| 229 | critical = pass; |
| 230 | else |
| 231 | critical = (pass==2 ? 2 : 0); |
| 232 | |
| 233 | if (result->opcode == I_DB || |
| 234 | result->opcode == I_DW || |
| 235 | result->opcode == I_DD || |
| 236 | result->opcode == I_DQ || |
H. Peter Anvin | d7ed89e | 2002-04-30 20:52:08 +0000 | [diff] [blame^] | 237 | result->opcode == I_DT || |
| 238 | result->opcode == I_INCBIN) { |
H. Peter Anvin | ea6e34d | 2002-04-30 20:51:32 +0000 | [diff] [blame] | 239 | extop *eop, **tail = &result->eops; |
| 240 | int oper_num = 0; |
| 241 | |
| 242 | /* |
| 243 | * Begin to read the DB/DW/DD/DQ/DT operands. |
| 244 | */ |
| 245 | while (1) { |
| 246 | i = nexttoken(); |
| 247 | if (i == 0) |
| 248 | break; |
| 249 | eop = *tail = nasm_malloc(sizeof(extop)); |
| 250 | tail = &eop->next; |
| 251 | eop->next = NULL; |
| 252 | eop->type = EOT_NOTHING; |
| 253 | oper_num++; |
| 254 | |
| 255 | if (i == TOKEN_NUM && tokval.t_charptr && is_comma_next()) { |
| 256 | eop->type = EOT_DB_STRING; |
| 257 | eop->stringval = tokval.t_charptr; |
| 258 | eop->stringlen = tokval.t_inttwo; |
| 259 | i = nexttoken(); /* eat the comma */ |
| 260 | continue; |
| 261 | } |
| 262 | |
| 263 | if (i == TOKEN_FLOAT || i == '-') { |
| 264 | long sign = +1L; |
| 265 | |
| 266 | if (i == '-') { |
| 267 | char *save = bufptr; |
| 268 | i = nexttoken(); |
| 269 | sign = -1L; |
| 270 | if (i != TOKEN_FLOAT) { |
| 271 | bufptr = save; |
| 272 | i = '-'; |
| 273 | } |
| 274 | } |
| 275 | |
| 276 | if (i == TOKEN_FLOAT) { |
| 277 | eop->type = EOT_DB_STRING; |
| 278 | eop->stringval = q; |
| 279 | if (result->opcode == I_DD) |
| 280 | eop->stringlen = 4; |
| 281 | else if (result->opcode == I_DQ) |
| 282 | eop->stringlen = 8; |
| 283 | else if (result->opcode == I_DT) |
| 284 | eop->stringlen = 10; |
| 285 | else { |
| 286 | error(ERR_NONFATAL, "floating-point constant" |
| 287 | " encountered in `D%c' instruction", |
| 288 | result->opcode == I_DW ? 'W' : 'B'); |
| 289 | eop->type = EOT_NOTHING; |
| 290 | } |
| 291 | q += eop->stringlen; |
| 292 | if (!float_const (tokval.t_charptr, sign, |
| 293 | (unsigned char *)eop->stringval, |
| 294 | eop->stringlen, error)) |
| 295 | eop->type = EOT_NOTHING; |
| 296 | i = nexttoken(); /* eat the comma */ |
| 297 | continue; |
| 298 | } |
| 299 | } |
| 300 | |
| 301 | /* anything else */ { |
| 302 | expr *value; |
| 303 | eval_reset(); |
| 304 | value = evaluate (critical); |
| 305 | if (!value) { /* but, error in evaluator */ |
| 306 | result->opcode = -1;/* unrecoverable parse error: */ |
| 307 | return result; /* ignore this instruction */ |
| 308 | } |
| 309 | if (is_reloc(value)) { |
| 310 | eop->type = EOT_DB_NUMBER; |
| 311 | eop->offset = reloc_value(value); |
| 312 | eop->segment = reloc_seg(value); |
| 313 | eop->wrt = reloc_wrt(value); |
| 314 | } else { |
| 315 | error (ERR_NONFATAL, |
| 316 | "`%s' operand %d: expression is not simple" |
| 317 | " or relocatable", |
| 318 | insn_names[result->opcode], oper_num); |
| 319 | } |
| 320 | } |
| 321 | } |
H. Peter Anvin | d7ed89e | 2002-04-30 20:52:08 +0000 | [diff] [blame^] | 322 | |
| 323 | if (result->opcode == I_INCBIN) { |
| 324 | /* |
| 325 | * Correct syntax for INCBIN is that there should be |
| 326 | * one string operand, followed by one or two numeric |
| 327 | * operands. |
| 328 | */ |
| 329 | if (!result->eops || result->eops->type != EOT_DB_STRING) |
| 330 | error (ERR_NONFATAL, "`incbin' expects a file name"); |
| 331 | else if (result->eops->next && |
| 332 | result->eops->next->type != EOT_DB_NUMBER) |
| 333 | error (ERR_NONFATAL, "`incbin': second parameter is", |
| 334 | " non-numeric"); |
| 335 | else if (result->eops->next && result->eops->next->next && |
| 336 | result->eops->next->next->type != EOT_DB_NUMBER) |
| 337 | error (ERR_NONFATAL, "`incbin': third parameter is", |
| 338 | " non-numeric"); |
| 339 | else if (result->eops->next && result->eops->next->next && |
| 340 | result->eops->next->next->next) |
| 341 | error (ERR_NONFATAL, "`incbin': more than three parameters"); |
| 342 | else |
| 343 | return result; |
| 344 | /* |
| 345 | * If we reach here, one of the above errors happened. |
| 346 | * Throw the instruction away. |
| 347 | */ |
| 348 | result->opcode = -1; |
| 349 | return result; |
| 350 | } |
| 351 | |
H. Peter Anvin | ea6e34d | 2002-04-30 20:51:32 +0000 | [diff] [blame] | 352 | return result; |
| 353 | } |
| 354 | |
| 355 | /* right. Now we begin to parse the operands. There may be up to three |
| 356 | * of these, separated by commas, and terminated by a zero token. */ |
| 357 | |
| 358 | for (operand = 0; operand < 3; operand++) { |
| 359 | expr *seg, *value; /* used most of the time */ |
| 360 | int mref; /* is this going to be a memory ref? */ |
| 361 | |
| 362 | result->oprs[operand].addr_size = 0;/* have to zero this whatever */ |
| 363 | i = nexttoken(); |
| 364 | if (i == 0) break; /* end of operands: get out of here */ |
| 365 | result->oprs[operand].type = 0; /* so far, no override */ |
| 366 | while (i == TOKEN_SPECIAL) {/* size specifiers */ |
| 367 | switch ((int)tokval.t_integer) { |
| 368 | case S_BYTE: |
| 369 | result->oprs[operand].type |= BITS8; |
| 370 | break; |
| 371 | case S_WORD: |
| 372 | result->oprs[operand].type |= BITS16; |
| 373 | break; |
| 374 | case S_DWORD: |
| 375 | case S_LONG: |
| 376 | result->oprs[operand].type |= BITS32; |
| 377 | break; |
| 378 | case S_QWORD: |
| 379 | result->oprs[operand].type |= BITS64; |
| 380 | break; |
| 381 | case S_TWORD: |
| 382 | result->oprs[operand].type |= BITS80; |
| 383 | break; |
| 384 | case S_TO: |
| 385 | result->oprs[operand].type |= TO; |
| 386 | break; |
| 387 | case S_FAR: |
| 388 | result->oprs[operand].type |= FAR; |
| 389 | break; |
| 390 | case S_NEAR: |
| 391 | result->oprs[operand].type |= NEAR; |
| 392 | break; |
| 393 | case S_SHORT: |
| 394 | result->oprs[operand].type |= SHORT; |
| 395 | break; |
| 396 | } |
| 397 | i = nexttoken(); |
| 398 | } |
| 399 | |
| 400 | if (i == '[') { /* memory reference */ |
| 401 | i = nexttoken(); |
| 402 | mref = TRUE; |
| 403 | if (i == TOKEN_SPECIAL) { /* check for address size override */ |
| 404 | switch ((int)tokval.t_integer) { |
| 405 | case S_WORD: |
| 406 | result->oprs[operand].addr_size = 16; |
| 407 | break; |
| 408 | case S_DWORD: |
| 409 | case S_LONG: |
| 410 | result->oprs[operand].addr_size = 32; |
| 411 | break; |
| 412 | default: |
| 413 | error (ERR_NONFATAL, "invalid size specification in" |
| 414 | " effective address"); |
| 415 | } |
| 416 | i = nexttoken(); |
| 417 | } |
| 418 | } else /* immediate operand, or register */ |
| 419 | mref = FALSE; |
| 420 | |
| 421 | eval_reset(); |
| 422 | |
| 423 | value = evaluate (critical); |
H. Peter Anvin | ea83827 | 2002-04-30 20:51:53 +0000 | [diff] [blame] | 424 | if (forward) |
| 425 | result->forw_ref = TRUE; |
H. Peter Anvin | ea6e34d | 2002-04-30 20:51:32 +0000 | [diff] [blame] | 426 | if (!value) { /* error in evaluator */ |
| 427 | result->opcode = -1; /* unrecoverable parse error: */ |
| 428 | return result; /* ignore this instruction */ |
| 429 | } |
| 430 | if (i == ':' && mref) { /* it was seg:offset */ |
| 431 | seg = value; /* so shift this into the segment */ |
| 432 | i = nexttoken(); /* then skip the colon */ |
| 433 | if (i == TOKEN_SPECIAL) { /* another check for size override */ |
| 434 | switch ((int)tokval.t_integer) { |
| 435 | case S_WORD: |
| 436 | result->oprs[operand].addr_size = 16; |
| 437 | break; |
| 438 | case S_DWORD: |
| 439 | case S_LONG: |
| 440 | result->oprs[operand].addr_size = 32; |
| 441 | break; |
| 442 | default: |
| 443 | error (ERR_NONFATAL, "invalid size specification in" |
| 444 | " effective address"); |
| 445 | } |
| 446 | i = nexttoken(); |
| 447 | } |
| 448 | value = evaluate (critical); |
H. Peter Anvin | ea83827 | 2002-04-30 20:51:53 +0000 | [diff] [blame] | 449 | if (forward) |
| 450 | result->forw_ref = TRUE; |
H. Peter Anvin | ea6e34d | 2002-04-30 20:51:32 +0000 | [diff] [blame] | 451 | /* and get the offset */ |
| 452 | if (!value) { /* but, error in evaluator */ |
| 453 | result->opcode = -1; /* unrecoverable parse error: */ |
| 454 | return result; /* ignore this instruction */ |
| 455 | } |
| 456 | } else seg = NULL; |
| 457 | if (mref) { /* find ] at the end */ |
| 458 | if (i != ']') { |
| 459 | error (ERR_NONFATAL, "parser: expecting ]"); |
| 460 | do { /* error recovery again */ |
| 461 | i = nexttoken(); |
| 462 | } while (i != 0 && i != ','); |
| 463 | } else /* we got the required ] */ |
| 464 | i = nexttoken(); |
| 465 | } else { /* immediate operand */ |
| 466 | if (i != 0 && i != ',' && i != ':') { |
| 467 | error (ERR_NONFATAL, "comma or end of line expected"); |
| 468 | do { /* error recovery */ |
| 469 | i = nexttoken(); |
| 470 | } while (i != 0 && i != ','); |
| 471 | } else if (i == ':') { |
| 472 | result->oprs[operand].type |= COLON; |
| 473 | } |
| 474 | } |
| 475 | |
| 476 | /* now convert the exprs returned from evaluate() into operand |
| 477 | * descriptions... */ |
| 478 | |
| 479 | if (mref) { /* it's a memory reference */ |
| 480 | expr *e = value; |
| 481 | int b, i, s; /* basereg, indexreg, scale */ |
| 482 | long o; /* offset */ |
| 483 | |
| 484 | if (seg) { /* segment override */ |
| 485 | if (seg[1].type!=0 || seg->value!=1 || |
| 486 | REG_SREG & ~reg_flags[seg->type]) |
| 487 | error (ERR_NONFATAL, "invalid segment override"); |
| 488 | else if (result->nprefix == MAXPREFIX) |
| 489 | error (ERR_NONFATAL, |
| 490 | "instruction has more than %d prefixes", |
| 491 | MAXPREFIX); |
| 492 | else |
| 493 | result->prefixes[result->nprefix++] = seg->type; |
| 494 | } |
| 495 | |
| 496 | b = i = -1, o = s = 0; |
| 497 | |
| 498 | if (e->type < EXPR_SIMPLE) { /* this bit's a register */ |
| 499 | if (e->value == 1) /* in fact it can be basereg */ |
| 500 | b = e->type; |
| 501 | else /* no, it has to be indexreg */ |
| 502 | i = e->type, s = e->value; |
| 503 | e++; |
| 504 | } |
| 505 | if (e->type && e->type < EXPR_SIMPLE) {/* it's a second register */ |
| 506 | if (e->value != 1) { /* it has to be indexreg */ |
| 507 | if (i != -1) { /* but it can't be */ |
| 508 | error(ERR_NONFATAL, "invalid effective address"); |
| 509 | result->opcode = -1; |
| 510 | return result; |
| 511 | } else |
| 512 | i = e->type, s = e->value; |
| 513 | } else { /* it can be basereg */ |
| 514 | if (b != -1) /* or can it? */ |
| 515 | i = e->type, s = 1; |
| 516 | else |
| 517 | b = e->type; |
| 518 | } |
| 519 | e++; |
| 520 | } |
| 521 | if (e->type != 0) { /* is there an offset? */ |
| 522 | if (e->type < EXPR_SIMPLE) {/* in fact, is there an error? */ |
| 523 | error (ERR_NONFATAL, "invalid effective address"); |
| 524 | result->opcode = -1; |
| 525 | return result; |
| 526 | } else { |
| 527 | if (e->type == EXPR_SIMPLE) { |
| 528 | o = e->value; |
| 529 | e++; |
| 530 | } |
| 531 | if (e->type == EXPR_WRT) { |
| 532 | result->oprs[operand].wrt = e->value; |
| 533 | e++; |
| 534 | } else |
| 535 | result->oprs[operand].wrt = NO_SEG; |
H. Peter Anvin | ea83827 | 2002-04-30 20:51:53 +0000 | [diff] [blame] | 536 | /* |
| 537 | * Look for a segment base type. |
| 538 | */ |
| 539 | if (e->type && e->type < EXPR_SEGBASE) { |
| 540 | error (ERR_NONFATAL, "invalid effective address"); |
| 541 | result->opcode = -1; |
| 542 | return result; |
| 543 | } |
| 544 | while (e->type && e->value == 0) |
| 545 | e++; |
| 546 | if (e->type && e->value != 1) { |
| 547 | error (ERR_NONFATAL, "invalid effective address"); |
| 548 | result->opcode = -1; |
| 549 | return result; |
| 550 | } |
| 551 | if (e->type) { |
| 552 | result->oprs[operand].segment = e->type-EXPR_SEGBASE; |
H. Peter Anvin | ea6e34d | 2002-04-30 20:51:32 +0000 | [diff] [blame] | 553 | e++; |
| 554 | } else |
| 555 | result->oprs[operand].segment = NO_SEG; |
H. Peter Anvin | ea83827 | 2002-04-30 20:51:53 +0000 | [diff] [blame] | 556 | while (e->type && e->value == 0) |
| 557 | e++; |
| 558 | if (e->type) { |
| 559 | error (ERR_NONFATAL, "invalid effective address"); |
| 560 | result->opcode = -1; |
| 561 | return result; |
| 562 | } |
H. Peter Anvin | ea6e34d | 2002-04-30 20:51:32 +0000 | [diff] [blame] | 563 | } |
| 564 | } else { |
| 565 | o = 0; |
| 566 | result->oprs[operand].wrt = NO_SEG; |
| 567 | result->oprs[operand].segment = NO_SEG; |
| 568 | } |
| 569 | |
| 570 | if (e->type != 0) { /* there'd better be nothing left! */ |
| 571 | error (ERR_NONFATAL, "invalid effective address"); |
| 572 | result->opcode = -1; |
| 573 | return result; |
| 574 | } |
| 575 | |
| 576 | result->oprs[operand].type |= MEMORY; |
| 577 | if (b==-1 && (i==-1 || s==0)) |
| 578 | result->oprs[operand].type |= MEM_OFFS; |
| 579 | result->oprs[operand].basereg = b; |
| 580 | result->oprs[operand].indexreg = i; |
| 581 | result->oprs[operand].scale = s; |
| 582 | result->oprs[operand].offset = o; |
| 583 | } else { /* it's not a memory reference */ |
| 584 | if (is_reloc(value)) { /* it's immediate */ |
| 585 | result->oprs[operand].type |= IMMEDIATE; |
| 586 | result->oprs[operand].offset = reloc_value(value); |
| 587 | result->oprs[operand].segment = reloc_seg(value); |
| 588 | result->oprs[operand].wrt = reloc_wrt(value); |
| 589 | if (is_simple(value) && reloc_value(value)==1) |
| 590 | result->oprs[operand].type |= UNITY; |
| 591 | } else { /* it's a register */ |
| 592 | if (value->type>=EXPR_SIMPLE || value->value!=1) { |
| 593 | error (ERR_NONFATAL, "invalid operand type"); |
| 594 | result->opcode = -1; |
| 595 | return result; |
| 596 | } |
| 597 | /* clear overrides, except TO which applies to FPU regs */ |
| 598 | result->oprs[operand].type &= TO; |
| 599 | result->oprs[operand].type |= REGISTER; |
| 600 | result->oprs[operand].type |= reg_flags[value->type]; |
| 601 | result->oprs[operand].basereg = value->type; |
| 602 | } |
| 603 | } |
| 604 | } |
| 605 | |
| 606 | result->operands = operand; /* set operand count */ |
| 607 | |
| 608 | while (operand<3) /* clear remaining operands */ |
| 609 | result->oprs[operand++].type = 0; |
| 610 | |
| 611 | /* |
| 612 | * Transform RESW, RESD, RESQ, REST into RESB. |
| 613 | */ |
| 614 | switch (result->opcode) { |
| 615 | case I_RESW: result->opcode=I_RESB; result->oprs[0].offset*=2; break; |
| 616 | case I_RESD: result->opcode=I_RESB; result->oprs[0].offset*=4; break; |
| 617 | case I_RESQ: result->opcode=I_RESB; result->oprs[0].offset*=8; break; |
| 618 | case I_REST: result->opcode=I_RESB; result->oprs[0].offset*=10; break; |
| 619 | } |
| 620 | |
| 621 | return result; |
| 622 | } |
| 623 | |
| 624 | static int is_comma_next (void) { |
| 625 | char *p; |
| 626 | |
| 627 | p = bufptr; |
| 628 | while (isspace(*p)) p++; |
| 629 | return (*p == ',' || *p == ';' || !*p); |
| 630 | } |
| 631 | |
H. Peter Anvin | ea6e34d | 2002-04-30 20:51:32 +0000 | [diff] [blame] | 632 | /* |
| 633 | * This tokeniser routine has only one side effect, that of |
| 634 | * updating `bufptr'. Hence by saving `bufptr', lookahead may be |
| 635 | * performed. |
| 636 | */ |
| 637 | |
| 638 | static int nexttoken (void) { |
| 639 | char ourcopy[256], *r, *s; |
| 640 | |
| 641 | while (isspace(*bufptr)) bufptr++; |
| 642 | if (!*bufptr) return 0; |
| 643 | |
| 644 | /* we have a token; either an id, a number or a char */ |
| 645 | if (isidstart(*bufptr) || |
| 646 | (*bufptr == '$' && isidstart(bufptr[1]))) { |
| 647 | /* now we've got an identifier */ |
| 648 | int i; |
| 649 | int is_sym = FALSE; |
| 650 | |
| 651 | if (*bufptr == '$') { |
| 652 | is_sym = TRUE; |
| 653 | bufptr++; |
| 654 | } |
| 655 | |
| 656 | tokval.t_charptr = q; |
| 657 | *q++ = *bufptr++; |
| 658 | while (isidchar(*bufptr)) *q++ = *bufptr++; |
| 659 | *q++ = '\0'; |
| 660 | for (s=tokval.t_charptr, r=ourcopy; *s; s++) |
| 661 | *r++ = tolower (*s); |
| 662 | *r = '\0'; |
| 663 | if (is_sym) |
| 664 | return TOKEN_ID; /* bypass all other checks */ |
| 665 | /* right, so we have an identifier sitting in temp storage. now, |
| 666 | * is it actually a register or instruction name, or what? */ |
| 667 | if ((tokval.t_integer=bsi(ourcopy, reg_names, |
| 668 | elements(reg_names)))>=0) |
| 669 | return TOKEN_REG; |
| 670 | if ((tokval.t_integer=bsi(ourcopy, insn_names, |
| 671 | elements(insn_names)))>=0) |
| 672 | return TOKEN_INSN; |
| 673 | for (i=0; i<elements(icn); i++) |
| 674 | if (!strncmp(ourcopy, icn[i], strlen(icn[i]))) { |
| 675 | char *p = ourcopy + strlen(icn[i]); |
| 676 | tokval.t_integer = ico[i]; |
| 677 | if ((tokval.t_inttwo=bsi(p, conditions, |
| 678 | elements(conditions)))>=0) |
| 679 | return TOKEN_INSN; |
| 680 | } |
| 681 | if ((tokval.t_integer=bsi(ourcopy, prefix_names, |
| 682 | elements(prefix_names)))>=0) { |
| 683 | tokval.t_integer += PREFIX_ENUM_START; |
| 684 | return TOKEN_PREFIX; |
| 685 | } |
| 686 | if ((tokval.t_integer=bsi(ourcopy, special_names, |
| 687 | elements(special_names)))>=0) |
| 688 | return TOKEN_SPECIAL; |
| 689 | if (!strcmp(ourcopy, "seg")) |
| 690 | return TOKEN_SEG; |
| 691 | if (!strcmp(ourcopy, "wrt")) |
| 692 | return TOKEN_WRT; |
| 693 | return TOKEN_ID; |
| 694 | } else if (*bufptr == '$' && !isnumchar(bufptr[1])) { |
| 695 | /* |
| 696 | * It's a $ sign with no following hex number; this must |
| 697 | * mean it's a Here token ($), evaluating to the current |
| 698 | * assembly location, or a Base token ($$), evaluating to |
| 699 | * the base of the current segment. |
| 700 | */ |
| 701 | bufptr++; |
| 702 | if (*bufptr == '$') { |
| 703 | bufptr++; |
| 704 | return TOKEN_BASE; |
| 705 | } |
| 706 | return TOKEN_HERE; |
| 707 | } else if (isnumstart(*bufptr)) { /* now we've got a number */ |
| 708 | char *r = q; |
| 709 | int rn_error; |
| 710 | |
| 711 | *q++ = *bufptr++; |
| 712 | while (isnumchar(*bufptr)) { |
| 713 | *q++ = *bufptr++; |
| 714 | } |
| 715 | if (*bufptr == '.') { |
| 716 | /* |
| 717 | * a floating point constant |
| 718 | */ |
| 719 | *q++ = *bufptr++; |
| 720 | while (isnumchar(*bufptr)) { |
| 721 | *q++ = *bufptr++; |
| 722 | } |
| 723 | *q++ = '\0'; |
| 724 | tokval.t_charptr = r; |
| 725 | return TOKEN_FLOAT; |
| 726 | } |
| 727 | *q++ = '\0'; |
| 728 | tokval.t_integer = readnum(r, &rn_error); |
| 729 | if (rn_error) |
| 730 | return TOKEN_ERRNUM; /* some malformation occurred */ |
| 731 | tokval.t_charptr = NULL; |
| 732 | return TOKEN_NUM; |
| 733 | } else if (*bufptr == '\'' || *bufptr == '"') {/* a char constant */ |
| 734 | char quote = *bufptr++, *r; |
| 735 | r = tokval.t_charptr = bufptr; |
| 736 | while (*bufptr && *bufptr != quote) bufptr++; |
| 737 | tokval.t_inttwo = bufptr - r; /* store full version */ |
| 738 | if (!*bufptr) |
| 739 | return TOKEN_ERRNUM; /* unmatched quotes */ |
| 740 | tokval.t_integer = 0; |
| 741 | r = bufptr++; /* skip over final quote */ |
| 742 | while (quote != *--r) { |
| 743 | tokval.t_integer = (tokval.t_integer<<8) + (unsigned char) *r; |
| 744 | } |
| 745 | return TOKEN_NUM; |
| 746 | } else if (*bufptr == ';') { /* a comment has happened - stay */ |
| 747 | return 0; |
| 748 | } else if ((*bufptr == '>' || *bufptr == '<' || |
| 749 | *bufptr == '/' || *bufptr == '%') && bufptr[1] == *bufptr) { |
| 750 | bufptr += 2; |
| 751 | return (bufptr[-2] == '>' ? TOKEN_SHR : |
| 752 | bufptr[-2] == '<' ? TOKEN_SHL : |
| 753 | bufptr[-2] == '/' ? TOKEN_SDIV : |
| 754 | TOKEN_SMOD); |
| 755 | } else /* just an ordinary char */ |
| 756 | return (unsigned char) (*bufptr++); |
| 757 | } |
| 758 | |
/*
 * Binary search for `string' in `array', which holds `size' strings
 * sorted in strcmp() order.  Returns the index of the matching entry,
 * or -1 if there is none.
 */
static int bsi (char *string, char **array, int size) {
    int below = -1;             /* highest index known to sort too low */
    int above = size;           /* lowest index known to sort too high */

    while (above - below > 1) {
        int mid = (below + above) / 2;
        int cmp = strcmp(string, array[mid]);

        if (cmp == 0)
            return mid;
        if (cmp < 0)
            above = mid;
        else
            below = mid;
    }
    return -1;
}
| 774 | |
| 775 | void cleanup_insn (insn *i) { |
| 776 | extop *e; |
| 777 | |
| 778 | while (i->eops) { |
| 779 | e = i->eops; |
| 780 | i->eops = i->eops->next; |
| 781 | nasm_free (e); |
| 782 | } |
| 783 | } |
| 784 | |
| 785 | /* ------------- Evaluator begins here ------------------ */ |
| 786 | |
| 787 | static expr exprtempstorage[1024], *tempptr; /* store exprs in here */ |
| 788 | |
| 789 | /* |
| 790 | * Add two vector datatypes. We have some bizarre behaviour on far- |
| 791 | * absolute segment types: we preserve them during addition _only_ |
| 792 | * if one of the segments is a truly pure scalar. |
| 793 | */ |
| 794 | static expr *add_vectors(expr *p, expr *q) { |
| 795 | expr *r = tempptr; |
| 796 | int preserve; |
| 797 | |
| 798 | preserve = is_really_simple(p) || is_really_simple(q); |
| 799 | |
| 800 | while (p->type && q->type && |
| 801 | p->type < EXPR_SEGBASE+SEG_ABS && |
| 802 | q->type < EXPR_SEGBASE+SEG_ABS) |
| 803 | if (p->type > q->type) { |
| 804 | tempptr->type = q->type; |
| 805 | tempptr->value = q->value; |
| 806 | tempptr++, q++; |
| 807 | } else if (p->type < q->type) { |
| 808 | tempptr->type = p->type; |
| 809 | tempptr->value = p->value; |
| 810 | tempptr++, p++; |
| 811 | } else { /* *p and *q have same type */ |
| 812 | tempptr->type = p->type; |
| 813 | tempptr->value = p->value + q->value; |
| 814 | tempptr++, p++, q++; |
| 815 | } |
| 816 | while (p->type && |
| 817 | (preserve || p->type < EXPR_SEGBASE+SEG_ABS)) { |
| 818 | tempptr->type = p->type; |
| 819 | tempptr->value = p->value; |
| 820 | tempptr++, p++; |
| 821 | } |
| 822 | while (q->type && |
| 823 | (preserve || q->type < EXPR_SEGBASE+SEG_ABS)) { |
| 824 | tempptr->type = q->type; |
| 825 | tempptr->value = q->value; |
| 826 | tempptr++, q++; |
| 827 | } |
| 828 | (tempptr++)->type = 0; |
| 829 | |
| 830 | return r; |
| 831 | } |
| 832 | |
| 833 | /* |
| 834 | * Multiply a vector by a scalar. Strip far-absolute segment part |
| 835 | * if present. |
| 836 | */ |
| 837 | static expr *scalar_mult(expr *vect, long scalar) { |
| 838 | expr *p = vect; |
| 839 | |
| 840 | while (p->type && p->type < EXPR_SEGBASE+SEG_ABS) { |
| 841 | p->value = scalar * (p->value); |
| 842 | p++; |
| 843 | } |
| 844 | p->type = 0; |
| 845 | |
| 846 | return vect; |
| 847 | } |
| 848 | |
| 849 | static expr *scalarvect (long scalar) { |
| 850 | expr *p = tempptr; |
| 851 | tempptr->type = EXPR_SIMPLE; |
| 852 | tempptr->value = scalar; |
| 853 | tempptr++; |
| 854 | tempptr->type = 0; |
| 855 | tempptr++; |
| 856 | return p; |
| 857 | } |
| 858 | |
| 859 | /* |
| 860 | * Return TRUE if the argument is a simple scalar. (Or a far- |
| 861 | * absolute, which counts.) |
| 862 | */ |
| 863 | static int is_simple (expr *vect) { |
| 864 | while (vect->type && !vect->value) |
| 865 | vect++; |
| 866 | if (!vect->type) |
| 867 | return 1; |
| 868 | if (vect->type != EXPR_SIMPLE) |
| 869 | return 0; |
| 870 | do { |
| 871 | vect++; |
| 872 | } while (vect->type && !vect->value); |
| 873 | if (vect->type && vect->type < EXPR_SEGBASE+SEG_ABS) return 0; |
| 874 | return 1; |
| 875 | } |
| 876 | |
| 877 | /* |
| 878 | * Return TRUE if the argument is a simple scalar, _NOT_ a far- |
| 879 | * absolute. |
| 880 | */ |
| 881 | static int is_really_simple (expr *vect) { |
| 882 | while (vect->type && !vect->value) |
| 883 | vect++; |
| 884 | if (!vect->type) |
| 885 | return 1; |
| 886 | if (vect->type != EXPR_SIMPLE) |
| 887 | return 0; |
| 888 | do { |
| 889 | vect++; |
| 890 | } while (vect->type && !vect->value); |
| 891 | if (vect->type) return 0; |
| 892 | return 1; |
| 893 | } |
| 894 | |
| 895 | /* |
| 896 | * Return TRUE if the argument is relocatable (i.e. a simple |
| 897 | * scalar, plus at most one segment-base, plus possibly a WRT). |
| 898 | */ |
| 899 | static int is_reloc (expr *vect) { |
| 900 | while (vect->type && !vect->value) |
| 901 | vect++; |
| 902 | if (!vect->type) |
| 903 | return 1; |
| 904 | if (vect->type < EXPR_SIMPLE) |
| 905 | return 0; |
| 906 | if (vect->type == EXPR_SIMPLE) { |
| 907 | do { |
| 908 | vect++; |
| 909 | } while (vect->type && !vect->value); |
| 910 | if (!vect->type) |
| 911 | return 1; |
| 912 | } |
| 913 | do { |
| 914 | vect++; |
| 915 | } while (vect->type && (vect->type == EXPR_WRT || !vect->value)); |
| 916 | if (!vect->type) |
| 917 | return 1; |
| 918 | return 1; |
| 919 | } |
| 920 | |
| 921 | /* |
| 922 | * Return the scalar part of a relocatable vector. (Including |
| 923 | * simple scalar vectors - those qualify as relocatable.) |
| 924 | */ |
| 925 | static long reloc_value (expr *vect) { |
| 926 | while (vect->type && !vect->value) |
| 927 | vect++; |
| 928 | if (!vect->type) return 0; |
| 929 | if (vect->type == EXPR_SIMPLE) |
| 930 | return vect->value; |
| 931 | else |
| 932 | return 0; |
| 933 | } |
| 934 | |
| 935 | /* |
| 936 | * Return the segment number of a relocatable vector, or NO_SEG for |
| 937 | * simple scalars. |
| 938 | */ |
| 939 | static long reloc_seg (expr *vect) { |
| 940 | while (vect->type && (vect->type == EXPR_WRT || !vect->value)) |
| 941 | vect++; |
| 942 | if (vect->type == EXPR_SIMPLE) { |
| 943 | do { |
| 944 | vect++; |
| 945 | } while (vect->type && (vect->type == EXPR_WRT || !vect->value)); |
| 946 | } |
| 947 | if (!vect->type) |
| 948 | return NO_SEG; |
| 949 | else |
| 950 | return vect->type - EXPR_SEGBASE; |
| 951 | } |
| 952 | |
| 953 | /* |
| 954 | * Return the WRT segment number of a relocatable vector, or NO_SEG |
| 955 | * if no WRT part is present. |
| 956 | */ |
| 957 | static long reloc_wrt (expr *vect) { |
| 958 | while (vect->type && vect->type < EXPR_WRT) |
| 959 | vect++; |
| 960 | if (vect->type == EXPR_WRT) { |
| 961 | return vect->value; |
| 962 | } else |
| 963 | return NO_SEG; |
| 964 | } |
| 965 | |
| 966 | static void eval_reset(void) { |
| 967 | tempptr = exprtempstorage; /* initialise temporary storage */ |
| 968 | } |
| 969 | |
| 970 | /* |
| 971 | * The SEG operator: calculate the segment part of a relocatable |
| 972 | * value. Return NULL, as usual, if an error occurs. Report the |
| 973 | * error too. |
| 974 | */ |
| 975 | static expr *segment_part (expr *e) { |
| 976 | long seg; |
| 977 | |
| 978 | if (!is_reloc(e)) { |
| 979 | error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value"); |
| 980 | return NULL; |
| 981 | } |
| 982 | |
| 983 | seg = reloc_seg(e); |
| 984 | if (seg == NO_SEG) { |
| 985 | error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value"); |
| 986 | return NULL; |
| 987 | } else if (seg & SEG_ABS) |
| 988 | return scalarvect(seg & ~SEG_ABS); |
| 989 | else { |
| 990 | expr *f = tempptr++; |
| 991 | tempptr++->type = 0; |
| 992 | f->type = EXPR_SEGBASE+outfmt->segbase(seg+1); |
| 993 | f->value = 1; |
| 994 | return f; |
| 995 | } |
| 996 | } |
| 997 | |
/*
 * Recursive-descent parser. Each routine is called with a single
 * operand, `critical', which says how unresolved symbols are to be
 * treated: 1 and 2 make them an error condition (reported as "not
 * defined before use" and "undefined" respectively), while any
 * other value lets them through as forward references. Must update
 * the global `i' to reflect the token after the parsed string. May
 * return NULL.
 *
 * evaluate() should report its own errors: on return it is assumed
 * that if NULL has been returned, the error has already been
 * reported.
 */

/*
 * Grammar parsed is:
 *
 * expr  : expr0 [ WRT expr6 ]
 * expr0 : expr1 [ {|} expr1...]
 * expr1 : expr2 [ {^} expr2...]
 * expr2 : expr3 [ {&} expr3...]
 * expr3 : expr4 [ {<<,>>} expr4...]
 * expr4 : expr5 [ {+,-} expr5...]
 * expr5 : expr6 [ {*,/,%,//,%%} expr6...]
 * expr6 : { ~,+,-,SEG } expr6
 *       | (expr0)
 *       | symbol
 *       | $
 *       | number
 */
| 1025 | |
| 1026 | static expr *expr0(int), *expr1(int), *expr2(int), *expr3(int); |
| 1027 | static expr *expr4(int), *expr5(int), *expr6(int); |
| 1028 | |
| 1029 | static expr *expr0(int critical) { |
| 1030 | expr *e, *f; |
| 1031 | |
| 1032 | e = expr1(critical); |
| 1033 | if (!e) |
| 1034 | return NULL; |
| 1035 | while (i == '|') { |
| 1036 | i = nexttoken(); |
| 1037 | f = expr1(critical); |
| 1038 | if (!f) |
| 1039 | return NULL; |
| 1040 | if (!is_simple(e) || !is_simple(f)) { |
| 1041 | error(ERR_NONFATAL, "`|' operator may only be applied to" |
| 1042 | " scalar values"); |
| 1043 | } |
| 1044 | e = scalarvect (reloc_value(e) | reloc_value(f)); |
| 1045 | } |
| 1046 | return e; |
| 1047 | } |
| 1048 | |
| 1049 | static expr *expr1(int critical) { |
| 1050 | expr *e, *f; |
| 1051 | |
| 1052 | e = expr2(critical); |
| 1053 | if (!e) |
| 1054 | return NULL; |
| 1055 | while (i == '^') { |
| 1056 | i = nexttoken(); |
| 1057 | f = expr2(critical); |
| 1058 | if (!f) |
| 1059 | return NULL; |
| 1060 | if (!is_simple(e) || !is_simple(f)) { |
| 1061 | error(ERR_NONFATAL, "`^' operator may only be applied to" |
| 1062 | " scalar values"); |
| 1063 | } |
| 1064 | e = scalarvect (reloc_value(e) ^ reloc_value(f)); |
| 1065 | } |
| 1066 | return e; |
| 1067 | } |
| 1068 | |
| 1069 | static expr *expr2(int critical) { |
| 1070 | expr *e, *f; |
| 1071 | |
| 1072 | e = expr3(critical); |
| 1073 | if (!e) |
| 1074 | return NULL; |
| 1075 | while (i == '&') { |
| 1076 | i = nexttoken(); |
| 1077 | f = expr3(critical); |
| 1078 | if (!f) |
| 1079 | return NULL; |
| 1080 | if (!is_simple(e) || !is_simple(f)) { |
| 1081 | error(ERR_NONFATAL, "`&' operator may only be applied to" |
| 1082 | " scalar values"); |
| 1083 | } |
| 1084 | e = scalarvect (reloc_value(e) & reloc_value(f)); |
| 1085 | } |
| 1086 | return e; |
| 1087 | } |
| 1088 | |
| 1089 | static expr *expr3(int critical) { |
| 1090 | expr *e, *f; |
| 1091 | |
| 1092 | e = expr4(critical); |
| 1093 | if (!e) |
| 1094 | return NULL; |
| 1095 | while (i == TOKEN_SHL || i == TOKEN_SHR) { |
| 1096 | int j = i; |
| 1097 | i = nexttoken(); |
| 1098 | f = expr4(critical); |
| 1099 | if (!f) |
| 1100 | return NULL; |
| 1101 | if (!is_simple(e) || !is_simple(f)) { |
| 1102 | error(ERR_NONFATAL, "shift operator may only be applied to" |
| 1103 | " scalar values"); |
| 1104 | } |
| 1105 | switch (j) { |
| 1106 | case TOKEN_SHL: |
| 1107 | e = scalarvect (reloc_value(e) << reloc_value(f)); |
| 1108 | break; |
| 1109 | case TOKEN_SHR: |
| 1110 | e = scalarvect (((unsigned long)reloc_value(e)) >> |
| 1111 | reloc_value(f)); |
| 1112 | break; |
| 1113 | } |
| 1114 | } |
| 1115 | return e; |
| 1116 | } |
| 1117 | |
| 1118 | static expr *expr4(int critical) { |
| 1119 | expr *e, *f; |
| 1120 | |
| 1121 | e = expr5(critical); |
| 1122 | if (!e) |
| 1123 | return NULL; |
| 1124 | while (i == '+' || i == '-') { |
| 1125 | int j = i; |
| 1126 | i = nexttoken(); |
| 1127 | f = expr5(critical); |
| 1128 | if (!f) |
| 1129 | return NULL; |
| 1130 | switch (j) { |
| 1131 | case '+': |
| 1132 | e = add_vectors (e, f); |
| 1133 | break; |
| 1134 | case '-': |
| 1135 | e = add_vectors (e, scalar_mult(f, -1L)); |
| 1136 | break; |
| 1137 | } |
| 1138 | } |
| 1139 | return e; |
| 1140 | } |
| 1141 | |
| 1142 | static expr *expr5(int critical) { |
| 1143 | expr *e, *f; |
| 1144 | |
| 1145 | e = expr6(critical); |
| 1146 | if (!e) |
| 1147 | return NULL; |
| 1148 | while (i == '*' || i == '/' || i == '*' || |
| 1149 | i == TOKEN_SDIV || i == TOKEN_SMOD) { |
| 1150 | int j = i; |
| 1151 | i = nexttoken(); |
| 1152 | f = expr6(critical); |
| 1153 | if (!f) |
| 1154 | return NULL; |
| 1155 | if (j != '*' && (!is_simple(e) || !is_simple(f))) { |
| 1156 | error(ERR_NONFATAL, "division operator may only be applied to" |
| 1157 | " scalar values"); |
| 1158 | return NULL; |
| 1159 | } |
| 1160 | if (j != '*' && reloc_value(f) == 0) { |
| 1161 | error(ERR_NONFATAL, "division by zero"); |
| 1162 | return NULL; |
| 1163 | } |
| 1164 | switch (j) { |
| 1165 | case '*': |
| 1166 | if (is_simple(e)) |
| 1167 | e = scalar_mult (f, reloc_value(e)); |
| 1168 | else if (is_simple(f)) |
| 1169 | e = scalar_mult (e, reloc_value(f)); |
| 1170 | else { |
| 1171 | error(ERR_NONFATAL, "unable to multiply two " |
| 1172 | "non-scalar objects"); |
| 1173 | return NULL; |
| 1174 | } |
| 1175 | break; |
| 1176 | case '/': |
| 1177 | e = scalarvect (((unsigned long)reloc_value(e)) / |
| 1178 | ((unsigned long)reloc_value(f))); |
| 1179 | break; |
| 1180 | case '%': |
| 1181 | e = scalarvect (((unsigned long)reloc_value(e)) % |
| 1182 | ((unsigned long)reloc_value(f))); |
| 1183 | break; |
| 1184 | case TOKEN_SDIV: |
| 1185 | e = scalarvect (((signed long)reloc_value(e)) / |
| 1186 | ((signed long)reloc_value(f))); |
| 1187 | break; |
| 1188 | case TOKEN_SMOD: |
| 1189 | e = scalarvect (((signed long)reloc_value(e)) % |
| 1190 | ((signed long)reloc_value(f))); |
| 1191 | break; |
| 1192 | } |
| 1193 | } |
| 1194 | return e; |
| 1195 | } |
| 1196 | |
| 1197 | static expr *expr6(int critical) { |
| 1198 | expr *e; |
| 1199 | long label_seg, label_ofs; |
| 1200 | |
| 1201 | if (i == '-') { |
| 1202 | i = nexttoken(); |
| 1203 | e = expr6(critical); |
| 1204 | if (!e) |
| 1205 | return NULL; |
| 1206 | return scalar_mult (e, -1L); |
| 1207 | } else if (i == '+') { |
| 1208 | i = nexttoken(); |
| 1209 | return expr6(critical); |
| 1210 | } else if (i == '~') { |
| 1211 | i = nexttoken(); |
| 1212 | e = expr6(critical); |
| 1213 | if (!e) |
| 1214 | return NULL; |
| 1215 | if (!is_simple(e)) { |
| 1216 | error(ERR_NONFATAL, "`~' operator may only be applied to" |
| 1217 | " scalar values"); |
| 1218 | return NULL; |
| 1219 | } |
| 1220 | return scalarvect(~reloc_value(e)); |
| 1221 | } else if (i == TOKEN_SEG) { |
| 1222 | i = nexttoken(); |
| 1223 | e = expr6(critical); |
| 1224 | if (!e) |
| 1225 | return NULL; |
| 1226 | return segment_part(e); |
| 1227 | } else if (i == '(') { |
| 1228 | i = nexttoken(); |
| 1229 | e = expr0(critical); |
| 1230 | if (!e) |
| 1231 | return NULL; |
| 1232 | if (i != ')') { |
| 1233 | error(ERR_NONFATAL, "expecting `)'"); |
| 1234 | return NULL; |
| 1235 | } |
| 1236 | i = nexttoken(); |
| 1237 | return e; |
| 1238 | } else if (i == TOKEN_NUM || i == TOKEN_REG || i == TOKEN_ID || |
| 1239 | i == TOKEN_HERE || i == TOKEN_BASE) { |
| 1240 | e = tempptr; |
| 1241 | switch (i) { |
| 1242 | case TOKEN_NUM: |
| 1243 | e->type = EXPR_SIMPLE; |
| 1244 | e->value = tokval.t_integer; |
| 1245 | break; |
| 1246 | case TOKEN_REG: |
| 1247 | e->type = tokval.t_integer; |
| 1248 | e->value = 1; |
| 1249 | break; |
| 1250 | case TOKEN_ID: |
| 1251 | case TOKEN_HERE: |
| 1252 | case TOKEN_BASE: |
| 1253 | /* |
| 1254 | * Since the whole line is parsed before the label it |
| 1255 | * defines is given to the label manager, we have |
| 1256 | * problems with lines such as |
| 1257 | * |
| 1258 | * end: TIMES 512-(end-start) DB 0 |
| 1259 | * |
| 1260 | * where `end' is not known on pass one, despite not |
| 1261 | * really being a forward reference, and due to |
| 1262 | * criticality it is _needed_. Hence we check our label |
| 1263 | * against the currently defined one, and do our own |
| 1264 | * resolution of it if we have to. |
| 1265 | */ |
| 1266 | if (i == TOKEN_BASE) { |
| 1267 | label_seg = seg; |
| 1268 | label_ofs = 0; |
| 1269 | } else if (i == TOKEN_HERE || !strcmp(tokval.t_charptr, label)) { |
| 1270 | label_seg = seg; |
| 1271 | label_ofs = ofs; |
| 1272 | } else if (!labelfunc(tokval.t_charptr, &label_seg, &label_ofs)) { |
| 1273 | if (critical == 2) { |
| 1274 | error (ERR_NONFATAL, "symbol `%s' undefined", |
| 1275 | tokval.t_charptr); |
| 1276 | return NULL; |
| 1277 | } else if (critical == 1) { |
| 1278 | error (ERR_NONFATAL, "symbol `%s' not defined before use", |
| 1279 | tokval.t_charptr); |
| 1280 | return NULL; |
| 1281 | } else { |
H. Peter Anvin | ea83827 | 2002-04-30 20:51:53 +0000 | [diff] [blame] | 1282 | forward = TRUE; |
H. Peter Anvin | ea6e34d | 2002-04-30 20:51:32 +0000 | [diff] [blame] | 1283 | label_seg = seg; |
| 1284 | label_ofs = ofs; |
| 1285 | } |
| 1286 | } |
| 1287 | e->type = EXPR_SIMPLE; |
| 1288 | e->value = label_ofs; |
| 1289 | if (label_seg!=NO_SEG) { |
| 1290 | tempptr++; |
| 1291 | tempptr->type = EXPR_SEGBASE + label_seg; |
| 1292 | tempptr->value = 1; |
| 1293 | } |
| 1294 | break; |
| 1295 | } |
| 1296 | tempptr++; |
| 1297 | tempptr->type = 0; |
| 1298 | tempptr++; |
| 1299 | i = nexttoken(); |
| 1300 | return e; |
| 1301 | } else { |
| 1302 | error(ERR_NONFATAL, "expression syntax error"); |
| 1303 | return NULL; |
| 1304 | } |
| 1305 | } |
| 1306 | |
| 1307 | static expr *evaluate (int critical) { |
| 1308 | expr *e; |
| 1309 | expr *f = NULL; |
| 1310 | |
| 1311 | e = expr0 (critical); |
| 1312 | if (!e) |
| 1313 | return NULL; |
| 1314 | |
| 1315 | if (i == TOKEN_WRT) { |
| 1316 | if (!is_reloc(e)) { |
| 1317 | error(ERR_NONFATAL, "invalid left-hand operand to WRT"); |
| 1318 | return NULL; |
| 1319 | } |
| 1320 | i = nexttoken(); /* eat the WRT */ |
| 1321 | f = expr6 (critical); |
| 1322 | if (!f) |
| 1323 | return NULL; |
| 1324 | } |
| 1325 | e = scalar_mult (e, 1L); /* strip far-absolute segment part */ |
| 1326 | if (f) { |
| 1327 | expr *g = tempptr++; |
| 1328 | tempptr++->type = 0; |
| 1329 | g->type = EXPR_WRT; |
| 1330 | if (!is_reloc(f)) { |
| 1331 | error(ERR_NONFATAL, "invalid right-hand operand to WRT"); |
| 1332 | return NULL; |
| 1333 | } |
| 1334 | g->value = reloc_seg(f); |
| 1335 | if (g->value == NO_SEG) |
| 1336 | g->value = reloc_value(f) | SEG_ABS; |
| 1337 | else if (!(g->value & SEG_ABS) && !(g->value % 2) && critical) { |
| 1338 | error(ERR_NONFATAL, "invalid right-hand operand to WRT"); |
| 1339 | return NULL; |
| 1340 | } |
| 1341 | e = add_vectors (e, g); |
| 1342 | } |
| 1343 | return e; |
| 1344 | } |