masm.mac, parser: VERY limited MASM emulation package
Very limited MASM emulation.
The parser has been extended to emulate the PTR keyword if the
corresponding macro is enabled, and the MASM-style syntax
displacement[index] for memory operands is now recognized.
Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
diff --git a/asm/parser.c b/asm/parser.c
index 072e884..012364a 100644
--- a/asm/parser.c
+++ b/asm/parser.c
@@ -234,96 +234,91 @@
}
}
+static inline const expr *next_expr(const expr *e, const expr **next_list)
+{
+ e++;
+ if (!e->type) {
+ if (next_list) {
+ e = *next_list;
+ *next_list = NULL;
+ } else {
+ e = NULL;
+ }
+ }
+ return e;
+}
+
+static inline void init_operand(operand *op)
+{
+ memset(op, 0, sizeof *op);
+
+ op->basereg = -1;
+ op->indexreg = -1;
+ op->segment = NO_SEG;
+ op->wrt = NO_SEG;
+}
+
static int parse_mref(operand *op, const expr *e)
{
int b, i, s; /* basereg, indexreg, scale */
int64_t o; /* offset */
- b = i = -1;
- o = s = 0;
- op->segment = op->wrt = NO_SEG;
+ b = op->basereg;
+ i = op->indexreg;
+ s = op->scale;
+ o = op->offset;
- if (e->type && e->type <= EXPR_REG_END) { /* this bit's a register */
- bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
+ for (; e->type; e++) {
+ if (e->type <= EXPR_REG_END) {
+ bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
- if (is_gpr && e->value == 1)
- b = e->type; /* It can be basereg */
- else /* No, it has to be indexreg */
- i = e->type, s = e->value;
- e++;
- }
- if (e->type && e->type <= EXPR_REG_END) { /* it's a 2nd register */
- bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
-
- if (b != -1) /* If the first was the base, ... */
- i = e->type, s = e->value; /* second has to be indexreg */
-
- else if (!is_gpr || e->value != 1) {
- /* If both want to be index */
- nasm_nonfatal("invalid effective address: two index registers");
- return -1;
- } else
- b = e->type;
- e++;
- }
-
- if (e->type) { /* is there an offset? */
- if (e->type <= EXPR_REG_END) { /* in fact, is there an error? */
- nasm_nonfatal("invalid effective address: impossible register");
- return -1;
- } else {
- if (e->type == EXPR_UNKNOWN) {
- op->opflags |= OPFLAG_UNKNOWN;
- o = 0; /* doesn't matter what */
- while (e->type)
- e++; /* go to the end of the line */
+ if (is_gpr && e->value == 1 && b == -1) {
+ /* It can be basereg */
+ b = e->type;
+ } else if (i == -1) {
+ /* Must be index register */
+ i = e->type;
+ s = e->value;
} else {
- if (e->type == EXPR_SIMPLE) {
- o = e->value;
- e++;
- }
- if (e->type == EXPR_WRT) {
- op->wrt = e->value;
- e++;
- }
- /*
- * Look for a segment base type.
- */
- for (; e->type; e++) {
- if (!e->value)
- continue;
-
- if (e->type <= EXPR_REG_END) {
- nasm_nonfatal("invalid effective address: too many registers");
- return -1;
- } else if (e->type < EXPR_SEGBASE) {
- nasm_nonfatal("invalid effective address: bad subexpression type");
- return -1;
- } else if (e->value == 1) {
- if (op->segment != NO_SEG) {
- nasm_nonfatal("invalid effective address: multiple base segments");
- return -1;
- }
- op->segment = e->type - EXPR_SEGBASE;
- } else if (e->value == -1 &&
- e->type == location.segment + EXPR_SEGBASE &&
- !(op->opflags & OPFLAG_RELATIVE)) {
- op->opflags |= OPFLAG_RELATIVE;
- } else {
- nasm_nonfatal("invalid effective address: impossible segment base multiplier");
- return -1;
- }
- }
+ if (b == -1)
+ nasm_nonfatal("invalid effective address: two index registers");
+ else if (!is_gpr)
+ nasm_nonfatal("invalid effective address: impossible register");
+ else
+ nasm_nonfatal("invalid effective address: too many registers");
+ return -1;
}
+ } else if (e->type == EXPR_UNKNOWN) {
+ op->opflags |= OPFLAG_UNKNOWN;
+ } else if (e->type == EXPR_SIMPLE) {
+ o += e->value;
+ } else if (e->type == EXPR_WRT) {
+ op->wrt = e->value;
+ } else if (e->type >= EXPR_SEGBASE) {
+ if (e->value == 1) {
+ if (op->segment != NO_SEG) {
+ nasm_nonfatal("invalid effective address: multiple base segments");
+ return -1;
+ }
+ op->segment = e->type - EXPR_SEGBASE;
+ } else if (e->value == -1 &&
+ e->type == location.segment + EXPR_SEGBASE &&
+ !(op->opflags & OPFLAG_RELATIVE)) {
+ op->opflags |= OPFLAG_RELATIVE;
+ } else {
+ nasm_nonfatal("invalid effective address: impossible segment base multiplier");
+ return -1;
+ }
+ } else {
+ nasm_nonfatal("invalid effective address: bad subexpression type");
+ return -1;
}
- }
+ }
- nasm_assert(!e->type); /* We should be at the end */
-
- op->basereg = b;
+ op->basereg = b;
op->indexreg = i;
- op->scale = s;
- op->offset = o;
+ op->scale = s;
+ op->offset = o;
return 0;
}
@@ -419,6 +414,7 @@
bool critical;
bool first;
bool recover;
+ bool far_jmp_ok;
int i;
nasm_static_assert(P_none == 0);
@@ -740,20 +736,18 @@
* Now we begin to parse the operands. There may be up to four
* of these, separated by commas, and terminated by a zero token.
*/
+ far_jmp_ok = result->opcode == I_JMP || result->opcode == I_CALL;
for (opnum = 0; opnum < MAX_OPERANDS; opnum++) {
operand *op = &result->oprs[opnum];
expr *value; /* used most of the time */
- bool mref; /* is this going to be a memory ref? */
- bool bracket; /* is it a [] mref, or a & mref? */
+ bool mref = false; /* is this going to be a memory ref? */
+ int bracket = 0; /* is it a [] mref, or a "naked" mref? */
bool mib; /* compound (mib) mref? */
int setsize = 0;
decoflags_t brace_flags = 0; /* flags for decorators in braces */
- op->disp_size = 0; /* have to zero this whatever */
- op->eaflags = 0; /* and this */
- op->opflags = 0;
- op->decoflags = 0;
+ init_operand(op);
i = stdscan(NULL, &tokval);
if (i == TOKEN_EOS)
@@ -829,30 +823,55 @@
i = stdscan(NULL, &tokval);
}
- if (i == '[' || i == '&') { /* memory reference */
+ if (i == '[' || i == TOKEN_MASM_PTR || i == '&') {
+ /* memory reference */
mref = true;
- bracket = (i == '[');
- i = stdscan(NULL, &tokval); /* then skip the colon */
- while (i == TOKEN_SPECIAL || i == TOKEN_SIZE ||
- i == TOKEN_PREFIX) {
- process_size_override(result, op);
- i = stdscan(NULL, &tokval);
- }
- /* when a comma follows an opening bracket - [ , eax*4] */
- if (i == ',') {
- /* treat as if there is a zero displacement virtually */
- tokval.t_type = TOKEN_NUM;
- tokval.t_integer = 0;
- stdscan_set(stdscan_get() - 1); /* rewind the comma */
- }
- } else { /* immediate operand, or register */
- mref = false;
- bracket = false; /* placate optimisers */
+ bracket += (i == '[');
+ i = stdscan(NULL, &tokval);
}
- if ((op->type & FAR) && !mref &&
- result->opcode != I_JMP && result->opcode != I_CALL)
- nasm_nonfatal("invalid use of FAR operand specifier");
+ mref_more:
+ if (mref) {
+ bool done = false;
+ bool nofw = false;
+
+ while (!done) {
+ switch (i) {
+ case TOKEN_SPECIAL:
+ case TOKEN_SIZE:
+ case TOKEN_PREFIX:
+ process_size_override(result, op);
+ break;
+
+ case '[':
+ bracket++;
+ break;
+
+ case ',':
+ tokval.t_type = TOKEN_NUM;
+ tokval.t_integer = 0;
+ stdscan_set(stdscan_get() - 1); /* rewind the comma */
+ done = nofw = true;
+ break;
+
+ case TOKEN_MASM_FLAT:
+ i = stdscan(NULL, &tokval);
+ if (i != ':') {
+ nasm_nonfatal("unknown use of FLAT in MASM emulation");
+ nofw = true;
+ }
+ done = true;
+ break;
+
+ default:
+ done = nofw = true;
+ break;
+ }
+
+ if (!nofw)
+ i = stdscan(NULL, &tokval);
+ }
+ }
value = evaluate(stdscan, NULL, &tokval,
&op->opflags, critical, &hints);
@@ -862,7 +881,18 @@
}
if (!value) /* Error in evaluator */
goto fail;
- if (i == ':' && mref) { /* it was seg:offset */
+
+ if (i == '[' && !bracket) {
+ /* displacement[regs] syntax */
+ mref = true;
+ parse_mref(op, value); /* Process what we have so far */
+ goto mref_more;
+ }
+
+ if (i == ':' && (mref || !far_jmp_ok)) {
+ /* segment override? */
+ mref = true;
+
/*
* Process the segment override.
*/
@@ -879,29 +909,15 @@
}
i = stdscan(NULL, &tokval); /* then skip the colon */
- while (i == TOKEN_SPECIAL || i == TOKEN_SIZE ||
- i == TOKEN_PREFIX) {
- process_size_override(result, op);
- i = stdscan(NULL, &tokval);
- }
- value = evaluate(stdscan, NULL, &tokval,
- &op->opflags, critical, &hints);
- i = tokval.t_type;
- if (op->opflags & OPFLAG_FORWARD) {
- result->forw_ref = true;
- }
- /* and get the offset */
- if (!value) /* Error in evaluator */
- goto fail;
+ goto mref_more;
}
mib = false;
if (mref && bracket && i == ',') {
/* [seg:base+offset,index*scale] syntax (mib) */
+ operand o2; /* Index operand */
- operand o1, o2; /* Partial operands */
-
- if (parse_mref(&o1, value))
+ if (parse_mref(op, value))
goto fail;
i = stdscan(NULL, &tokval); /* Eat comma */
@@ -911,6 +927,7 @@
if (!value)
goto fail;
+ init_operand(&o2);
if (parse_mref(&o2, value))
goto fail;
@@ -920,18 +937,14 @@
o2.basereg = -1;
}
- if (o1.indexreg != -1 || o2.basereg != -1 || o2.offset != 0 ||
+ if (op->indexreg != -1 || o2.basereg != -1 || o2.offset != 0 ||
o2.segment != NO_SEG || o2.wrt != NO_SEG) {
nasm_nonfatal("invalid mib expression");
goto fail;
}
- op->basereg = o1.basereg;
op->indexreg = o2.indexreg;
op->scale = o2.scale;
- op->offset = o1.offset;
- op->segment = o1.segment;
- op->wrt = o1.wrt;
if (op->basereg != -1) {
op->hintbase = op->basereg;
@@ -948,21 +961,33 @@
}
recover = false;
- if (mref && bracket) { /* find ] at the end */
- if (i != ']') {
- nasm_nonfatal("parser: expecting ]");
- recover = true;
- } else { /* we got the required ] */
- i = stdscan(NULL, &tokval);
- if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
- /* parse opmask (and zeroing) after an operand */
- recover = parse_braces(&brace_flags);
- i = tokval.t_type;
- }
- if (i != 0 && i != ',') {
- nasm_nonfatal("comma or end of line expected");
+ if (mref) {
+ if (bracket == 1) {
+ if (i == ']') {
+ bracket--;
+ i = stdscan(NULL, &tokval);
+ } else {
+ nasm_nonfatal("expecting ] at end of memory operand");
recover = true;
}
+ } else if (bracket == 0) {
+ /* Do nothing */
+ } else if (bracket > 0) {
+ nasm_nonfatal("excess brackets in memory operand");
+ recover = true;
+ } else if (bracket < 0) {
+ nasm_nonfatal("unmatched ] in memory operand");
+ recover = true;
+ }
+
+ if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
+ /* parse opmask (and zeroing) after an operand */
+ recover = parse_braces(&brace_flags);
+ i = tokval.t_type;
+ }
+ if (!recover && i != 0 && i != ',') {
+ nasm_nonfatal("comma, decorator or end of line expected, got %d", i);
+ recover = true;
}
} else { /* immediate operand */
if (i != 0 && i != ',' && i != ':' &&
@@ -998,6 +1023,9 @@
op->hinttype = hints.type;
}
mref_set_optype(op);
+ } else if ((op->type & FAR) && !far_jmp_ok) {
+ nasm_nonfatal("invalid use of FAR operand specifier");
+ recover = true;
} else { /* it's not a memory reference */
if (is_just_unknown(value)) { /* it's immediate but unknown */
op->type |= IMMEDIATE;