Blame - parser.c - chromium.googlesource.com/chromium/deps/nasm

blob: a45bf0daaed185fffcee2d2b8383a62cd10fa93d [file] [log] [blame]

H. Peter Anvin	ea6e34d	2002-04-30 20:51:32 +0000	[diff] [blame]	1	/* parser.c source line parser for the Netwide Assembler
				2	*
				3	* The Netwide Assembler is copyright (C) 1996 Simon Tatham and
				4	* Julian Hall. All rights reserved. The software is
				5	* redistributable under the licence given in the file "Licence"
				6	* distributed in the NASM archive.
				7	*
				8	* initial version 27/iii/95 by Simon Tatham
				9	*/
				10
				11	#include <stdio.h>
				12	#include <stdlib.h>
				13	#include <stddef.h>
				14	#include <string.h>
				15	#include <ctype.h>
				16
				17	#include "nasm.h"
				18	#include "nasmlib.h"
				19	#include "parser.h"
				20	#include "float.h"
				21
				22	#include "names.c"
				23
				24
				/*
				 * Operand-type flags for each register.  The table is indexed by a
				 * register number: parse_line() indexes it with tokval.t_integer for
				 * a TOKEN_REG token and with the `type' field of register terms in
				 * an expr vector.
				 * NOTE(review): the entries presumably follow the order of reg_names
				 * in names.c (not visible here) - confirm before reordering either.
				 */
				25	static long reg_flags[] = { /* sizes and special flags */
				26	0, REG8, REG_AL, REG_AX, REG8, REG8, REG16, REG16, REG8, REG_CL,
				27	REG_CREG, REG_CREG, REG_CREG, REG_CR4, REG_CS, REG_CX, REG8,
				28	REG16, REG8, REG_DREG, REG_DREG, REG_DREG, REG_DREG, REG_DREG,
				29	REG_DREG, REG_DESS, REG_DX, REG_EAX, REG32, REG32, REG_ECX,
				30	REG32, REG32, REG_DESS, REG32, REG32, REG_FSGS, REG_FSGS,
				31	MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG,
				32	REG16, REG16, REG_DESS, FPU0, FPUREG, FPUREG, FPUREG, FPUREG,
				33	FPUREG, FPUREG, FPUREG, REG_TREG, REG_TREG, REG_TREG, REG_TREG,
				34	REG_TREG
				35	};
				36
				/*
				 * Values carried in tokval.t_integer for a TOKEN_SPECIAL token.
				 * nexttoken() produces them as the index bsi() finds in
				 * special_names, and parse_line() switches on them, so the
				 * enumerators must stay in the same order as special_names.
				 */
				37	enum { /* special tokens */
				38	S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_QWORD, S_SHORT, S_TO,
				39	S_TWORD, S_WORD
				40	};
				41
				/*
				 * Text of the special tokens.  nexttoken() looks identifiers up in
				 * this table with bsi(), a binary search using strcmp(), so the
				 * entries must remain sorted; the index found is used as the S_xxx
				 * value of the token.
				 */
				static char *special_names[] = {       /* and the actual text */
				    "byte", "dword", "far", "long", "near", "qword", "short", "to",
				    "tword", "word"
				};
				46
				/*
				 * Text of the instruction prefixes.  nexttoken() searches this table
				 * with bsi() (a binary search), so the entries must remain sorted;
				 * the index found, plus PREFIX_ENUM_START, becomes the prefix value
				 * stored in tokval.t_integer.
				 */
				47	static char *prefix_names[] = {
				48	"a16", "a32", "lock", "o16", "o32", "rep", "repe", "repne",
				49	"repnz", "repz", "times"
				50	};
				51
				52	/*
				53	* Evaluator datatype. Expressions, within the evaluator, are
				54	* stored as an array of these beasts, terminated by a record with
				55	* type==0. Mostly, it's a vector type: each type denotes some kind
				56	* of a component, and the value denotes the multiple of that
				57	* component present in the expression. The exception is the WRT
				58	* type, whose `value' field denotes the segment to which the
				59	* expression is relative. These segments will be segment-base
				60	* types, i.e. either odd segment values or SEG_ABS types. So it is
				61	* still valid to assume that anything with a `value' field of zero
				62	* is insignificant.
				63	*/
				64	typedef struct {
				65	long type; /* a register, or EXPR_xxx */
				66	long value; /* must be >= 32 bits */
				67	} expr;
				68
				69	static void eval_reset(void);
				70	static expr *evaluate(int);
				71
				72	/*
				73	* ASSUMPTION MADE HERE. The number of distinct register names
				74	* (i.e. possible "type" fields for an expr structure) does not
				75	* exceed 126.
				76	*/
				77	#define EXPR_SIMPLE 126
				78	#define EXPR_WRT 127
				79	#define EXPR_SEGBASE 128
				80
				81	static int is_reloc(expr *);
				82	static int is_simple(expr *);
				83	static int is_really_simple (expr *);
				84	static long reloc_value(expr *);
				85	static long reloc_seg(expr *);
				86	static long reloc_wrt(expr *);
				87
				88	enum { /* token types, other than chars */
				89	TOKEN_ID = 256, TOKEN_NUM, TOKEN_REG, TOKEN_INSN, TOKEN_ERRNUM,
				90	TOKEN_HERE, TOKEN_BASE, TOKEN_SPECIAL, TOKEN_PREFIX, TOKEN_SHL,
				91	TOKEN_SHR, TOKEN_SDIV, TOKEN_SMOD, TOKEN_SEG, TOKEN_WRT,
				92	TOKEN_FLOAT
				93	};
				94
				95	struct tokenval {
				96	long t_integer, t_inttwo;
				97	char *t_charptr;
				98	};
				99
				/*
				 * Scratch space into which nexttoken() copies the text of identifiers
				 * and numbers; `q' is the next free byte and is reset to the start of
				 * the buffer by parse_line() for every line.
				 */
				static char tempstorage[1024], *q;
				static int bsi (char *string, char **array, int size); /* binary search */
				102
				103	static int nexttoken (void);
				104	static int is_comma_next (void);
				105
				106	static char *bufptr;
				107	static int i;
				108	static struct tokenval tokval;
				109	static lfunc labelfunc;
				110	static efunc error;
				111	static char *label;
				112	static struct ofmt *outfmt;
				113
				114	static long seg, ofs;
				115
H. Peter Anvin	ea83827	2002-04-30 20:51:53 +0000	[diff] [blame^]	116	static int forward;
				117
H. Peter Anvin	ea6e34d	2002-04-30 20:51:32 +0000	[diff] [blame]	118	insn *parse_line (long segment, long offset, lfunc lookup_label, int pass,
				119	char buffer, insn result, struct ofmt *output,
				120	efunc errfunc) {
				121	int operand;
				122	int critical;
				123
H. Peter Anvin	ea83827	2002-04-30 20:51:53 +0000	[diff] [blame^]	124	forward = result->forw_ref = FALSE;
H. Peter Anvin	ea6e34d	2002-04-30 20:51:32 +0000	[diff] [blame]	125	q = tempstorage;
				126	bufptr = buffer;
				127	labelfunc = lookup_label;
				128	outfmt = output;
				129	error = errfunc;
				130	seg = segment;
				131	ofs = offset;
				132	label = "";
				133
				134	i = nexttoken();
				135
				136	result->eops = NULL; /* must do this, whatever happens */
				137
				138	if (i==0) { /* blank line - ignore */
				139	result->label = NULL; /* so, no label on it */
				140	result->opcode = -1; /* and no instruction either */
				141	return result;
				142	}
				143	if (i != TOKEN_ID && i != TOKEN_INSN && i != TOKEN_PREFIX &&
				144	(i!=TOKEN_REG \|\| (REG_SREG & ~reg_flags[tokval.t_integer]))) {
				145	error (ERR_NONFATAL, "label or instruction expected"
				146	" at start of line");
				147	result->label = NULL;
				148	result->opcode = -1;
				149	return result;
				150	}
				151
				152	if (i == TOKEN_ID) { /* there's a label here */
				153	label = result->label = tokval.t_charptr;
				154	i = nexttoken();
				155	if (i == ':') { /* skip over the optional colon */
				156	i = nexttoken();
				157	}
				158	} else /* no label; so, moving swiftly on */
				159	result->label = NULL;
				160
				161	if (i==0) {
				162	result->opcode = -1; /* this line contains just a label */
				163	return result;
				164	}
				165
				166	result->nprefix = 0;
				167	result->times = 1;
				168
				169	while (i == TOKEN_PREFIX \|\|
				170	(i==TOKEN_REG && !(REG_SREG & ~reg_flags[tokval.t_integer]))) {
				171	/*
				172	* Handle special case: the TIMES prefix.
				173	*/
				174	if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) {
				175	expr *value;
				176
				177	i = nexttoken();
				178	eval_reset();
				179	value = evaluate (pass);
				180	if (!value) { /* but, error in evaluator */
				181	result->opcode = -1; /* unrecoverable parse error: */
				182	return result; /* ignore this instruction */
				183	}
				184	if (!is_simple (value)) {
				185	error (ERR_NONFATAL,
				186	"non-constant argument supplied to TIMES");
				187	result->times = 1;
				188	} else
				189	result->times = value->value;
				190	} else {
				191	if (result->nprefix == MAXPREFIX)
				192	error (ERR_NONFATAL,
				193	"instruction has more than %d prefixes", MAXPREFIX);
				194	else
				195	result->prefixes[result->nprefix++] = tokval.t_integer;
				196	i = nexttoken();
				197	}
				198	}
				199
				200	if (i != TOKEN_INSN) {
				201	error (ERR_NONFATAL, "parser: instruction expected");
				202	result->opcode = -1;
				203	return result;
				204	}
				205
				206	result->opcode = tokval.t_integer;
				207	result->condition = tokval.t_inttwo;
				208
				209	/*
				210	* RESB, RESW and RESD cannot be satisfied with incorrectly
				211	* evaluated operands, since the correct values _must_ be known
				212	* on the first pass. Hence, even in pass one, we set the
				213	* `critical' flag on calling evaluate(), so that it will bomb
				214	* out on undefined symbols. Nasty, but there's nothing we can
				215	* do about it.
				216	*
				217	* For the moment, EQU has the same difficulty, so we'll
				218	* include that.
				219	*/
				220	if (result->opcode == I_RESB \|\|
				221	result->opcode == I_RESW \|\|
				222	result->opcode == I_RESD \|\|
				223	result->opcode == I_RESQ \|\|
				224	result->opcode == I_REST \|\|
				225	result->opcode == I_EQU)
				226	critical = pass;
				227	else
				228	critical = (pass==2 ? 2 : 0);
				229
				230	if (result->opcode == I_DB \|\|
				231	result->opcode == I_DW \|\|
				232	result->opcode == I_DD \|\|
				233	result->opcode == I_DQ \|\|
				234	result->opcode == I_DT) {
				235	extop eop, *tail = &result->eops;
				236	int oper_num = 0;
				237
				238	/*
				239	* Begin to read the DB/DW/DD/DQ/DT operands.
				240	*/
				241	while (1) {
				242	i = nexttoken();
				243	if (i == 0)
				244	break;
				245	eop = *tail = nasm_malloc(sizeof(extop));
				246	tail = &eop->next;
				247	eop->next = NULL;
				248	eop->type = EOT_NOTHING;
				249	oper_num++;
				250
				251	if (i == TOKEN_NUM && tokval.t_charptr && is_comma_next()) {
				252	eop->type = EOT_DB_STRING;
				253	eop->stringval = tokval.t_charptr;
				254	eop->stringlen = tokval.t_inttwo;
				255	i = nexttoken(); /* eat the comma */
				256	continue;
				257	}
				258
				259	if (i == TOKEN_FLOAT \|\| i == '-') {
				260	long sign = +1L;
				261
				262	if (i == '-') {
				263	char *save = bufptr;
				264	i = nexttoken();
				265	sign = -1L;
				266	if (i != TOKEN_FLOAT) {
				267	bufptr = save;
				268	i = '-';
				269	}
				270	}
				271
				272	if (i == TOKEN_FLOAT) {
				273	eop->type = EOT_DB_STRING;
				274	eop->stringval = q;
				275	if (result->opcode == I_DD)
				276	eop->stringlen = 4;
				277	else if (result->opcode == I_DQ)
				278	eop->stringlen = 8;
				279	else if (result->opcode == I_DT)
				280	eop->stringlen = 10;
				281	else {
				282	error(ERR_NONFATAL, "floating-point constant"
				283	" encountered in `D%c' instruction",
				284	result->opcode == I_DW ? 'W' : 'B');
				285	eop->type = EOT_NOTHING;
				286	}
				287	q += eop->stringlen;
				288	if (!float_const (tokval.t_charptr, sign,
				289	(unsigned char *)eop->stringval,
				290	eop->stringlen, error))
				291	eop->type = EOT_NOTHING;
				292	i = nexttoken(); /* eat the comma */
				293	continue;
				294	}
				295	}
				296
				297	/* anything else */ {
				298	expr *value;
				299	eval_reset();
				300	value = evaluate (critical);
				301	if (!value) { /* but, error in evaluator */
				302	result->opcode = -1;/* unrecoverable parse error: */
				303	return result; /* ignore this instruction */
				304	}
				305	if (is_reloc(value)) {
				306	eop->type = EOT_DB_NUMBER;
				307	eop->offset = reloc_value(value);
				308	eop->segment = reloc_seg(value);
				309	eop->wrt = reloc_wrt(value);
				310	} else {
				311	error (ERR_NONFATAL,
				312	"`%s' operand %d: expression is not simple"
				313	" or relocatable",
				314	insn_names[result->opcode], oper_num);
				315	}
				316	}
				317	}
				318	return result;
				319	}
				320
				321	/* right. Now we begin to parse the operands. There may be up to three
				322	* of these, separated by commas, and terminated by a zero token. */
				323
				324	for (operand = 0; operand < 3; operand++) {
				325	expr seg, value; /* used most of the time */
				326	int mref; /* is this going to be a memory ref? */
				327
				328	result->oprs[operand].addr_size = 0;/* have to zero this whatever */
				329	i = nexttoken();
				330	if (i == 0) break; /* end of operands: get out of here */
				331	result->oprs[operand].type = 0; /* so far, no override */
				332	while (i == TOKEN_SPECIAL) {/* size specifiers */
				333	switch ((int)tokval.t_integer) {
				334	case S_BYTE:
				335	result->oprs[operand].type \|= BITS8;
				336	break;
				337	case S_WORD:
				338	result->oprs[operand].type \|= BITS16;
				339	break;
				340	case S_DWORD:
				341	case S_LONG:
				342	result->oprs[operand].type \|= BITS32;
				343	break;
				344	case S_QWORD:
				345	result->oprs[operand].type \|= BITS64;
				346	break;
				347	case S_TWORD:
				348	result->oprs[operand].type \|= BITS80;
				349	break;
				350	case S_TO:
				351	result->oprs[operand].type \|= TO;
				352	break;
				353	case S_FAR:
				354	result->oprs[operand].type \|= FAR;
				355	break;
				356	case S_NEAR:
				357	result->oprs[operand].type \|= NEAR;
				358	break;
				359	case S_SHORT:
				360	result->oprs[operand].type \|= SHORT;
				361	break;
				362	}
				363	i = nexttoken();
				364	}
				365
				366	if (i == '[') { /* memory reference */
				367	i = nexttoken();
				368	mref = TRUE;
				369	if (i == TOKEN_SPECIAL) { /* check for address size override */
				370	switch ((int)tokval.t_integer) {
				371	case S_WORD:
				372	result->oprs[operand].addr_size = 16;
				373	break;
				374	case S_DWORD:
				375	case S_LONG:
				376	result->oprs[operand].addr_size = 32;
				377	break;
				378	default:
				379	error (ERR_NONFATAL, "invalid size specification in"
				380	" effective address");
				381	}
				382	i = nexttoken();
				383	}
				384	} else /* immediate operand, or register */
				385	mref = FALSE;
				386
				387	eval_reset();
				388
				389	value = evaluate (critical);
H. Peter Anvin	ea83827	2002-04-30 20:51:53 +0000	[diff] [blame^]	390	if (forward)
				391	result->forw_ref = TRUE;
H. Peter Anvin	ea6e34d	2002-04-30 20:51:32 +0000	[diff] [blame]	392	if (!value) { /* error in evaluator */
				393	result->opcode = -1; /* unrecoverable parse error: */
				394	return result; /* ignore this instruction */
				395	}
				396	if (i == ':' && mref) { /* it was seg:offset */
				397	seg = value; /* so shift this into the segment */
				398	i = nexttoken(); /* then skip the colon */
				399	if (i == TOKEN_SPECIAL) { /* another check for size override */
				400	switch ((int)tokval.t_integer) {
				401	case S_WORD:
				402	result->oprs[operand].addr_size = 16;
				403	break;
				404	case S_DWORD:
				405	case S_LONG:
				406	result->oprs[operand].addr_size = 32;
				407	break;
				408	default:
				409	error (ERR_NONFATAL, "invalid size specification in"
				410	" effective address");
				411	}
				412	i = nexttoken();
				413	}
				414	value = evaluate (critical);
H. Peter Anvin	ea83827	2002-04-30 20:51:53 +0000	[diff] [blame^]	415	if (forward)
				416	result->forw_ref = TRUE;
H. Peter Anvin	ea6e34d	2002-04-30 20:51:32 +0000	[diff] [blame]	417	/* and get the offset */
				418	if (!value) { /* but, error in evaluator */
				419	result->opcode = -1; /* unrecoverable parse error: */
				420	return result; /* ignore this instruction */
				421	}
				422	} else seg = NULL;
				423	if (mref) { /* find ] at the end */
				424	if (i != ']') {
				425	error (ERR_NONFATAL, "parser: expecting ]");
				426	do { /* error recovery again */
				427	i = nexttoken();
				428	} while (i != 0 && i != ',');
				429	} else /* we got the required ] */
				430	i = nexttoken();
				431	} else { /* immediate operand */
				432	if (i != 0 && i != ',' && i != ':') {
				433	error (ERR_NONFATAL, "comma or end of line expected");
				434	do { /* error recovery */
				435	i = nexttoken();
				436	} while (i != 0 && i != ',');
				437	} else if (i == ':') {
				438	result->oprs[operand].type \|= COLON;
				439	}
				440	}
				441
				442	/* now convert the exprs returned from evaluate() into operand
				443	* descriptions... */
				444
				445	if (mref) { /* it's a memory reference */
				446	expr *e = value;
				447	int b, i, s; /* basereg, indexreg, scale */
				448	long o; /* offset */
				449
				450	if (seg) { /* segment override */
				451	if (seg[1].type!=0 \|\| seg->value!=1 \|\|
				452	REG_SREG & ~reg_flags[seg->type])
				453	error (ERR_NONFATAL, "invalid segment override");
				454	else if (result->nprefix == MAXPREFIX)
				455	error (ERR_NONFATAL,
				456	"instruction has more than %d prefixes",
				457	MAXPREFIX);
				458	else
				459	result->prefixes[result->nprefix++] = seg->type;
				460	}
				461
				462	b = i = -1, o = s = 0;
				463
				464	if (e->type < EXPR_SIMPLE) { /* this bit's a register */
				465	if (e->value == 1) /* in fact it can be basereg */
				466	b = e->type;
				467	else /* no, it has to be indexreg */
				468	i = e->type, s = e->value;
				469	e++;
				470	}
				471	if (e->type && e->type < EXPR_SIMPLE) {/* it's a second register */
				472	if (e->value != 1) { /* it has to be indexreg */
				473	if (i != -1) { /* but it can't be */
				474	error(ERR_NONFATAL, "invalid effective address");
				475	result->opcode = -1;
				476	return result;
				477	} else
				478	i = e->type, s = e->value;
				479	} else { /* it can be basereg */
				480	if (b != -1) /* or can it? */
				481	i = e->type, s = 1;
				482	else
				483	b = e->type;
				484	}
				485	e++;
				486	}
				487	if (e->type != 0) { /* is there an offset? */
				488	if (e->type < EXPR_SIMPLE) {/* in fact, is there an error? */
				489	error (ERR_NONFATAL, "invalid effective address");
				490	result->opcode = -1;
				491	return result;
				492	} else {
				493	if (e->type == EXPR_SIMPLE) {
				494	o = e->value;
				495	e++;
				496	}
				497	if (e->type == EXPR_WRT) {
				498	result->oprs[operand].wrt = e->value;
				499	e++;
				500	} else
				501	result->oprs[operand].wrt = NO_SEG;
H. Peter Anvin	ea83827	2002-04-30 20:51:53 +0000	[diff] [blame^]	502	/*
				503	* Look for a segment base type.
				504	*/
				505	if (e->type && e->type < EXPR_SEGBASE) {
				506	error (ERR_NONFATAL, "invalid effective address");
				507	result->opcode = -1;
				508	return result;
				509	}
				510	while (e->type && e->value == 0)
				511	e++;
				512	if (e->type && e->value != 1) {
				513	error (ERR_NONFATAL, "invalid effective address");
				514	result->opcode = -1;
				515	return result;
				516	}
				517	if (e->type) {
				518	result->oprs[operand].segment = e->type-EXPR_SEGBASE;
H. Peter Anvin	ea6e34d	2002-04-30 20:51:32 +0000	[diff] [blame]	519	e++;
				520	} else
				521	result->oprs[operand].segment = NO_SEG;
H. Peter Anvin	ea83827	2002-04-30 20:51:53 +0000	[diff] [blame^]	522	while (e->type && e->value == 0)
				523	e++;
				524	if (e->type) {
				525	error (ERR_NONFATAL, "invalid effective address");
				526	result->opcode = -1;
				527	return result;
				528	}
H. Peter Anvin	ea6e34d	2002-04-30 20:51:32 +0000	[diff] [blame]	529	}
				530	} else {
				531	o = 0;
				532	result->oprs[operand].wrt = NO_SEG;
				533	result->oprs[operand].segment = NO_SEG;
				534	}
				535
				536	if (e->type != 0) { /* there'd better be nothing left! */
				537	error (ERR_NONFATAL, "invalid effective address");
				538	result->opcode = -1;
				539	return result;
				540	}
				541
				542	result->oprs[operand].type \|= MEMORY;
				543	if (b==-1 && (i==-1 \|\| s==0))
				544	result->oprs[operand].type \|= MEM_OFFS;
				545	result->oprs[operand].basereg = b;
				546	result->oprs[operand].indexreg = i;
				547	result->oprs[operand].scale = s;
				548	result->oprs[operand].offset = o;
				549	} else { /* it's not a memory reference */
				550	if (is_reloc(value)) { /* it's immediate */
				551	result->oprs[operand].type \|= IMMEDIATE;
				552	result->oprs[operand].offset = reloc_value(value);
				553	result->oprs[operand].segment = reloc_seg(value);
				554	result->oprs[operand].wrt = reloc_wrt(value);
				555	if (is_simple(value) && reloc_value(value)==1)
				556	result->oprs[operand].type \|= UNITY;
				557	} else { /* it's a register */
				558	if (value->type>=EXPR_SIMPLE \|\| value->value!=1) {
				559	error (ERR_NONFATAL, "invalid operand type");
				560	result->opcode = -1;
				561	return result;
				562	}
				563	/* clear overrides, except TO which applies to FPU regs */
				564	result->oprs[operand].type &= TO;
				565	result->oprs[operand].type \|= REGISTER;
				566	result->oprs[operand].type \|= reg_flags[value->type];
				567	result->oprs[operand].basereg = value->type;
				568	}
				569	}
				570	}
				571
				572	result->operands = operand; /* set operand count */
				573
				574	while (operand<3) /* clear remaining operands */
				575	result->oprs[operand++].type = 0;
				576
				577	/*
				578	* Transform RESW, RESD, RESQ, REST into RESB.
				579	*/
				580	switch (result->opcode) {
				581	case I_RESW: result->opcode=I_RESB; result->oprs[0].offset*=2; break;
				582	case I_RESD: result->opcode=I_RESB; result->oprs[0].offset*=4; break;
				583	case I_RESQ: result->opcode=I_RESB; result->oprs[0].offset*=8; break;
				584	case I_REST: result->opcode=I_RESB; result->oprs[0].offset*=10; break;
				585	}
				586
				587	return result;
				588	}
				589
				590	static int is_comma_next (void) {
				591	char *p;
				592
				593	p = bufptr;
				594	while (isspace(*p)) p++;
				595	return (p == ',' \|\| p == ';' \|\| !*p);
				596	}
				597
				/* isidstart matches any character that may start an identifier, and isidchar
				 * matches any character that may appear at places other than the start of an
				 * identifier. Every start character (letters, `_', `.', `?') is also accepted
				 * later in the identifier; digits, `$', `#', `@' and `~' may appear anywhere
				 * but at the start.
				 *
				 * The macros evaluate their argument more than once, so it must have no side
				 * effects. The argument is converted to unsigned char before being given to
				 * the <ctype.h> functions, whose behaviour is undefined for negative values. */

				#define isidstart(c) ( isalpha((unsigned char)(c)) || (c)=='_' || \
				                       (c)=='.' || (c)=='?' )
				#define isidchar(c) ( isidstart(c) || isdigit((unsigned char)(c)) || \
				                      (c)=='$' || (c)=='#' || (c)=='@' || (c)=='~' )

				/* Ditto for numeric constants. */

				#define isnumstart(c) ( isdigit((unsigned char)(c)) || (c)=='$' )
				#define isnumchar(c) ( isalnum((unsigned char)(c)) )

				/* This returns the numeric value of a given 'digit'. */

				#define numvalue(c) ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? (c)-'A'+10 : (c)-'0')
				616
				617	/*
				618	* This tokeniser routine has only one side effect, that of
				619	* updating `bufptr'. Hence by saving `bufptr', lookahead may be
				620	* performed.
				621	*/
				622
				623	static int nexttoken (void) {
				624	char ourcopy[256], r, s;
				625
				626	while (isspace(*bufptr)) bufptr++;
				627	if (!*bufptr) return 0;
				628
				629	/* we have a token; either an id, a number or a char */
				630	if (isidstart(*bufptr) \|\|
				631	(*bufptr == '$' && isidstart(bufptr[1]))) {
				632	/* now we've got an identifier */
				633	int i;
				634	int is_sym = FALSE;
				635
				636	if (*bufptr == '$') {
				637	is_sym = TRUE;
				638	bufptr++;
				639	}
				640
				641	tokval.t_charptr = q;
				642	q++ = bufptr++;
				643	while (isidchar(bufptr)) q++ = *bufptr++;
				644	*q++ = '\0';
				645	for (s=tokval.t_charptr, r=ourcopy; *s; s++)
				646	r++ = tolower (s);
				647	*r = '\0';
				648	if (is_sym)
				649	return TOKEN_ID; /* bypass all other checks */
				650	/* right, so we have an identifier sitting in temp storage. now,
				651	* is it actually a register or instruction name, or what? */
				652	if ((tokval.t_integer=bsi(ourcopy, reg_names,
				653	elements(reg_names)))>=0)
				654	return TOKEN_REG;
				655	if ((tokval.t_integer=bsi(ourcopy, insn_names,
				656	elements(insn_names)))>=0)
				657	return TOKEN_INSN;
				658	for (i=0; i<elements(icn); i++)
				659	if (!strncmp(ourcopy, icn[i], strlen(icn[i]))) {
				660	char *p = ourcopy + strlen(icn[i]);
				661	tokval.t_integer = ico[i];
				662	if ((tokval.t_inttwo=bsi(p, conditions,
				663	elements(conditions)))>=0)
				664	return TOKEN_INSN;
				665	}
				666	if ((tokval.t_integer=bsi(ourcopy, prefix_names,
				667	elements(prefix_names)))>=0) {
				668	tokval.t_integer += PREFIX_ENUM_START;
				669	return TOKEN_PREFIX;
				670	}
				671	if ((tokval.t_integer=bsi(ourcopy, special_names,
				672	elements(special_names)))>=0)
				673	return TOKEN_SPECIAL;
				674	if (!strcmp(ourcopy, "seg"))
				675	return TOKEN_SEG;
				676	if (!strcmp(ourcopy, "wrt"))
				677	return TOKEN_WRT;
				678	return TOKEN_ID;
				679	} else if (*bufptr == '$' && !isnumchar(bufptr[1])) {
				680	/*
				681	* It's a $ sign with no following hex number; this must
				682	* mean it's a Here token ($), evaluating to the current
				683	* assembly location, or a Base token ($$), evaluating to
				684	* the base of the current segment.
				685	*/
				686	bufptr++;
				687	if (*bufptr == '$') {
				688	bufptr++;
				689	return TOKEN_BASE;
				690	}
				691	return TOKEN_HERE;
				692	} else if (isnumstart(bufptr)) { / now we've got a number */
				693	char *r = q;
				694	int rn_error;
				695
				696	q++ = bufptr++;
				697	while (isnumchar(*bufptr)) {
				698	q++ = bufptr++;
				699	}
				700	if (*bufptr == '.') {
				701	/*
				702	* a floating point constant
				703	*/
				704	q++ = bufptr++;
				705	while (isnumchar(*bufptr)) {
				706	q++ = bufptr++;
				707	}
				708	*q++ = '\0';
				709	tokval.t_charptr = r;
				710	return TOKEN_FLOAT;
				711	}
				712	*q++ = '\0';
				713	tokval.t_integer = readnum(r, &rn_error);
				714	if (rn_error)
				715	return TOKEN_ERRNUM; /* some malformation occurred */
				716	tokval.t_charptr = NULL;
				717	return TOKEN_NUM;
				718	} else if (bufptr == '\'' \|\| bufptr == '"') {/* a char constant */
				719	char quote = bufptr++, r;
				720	r = tokval.t_charptr = bufptr;
				721	while (bufptr && bufptr != quote) bufptr++;
				722	tokval.t_inttwo = bufptr - r; /* store full version */
				723	if (!*bufptr)
				724	return TOKEN_ERRNUM; /* unmatched quotes */
				725	tokval.t_integer = 0;
				726	r = bufptr++; /* skip over final quote */
				727	while (quote != *--r) {
				728	tokval.t_integer = (tokval.t_integer<<8) + (unsigned char) *r;
				729	}
				730	return TOKEN_NUM;
				731	} else if (bufptr == ';') { / a comment has happened - stay */
				732	return 0;
				733	} else if ((bufptr == '>' \|\| bufptr == '<' \|\|
				734	bufptr == '/' \|\| bufptr == '%') && bufptr[1] == *bufptr) {
				735	bufptr += 2;
				736	return (bufptr[-2] == '>' ? TOKEN_SHR :
				737	bufptr[-2] == '<' ? TOKEN_SHL :
				738	bufptr[-2] == '/' ? TOKEN_SDIV :
				739	TOKEN_SMOD);
				740	} else /* just an ordinary char */
				741	return (unsigned char) (*bufptr++);
				742	}
				743
				/*
				 * Binary search: return the index of "string" in "array", or -1 if
				 * there is no match. "array" holds "size" strings and must be sorted
				 * in strcmp() order.
				 */
				static int bsi (char *string, char **array, int size) {
				    int lo = -1, hi = size;        /* always, lo < index < hi */

				    while (hi - lo >= 2) {
				        int mid = lo + (hi - lo) / 2;  /* cannot overflow */
				        int cmp = strcmp(string, array[mid]);

				        if (cmp < 0)               /* it's in the first half */
				            hi = mid;
				        else if (cmp > 0)          /* it's in the second half */
				            lo = mid;
				        else                       /* we've got it :) */
				            return mid;
				    }
				    return -1;                     /* we haven't got it :( */
				}
				759
				760	void cleanup_insn (insn *i) {
				761	extop *e;
				762
				763	while (i->eops) {
				764	e = i->eops;
				765	i->eops = i->eops->next;
				766	nasm_free (e);
				767	}
				768	}
				769
				770	/* ------------- Evaluator begins here ------------------ */
				771
				772	static expr exprtempstorage[1024], tempptr; / store exprs in here */
				773
				774	/*
				775	* Add two vector datatypes. We have some bizarre behaviour on far-
				776	* absolute segment types: we preserve them during addition _only_
				777	* if one of the segments is a truly pure scalar.
				778	*/
				779	static expr add_vectors(expr p, expr *q) {
				780	expr *r = tempptr;
				781	int preserve;
				782
				783	preserve = is_really_simple(p) \|\| is_really_simple(q);
				784
				785	while (p->type && q->type &&
				786	p->type < EXPR_SEGBASE+SEG_ABS &&
				787	q->type < EXPR_SEGBASE+SEG_ABS)
				788	if (p->type > q->type) {
				789	tempptr->type = q->type;
				790	tempptr->value = q->value;
				791	tempptr++, q++;
				792	} else if (p->type < q->type) {
				793	tempptr->type = p->type;
				794	tempptr->value = p->value;
				795	tempptr++, p++;
				796	} else { /* p and q have same type */
				797	tempptr->type = p->type;
				798	tempptr->value = p->value + q->value;
				799	tempptr++, p++, q++;
				800	}
				801	while (p->type &&
				802	(preserve \|\| p->type < EXPR_SEGBASE+SEG_ABS)) {
				803	tempptr->type = p->type;
				804	tempptr->value = p->value;
				805	tempptr++, p++;
				806	}
				807	while (q->type &&
				808	(preserve \|\| q->type < EXPR_SEGBASE+SEG_ABS)) {
				809	tempptr->type = q->type;
				810	tempptr->value = q->value;
				811	tempptr++, q++;
				812	}
				813	(tempptr++)->type = 0;
				814
				815	return r;
				816	}
				817
				818	/*
				819	* Multiply a vector by a scalar. Strip far-absolute segment part
				820	* if present.
				821	*/
				822	static expr scalar_mult(expr vect, long scalar) {
				823	expr *p = vect;
				824
				825	while (p->type && p->type < EXPR_SEGBASE+SEG_ABS) {
				826	p->value = scalar * (p->value);
				827	p++;
				828	}
				829	p->type = 0;
				830
				831	return vect;
				832	}
				833
				834	static expr *scalarvect (long scalar) {
				835	expr *p = tempptr;
				836	tempptr->type = EXPR_SIMPLE;
				837	tempptr->value = scalar;
				838	tempptr++;
				839	tempptr->type = 0;
				840	tempptr++;
				841	return p;
				842	}
				843
				844	/*
				845	* Return TRUE if the argument is a simple scalar. (Or a far-
				846	* absolute, which counts.)
				847	*/
				848	static int is_simple (expr *vect) {
				849	while (vect->type && !vect->value)
				850	vect++;
				851	if (!vect->type)
				852	return 1;
				853	if (vect->type != EXPR_SIMPLE)
				854	return 0;
				855	do {
				856	vect++;
				857	} while (vect->type && !vect->value);
				858	if (vect->type && vect->type < EXPR_SEGBASE+SEG_ABS) return 0;
				859	return 1;
				860	}
				861
				862	/*
				863	* Return TRUE if the argument is a simple scalar, _NOT_ a far-
				864	* absolute.
				865	*/
				866	static int is_really_simple (expr *vect) {
				867	while (vect->type && !vect->value)
				868	vect++;
				869	if (!vect->type)
				870	return 1;
				871	if (vect->type != EXPR_SIMPLE)
				872	return 0;
				873	do {
				874	vect++;
				875	} while (vect->type && !vect->value);
				876	if (vect->type) return 0;
				877	return 1;
				878	}
				879
				880	/*
				881	* Return TRUE if the argument is relocatable (i.e. a simple
				882	* scalar, plus at most one segment-base, plus possibly a WRT).
				883	*/
				884	static int is_reloc (expr *vect) {
				885	while (vect->type && !vect->value)
				886	vect++;
				887	if (!vect->type)
				888	return 1;
				889	if (vect->type < EXPR_SIMPLE)
				890	return 0;
				891	if (vect->type == EXPR_SIMPLE) {
				892	do {
				893	vect++;
				894	} while (vect->type && !vect->value);
				895	if (!vect->type)
				896	return 1;
				897	}
				898	do {
				899	vect++;
				900	} while (vect->type && (vect->type == EXPR_WRT \|\| !vect->value));
				901	if (!vect->type)
				902	return 1;
				903	return 1;
				904	}
				905
				906	/*
				907	* Return the scalar part of a relocatable vector. (Including
				908	* simple scalar vectors - those qualify as relocatable.)
				909	*/
				910	static long reloc_value (expr *vect) {
				911	while (vect->type && !vect->value)
				912	vect++;
				913	if (!vect->type) return 0;
				914	if (vect->type == EXPR_SIMPLE)
				915	return vect->value;
				916	else
				917	return 0;
				918	}
				919
				920	/*
				921	* Return the segment number of a relocatable vector, or NO_SEG for
				922	* simple scalars.
				923	*/
				924	static long reloc_seg (expr *vect) {
				925	while (vect->type && (vect->type == EXPR_WRT \|\| !vect->value))
				926	vect++;
				927	if (vect->type == EXPR_SIMPLE) {
				928	do {
				929	vect++;
				930	} while (vect->type && (vect->type == EXPR_WRT \|\| !vect->value));
				931	}
				932	if (!vect->type)
				933	return NO_SEG;
				934	else
				935	return vect->type - EXPR_SEGBASE;
				936	}
				937
				938	/*
				939	* Return the WRT segment number of a relocatable vector, or NO_SEG
				940	* if no WRT part is present.
				941	*/
				942	static long reloc_wrt (expr *vect) {
				943	while (vect->type && vect->type < EXPR_WRT)
				944	vect++;
				945	if (vect->type == EXPR_WRT) {
				946	return vect->value;
				947	} else
				948	return NO_SEG;
				949	}
				950
				951	static void eval_reset(void) {
				952	tempptr = exprtempstorage; /* initialise temporary storage */
				953	}
				954
				955	/*
				956	* The SEG operator: calculate the segment part of a relocatable
				957	* value. Return NULL, as usual, if an error occurs. Report the
				958	* error too.
				959	*/
				960	static expr segment_part (expr e) {
				961	long seg;
				962
				963	if (!is_reloc(e)) {
				964	error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value");
				965	return NULL;
				966	}
				967
				968	seg = reloc_seg(e);
				969	if (seg == NO_SEG) {
				970	error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value");
				971	return NULL;
				972	} else if (seg & SEG_ABS)
				973	return scalarvect(seg & ~SEG_ABS);
				974	else {
				975	expr *f = tempptr++;
				976	tempptr++->type = 0;
				977	f->type = EXPR_SEGBASE+outfmt->segbase(seg+1);
				978	f->value = 1;
				979	return f;
				980	}
				981	}
				982
				983	/*
				984	* Recursive-descent parser. Called with a single boolean operand,
				985	* which is TRUE if the evaluation is critical (i.e. unresolved
				986	* symbols are an error condition). Must update the global `i' to
				987	* reflect the token after the parsed string. May return NULL.
				988	*
				989	* evaluate() should report its own errors: on return it is assumed
				990	* that if NULL has been returned, the error has already been
				991	* reported.
				992	*/
				993
/*
 * Grammar parsed is:
 *
 * expr  : expr0 [ WRT expr6 ]
 * expr0 : expr1 [ {|} expr1]
 * expr1 : expr2 [ {^} expr2]
 * expr2 : expr3 [ {&} expr3]
 * expr3 : expr4 [ {<<,>>} expr4...]
 * expr4 : expr5 [ {+,-} expr5...]
 * expr5 : expr6 [ {*,/,%,//,%%} expr6...]
 * expr6 : { ~,+,-,SEG } expr6
 *       | (expr0)
 *       | symbol
 *       | $
 *       | number
 */
				1010
				1011	static expr expr0(int), expr1(int), expr2(int), expr3(int);
				1012	static expr expr4(int), expr5(int), *expr6(int);
				1013
				1014	static expr *expr0(int critical) {
				1015	expr e, f;
				1016
				1017	e = expr1(critical);
				1018	if (!e)
				1019	return NULL;
				1020	while (i == '\|') {
				1021	i = nexttoken();
				1022	f = expr1(critical);
				1023	if (!f)
				1024	return NULL;
				1025	if (!is_simple(e) \|\| !is_simple(f)) {
				1026	error(ERR_NONFATAL, "`\|' operator may only be applied to"
				1027	" scalar values");
				1028	}
				1029	e = scalarvect (reloc_value(e) \| reloc_value(f));
				1030	}
				1031	return e;
				1032	}
				1033
				1034	static expr *expr1(int critical) {
				1035	expr e, f;
				1036
				1037	e = expr2(critical);
				1038	if (!e)
				1039	return NULL;
				1040	while (i == '^') {
				1041	i = nexttoken();
				1042	f = expr2(critical);
				1043	if (!f)
				1044	return NULL;
				1045	if (!is_simple(e) \|\| !is_simple(f)) {
				1046	error(ERR_NONFATAL, "`^' operator may only be applied to"
				1047	" scalar values");
				1048	}
				1049	e = scalarvect (reloc_value(e) ^ reloc_value(f));
				1050	}
				1051	return e;
				1052	}
				1053
				1054	static expr *expr2(int critical) {
				1055	expr e, f;
				1056
				1057	e = expr3(critical);
				1058	if (!e)
				1059	return NULL;
				1060	while (i == '&') {
				1061	i = nexttoken();
				1062	f = expr3(critical);
				1063	if (!f)
				1064	return NULL;
				1065	if (!is_simple(e) \|\| !is_simple(f)) {
				1066	error(ERR_NONFATAL, "`&' operator may only be applied to"
				1067	" scalar values");
				1068	}
				1069	e = scalarvect (reloc_value(e) & reloc_value(f));
				1070	}
				1071	return e;
				1072	}
				1073
				1074	static expr *expr3(int critical) {
				1075	expr e, f;
				1076
				1077	e = expr4(critical);
				1078	if (!e)
				1079	return NULL;
				1080	while (i == TOKEN_SHL \|\| i == TOKEN_SHR) {
				1081	int j = i;
				1082	i = nexttoken();
				1083	f = expr4(critical);
				1084	if (!f)
				1085	return NULL;
				1086	if (!is_simple(e) \|\| !is_simple(f)) {
				1087	error(ERR_NONFATAL, "shift operator may only be applied to"
				1088	" scalar values");
				1089	}
				1090	switch (j) {
				1091	case TOKEN_SHL:
				1092	e = scalarvect (reloc_value(e) << reloc_value(f));
				1093	break;
				1094	case TOKEN_SHR:
				1095	e = scalarvect (((unsigned long)reloc_value(e)) >>
				1096	reloc_value(f));
				1097	break;
				1098	}
				1099	}
				1100	return e;
				1101	}
				1102
				1103	static expr *expr4(int critical) {
				1104	expr e, f;
				1105
				1106	e = expr5(critical);
				1107	if (!e)
				1108	return NULL;
				1109	while (i == '+' \|\| i == '-') {
				1110	int j = i;
				1111	i = nexttoken();
				1112	f = expr5(critical);
				1113	if (!f)
				1114	return NULL;
				1115	switch (j) {
				1116	case '+':
				1117	e = add_vectors (e, f);
				1118	break;
				1119	case '-':
				1120	e = add_vectors (e, scalar_mult(f, -1L));
				1121	break;
				1122	}
				1123	}
				1124	return e;
				1125	}
				1126
				1127	static expr *expr5(int critical) {
				1128	expr e, f;
				1129
				1130	e = expr6(critical);
				1131	if (!e)
				1132	return NULL;
				1133	while (i == '' \|\| i == '/' \|\| i == '' \|\|
				1134	i == TOKEN_SDIV \|\| i == TOKEN_SMOD) {
				1135	int j = i;
				1136	i = nexttoken();
				1137	f = expr6(critical);
				1138	if (!f)
				1139	return NULL;
				1140	if (j != '*' && (!is_simple(e) \|\| !is_simple(f))) {
				1141	error(ERR_NONFATAL, "division operator may only be applied to"
				1142	" scalar values");
				1143	return NULL;
				1144	}
				1145	if (j != '*' && reloc_value(f) == 0) {
				1146	error(ERR_NONFATAL, "division by zero");
				1147	return NULL;
				1148	}
				1149	switch (j) {
				1150	case '*':
				1151	if (is_simple(e))
				1152	e = scalar_mult (f, reloc_value(e));
				1153	else if (is_simple(f))
				1154	e = scalar_mult (e, reloc_value(f));
				1155	else {
				1156	error(ERR_NONFATAL, "unable to multiply two "
				1157	"non-scalar objects");
				1158	return NULL;
				1159	}
				1160	break;
				1161	case '/':
				1162	e = scalarvect (((unsigned long)reloc_value(e)) /
				1163	((unsigned long)reloc_value(f)));
				1164	break;
				1165	case '%':
				1166	e = scalarvect (((unsigned long)reloc_value(e)) %
				1167	((unsigned long)reloc_value(f)));
				1168	break;
				1169	case TOKEN_SDIV:
				1170	e = scalarvect (((signed long)reloc_value(e)) /
				1171	((signed long)reloc_value(f)));
				1172	break;
				1173	case TOKEN_SMOD:
				1174	e = scalarvect (((signed long)reloc_value(e)) %
				1175	((signed long)reloc_value(f)));
				1176	break;
				1177	}
				1178	}
				1179	return e;
				1180	}
				1181
				1182	static expr *expr6(int critical) {
				1183	expr *e;
				1184	long label_seg, label_ofs;
				1185
				1186	if (i == '-') {
				1187	i = nexttoken();
				1188	e = expr6(critical);
				1189	if (!e)
				1190	return NULL;
				1191	return scalar_mult (e, -1L);
				1192	} else if (i == '+') {
				1193	i = nexttoken();
				1194	return expr6(critical);
				1195	} else if (i == '~') {
				1196	i = nexttoken();
				1197	e = expr6(critical);
				1198	if (!e)
				1199	return NULL;
				1200	if (!is_simple(e)) {
				1201	error(ERR_NONFATAL, "`~' operator may only be applied to"
				1202	" scalar values");
				1203	return NULL;
				1204	}
				1205	return scalarvect(~reloc_value(e));
				1206	} else if (i == TOKEN_SEG) {
				1207	i = nexttoken();
				1208	e = expr6(critical);
				1209	if (!e)
				1210	return NULL;
				1211	return segment_part(e);
				1212	} else if (i == '(') {
				1213	i = nexttoken();
				1214	e = expr0(critical);
				1215	if (!e)
				1216	return NULL;
				1217	if (i != ')') {
				1218	error(ERR_NONFATAL, "expecting `)'");
				1219	return NULL;
				1220	}
				1221	i = nexttoken();
				1222	return e;
				1223	} else if (i == TOKEN_NUM \|\| i == TOKEN_REG \|\| i == TOKEN_ID \|\|
				1224	i == TOKEN_HERE \|\| i == TOKEN_BASE) {
				1225	e = tempptr;
				1226	switch (i) {
				1227	case TOKEN_NUM:
				1228	e->type = EXPR_SIMPLE;
				1229	e->value = tokval.t_integer;
				1230	break;
				1231	case TOKEN_REG:
				1232	e->type = tokval.t_integer;
				1233	e->value = 1;
				1234	break;
				1235	case TOKEN_ID:
				1236	case TOKEN_HERE:
				1237	case TOKEN_BASE:
				1238	/*
				1239	* Since the whole line is parsed before the label it
				1240	* defines is given to the label manager, we have
				1241	* problems with lines such as
				1242	*
				1243	* end: TIMES 512-(end-start) DB 0
				1244	*
				1245	* where `end' is not known on pass one, despite not
				1246	* really being a forward reference, and due to
				1247	* criticality it is _needed_. Hence we check our label
				1248	* against the currently defined one, and do our own
				1249	* resolution of it if we have to.
				1250	*/
				1251	if (i == TOKEN_BASE) {
				1252	label_seg = seg;
				1253	label_ofs = 0;
				1254	} else if (i == TOKEN_HERE \|\| !strcmp(tokval.t_charptr, label)) {
				1255	label_seg = seg;
				1256	label_ofs = ofs;
				1257	} else if (!labelfunc(tokval.t_charptr, &label_seg, &label_ofs)) {
				1258	if (critical == 2) {
				1259	error (ERR_NONFATAL, "symbol `%s' undefined",
				1260	tokval.t_charptr);
				1261	return NULL;
				1262	} else if (critical == 1) {
				1263	error (ERR_NONFATAL, "symbol `%s' not defined before use",
				1264	tokval.t_charptr);
				1265	return NULL;
				1266	} else {
H. Peter Anvin	ea83827	2002-04-30 20:51:53 +0000	[diff] [blame^]	1267	forward = TRUE;
H. Peter Anvin	ea6e34d	2002-04-30 20:51:32 +0000	[diff] [blame]	1268	label_seg = seg;
				1269	label_ofs = ofs;
				1270	}
				1271	}
				1272	e->type = EXPR_SIMPLE;
				1273	e->value = label_ofs;
				1274	if (label_seg!=NO_SEG) {
				1275	tempptr++;
				1276	tempptr->type = EXPR_SEGBASE + label_seg;
				1277	tempptr->value = 1;
				1278	}
				1279	break;
				1280	}
				1281	tempptr++;
				1282	tempptr->type = 0;
				1283	tempptr++;
				1284	i = nexttoken();
				1285	return e;
				1286	} else {
				1287	error(ERR_NONFATAL, "expression syntax error");
				1288	return NULL;
				1289	}
				1290	}
				1291
				1292	static expr *evaluate (int critical) {
				1293	expr *e;
				1294	expr *f = NULL;
				1295
				1296	e = expr0 (critical);
				1297	if (!e)
				1298	return NULL;
				1299
				1300	if (i == TOKEN_WRT) {
				1301	if (!is_reloc(e)) {
				1302	error(ERR_NONFATAL, "invalid left-hand operand to WRT");
				1303	return NULL;
				1304	}
				1305	i = nexttoken(); /* eat the WRT */
				1306	f = expr6 (critical);
				1307	if (!f)
				1308	return NULL;
				1309	}
				1310	e = scalar_mult (e, 1L); /* strip far-absolute segment part */
				1311	if (f) {
				1312	expr *g = tempptr++;
				1313	tempptr++->type = 0;
				1314	g->type = EXPR_WRT;
				1315	if (!is_reloc(f)) {
				1316	error(ERR_NONFATAL, "invalid right-hand operand to WRT");
				1317	return NULL;
				1318	}
				1319	g->value = reloc_seg(f);
				1320	if (g->value == NO_SEG)
				1321	g->value = reloc_value(f) \| SEG_ABS;
				1322	else if (!(g->value & SEG_ABS) && !(g->value % 2) && critical) {
				1323	error(ERR_NONFATAL, "invalid right-hand operand to WRT");
				1324	return NULL;
				1325	}
				1326	e = add_vectors (e, g);
				1327	}
				1328	return e;
				1329	}