Blame - parser.c - chromium.googlesource.com/chromium/deps/nasm

blob: 14c7a5ba710c8e40d11ce2478336f170055d077a [file] [log] [blame]

H. Peter Anvin	ea6e34d	2002-04-30 20:51:32 +0000	[diff] [blame^]	1	/* parser.c source line parser for the Netwide Assembler
				2	*
				3	* The Netwide Assembler is copyright (C) 1996 Simon Tatham and
				4	* Julian Hall. All rights reserved. The software is
				5	* redistributable under the licence given in the file "Licence"
				6	* distributed in the NASM archive.
				7	*
				8	* initial version 27/iii/95 by Simon Tatham
				9	*/
				10
				11	#include <stdio.h>
				12	#include <stdlib.h>
				13	#include <stddef.h>
				14	#include <string.h>
				15	#include <ctype.h>
				16
				17	#include "nasm.h"
				18	#include "nasmlib.h"
				19	#include "parser.h"
				20	#include "float.h"
				21
				22	#include "names.c"
				23
				24
				25	static long reg_flags[] = { /* sizes and special flags */
				26	0, REG8, REG_AL, REG_AX, REG8, REG8, REG16, REG16, REG8, REG_CL,
				27	REG_CREG, REG_CREG, REG_CREG, REG_CR4, REG_CS, REG_CX, REG8,
				28	REG16, REG8, REG_DREG, REG_DREG, REG_DREG, REG_DREG, REG_DREG,
				29	REG_DREG, REG_DESS, REG_DX, REG_EAX, REG32, REG32, REG_ECX,
				30	REG32, REG32, REG_DESS, REG32, REG32, REG_FSGS, REG_FSGS,
				31	MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG,
				32	REG16, REG16, REG_DESS, FPU0, FPUREG, FPUREG, FPUREG, FPUREG,
				33	FPUREG, FPUREG, FPUREG, REG_TREG, REG_TREG, REG_TREG, REG_TREG,
				34	REG_TREG
				35	};
				36
				37	enum { /* special tokens */
				38	S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_QWORD, S_SHORT, S_TO,
				39	S_TWORD, S_WORD
				40	};
				41
				42	static char special_names[] = { / and the actual text */
				43	"byte", "dword", "far", "long", "near", "qword", "short", "to",
				44	"tword", "word"
				45	};
				46
				47	static char *prefix_names[] = {
				48	"a16", "a32", "lock", "o16", "o32", "rep", "repe", "repne",
				49	"repnz", "repz", "times"
				50	};
				51
				52	/*
				53	* Evaluator datatype. Expressions, within the evaluator, are
				54	* stored as an array of these beasts, terminated by a record with
				55	* type==0. Mostly, it's a vector type: each type denotes some kind
				56	* of a component, and the value denotes the multiple of that
				57	* component present in the expression. The exception is the WRT
				58	* type, whose `value' field denotes the segment to which the
				59	* expression is relative. These segments will be segment-base
				60	* types, i.e. either odd segment values or SEG_ABS types. So it is
				61	* still valid to assume that anything with a `value' field of zero
				62	* is insignificant.
				63	*/
				64	typedef struct {
				65	long type; /* a register, or EXPR_xxx */
				66	long value; /* must be >= 32 bits */
				67	} expr;
				68
				69	static void eval_reset(void);
				70	static expr *evaluate(int);
				71
				72	/*
				73	* ASSUMPTION MADE HERE. The number of distinct register names
				74	* (i.e. possible "type" fields for an expr structure) does not
				75	* exceed 126.
				76	*/
				77	#define EXPR_SIMPLE 126
				78	#define EXPR_WRT 127
				79	#define EXPR_SEGBASE 128
				80
				81	static int is_reloc(expr *);
				82	static int is_simple(expr *);
				83	static int is_really_simple (expr *);
				84	static long reloc_value(expr *);
				85	static long reloc_seg(expr *);
				86	static long reloc_wrt(expr *);
				87
				88	enum { /* token types, other than chars */
				89	TOKEN_ID = 256, TOKEN_NUM, TOKEN_REG, TOKEN_INSN, TOKEN_ERRNUM,
				90	TOKEN_HERE, TOKEN_BASE, TOKEN_SPECIAL, TOKEN_PREFIX, TOKEN_SHL,
				91	TOKEN_SHR, TOKEN_SDIV, TOKEN_SMOD, TOKEN_SEG, TOKEN_WRT,
				92	TOKEN_FLOAT
				93	};
				94
				95	struct tokenval {
				96	long t_integer, t_inttwo;
				97	char *t_charptr;
				98	};
				99
				100	static char tempstorage[1024], *q;
				101	static int bsi (char string, char array, int size);/ binary search */
				102
				103	static int nexttoken (void);
				104	static int is_comma_next (void);
				105
				106	static char *bufptr;
				107	static int i;
				108	static struct tokenval tokval;
				109	static lfunc labelfunc;
				110	static efunc error;
				111	static char *label;
				112	static struct ofmt *outfmt;
				113
				114	static long seg, ofs;
				115
				116	insn *parse_line (long segment, long offset, lfunc lookup_label, int pass,
				117	char buffer, insn result, struct ofmt *output,
				118	efunc errfunc) {
				119	int operand;
				120	int critical;
				121
				122	q = tempstorage;
				123	bufptr = buffer;
				124	labelfunc = lookup_label;
				125	outfmt = output;
				126	error = errfunc;
				127	seg = segment;
				128	ofs = offset;
				129	label = "";
				130
				131	i = nexttoken();
				132
				133	result->eops = NULL; /* must do this, whatever happens */
				134
				135	if (i==0) { /* blank line - ignore */
				136	result->label = NULL; /* so, no label on it */
				137	result->opcode = -1; /* and no instruction either */
				138	return result;
				139	}
				140	if (i != TOKEN_ID && i != TOKEN_INSN && i != TOKEN_PREFIX &&
				141	(i!=TOKEN_REG \|\| (REG_SREG & ~reg_flags[tokval.t_integer]))) {
				142	error (ERR_NONFATAL, "label or instruction expected"
				143	" at start of line");
				144	result->label = NULL;
				145	result->opcode = -1;
				146	return result;
				147	}
				148
				149	if (i == TOKEN_ID) { /* there's a label here */
				150	label = result->label = tokval.t_charptr;
				151	i = nexttoken();
				152	if (i == ':') { /* skip over the optional colon */
				153	i = nexttoken();
				154	}
				155	} else /* no label; so, moving swiftly on */
				156	result->label = NULL;
				157
				158	if (i==0) {
				159	result->opcode = -1; /* this line contains just a label */
				160	return result;
				161	}
				162
				163	result->nprefix = 0;
				164	result->times = 1;
				165
				166	while (i == TOKEN_PREFIX \|\|
				167	(i==TOKEN_REG && !(REG_SREG & ~reg_flags[tokval.t_integer]))) {
				168	/*
				169	* Handle special case: the TIMES prefix.
				170	*/
				171	if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) {
				172	expr *value;
				173
				174	i = nexttoken();
				175	eval_reset();
				176	value = evaluate (pass);
				177	if (!value) { /* but, error in evaluator */
				178	result->opcode = -1; /* unrecoverable parse error: */
				179	return result; /* ignore this instruction */
				180	}
				181	if (!is_simple (value)) {
				182	error (ERR_NONFATAL,
				183	"non-constant argument supplied to TIMES");
				184	result->times = 1;
				185	} else
				186	result->times = value->value;
				187	} else {
				188	if (result->nprefix == MAXPREFIX)
				189	error (ERR_NONFATAL,
				190	"instruction has more than %d prefixes", MAXPREFIX);
				191	else
				192	result->prefixes[result->nprefix++] = tokval.t_integer;
				193	i = nexttoken();
				194	}
				195	}
				196
				197	if (i != TOKEN_INSN) {
				198	error (ERR_NONFATAL, "parser: instruction expected");
				199	result->opcode = -1;
				200	return result;
				201	}
				202
				203	result->opcode = tokval.t_integer;
				204	result->condition = tokval.t_inttwo;
				205
				206	/*
				207	* RESB, RESW and RESD cannot be satisfied with incorrectly
				208	* evaluated operands, since the correct values _must_ be known
				209	* on the first pass. Hence, even in pass one, we set the
				210	* `critical' flag on calling evaluate(), so that it will bomb
				211	* out on undefined symbols. Nasty, but there's nothing we can
				212	* do about it.
				213	*
				214	* For the moment, EQU has the same difficulty, so we'll
				215	* include that.
				216	*/
				217	if (result->opcode == I_RESB \|\|
				218	result->opcode == I_RESW \|\|
				219	result->opcode == I_RESD \|\|
				220	result->opcode == I_RESQ \|\|
				221	result->opcode == I_REST \|\|
				222	result->opcode == I_EQU)
				223	critical = pass;
				224	else
				225	critical = (pass==2 ? 2 : 0);
				226
				227	if (result->opcode == I_DB \|\|
				228	result->opcode == I_DW \|\|
				229	result->opcode == I_DD \|\|
				230	result->opcode == I_DQ \|\|
				231	result->opcode == I_DT) {
				232	extop eop, *tail = &result->eops;
				233	int oper_num = 0;
				234
				235	/*
				236	* Begin to read the DB/DW/DD/DQ/DT operands.
				237	*/
				238	while (1) {
				239	i = nexttoken();
				240	if (i == 0)
				241	break;
				242	eop = *tail = nasm_malloc(sizeof(extop));
				243	tail = &eop->next;
				244	eop->next = NULL;
				245	eop->type = EOT_NOTHING;
				246	oper_num++;
				247
				248	if (i == TOKEN_NUM && tokval.t_charptr && is_comma_next()) {
				249	eop->type = EOT_DB_STRING;
				250	eop->stringval = tokval.t_charptr;
				251	eop->stringlen = tokval.t_inttwo;
				252	i = nexttoken(); /* eat the comma */
				253	continue;
				254	}
				255
				256	if (i == TOKEN_FLOAT \|\| i == '-') {
				257	long sign = +1L;
				258
				259	if (i == '-') {
				260	char *save = bufptr;
				261	i = nexttoken();
				262	sign = -1L;
				263	if (i != TOKEN_FLOAT) {
				264	bufptr = save;
				265	i = '-';
				266	}
				267	}
				268
				269	if (i == TOKEN_FLOAT) {
				270	eop->type = EOT_DB_STRING;
				271	eop->stringval = q;
				272	if (result->opcode == I_DD)
				273	eop->stringlen = 4;
				274	else if (result->opcode == I_DQ)
				275	eop->stringlen = 8;
				276	else if (result->opcode == I_DT)
				277	eop->stringlen = 10;
				278	else {
				279	error(ERR_NONFATAL, "floating-point constant"
				280	" encountered in `D%c' instruction",
				281	result->opcode == I_DW ? 'W' : 'B');
				282	eop->type = EOT_NOTHING;
				283	}
				284	q += eop->stringlen;
				285	if (!float_const (tokval.t_charptr, sign,
				286	(unsigned char *)eop->stringval,
				287	eop->stringlen, error))
				288	eop->type = EOT_NOTHING;
				289	i = nexttoken(); /* eat the comma */
				290	continue;
				291	}
				292	}
				293
				294	/* anything else */ {
				295	expr *value;
				296	eval_reset();
				297	value = evaluate (critical);
				298	if (!value) { /* but, error in evaluator */
				299	result->opcode = -1;/* unrecoverable parse error: */
				300	return result; /* ignore this instruction */
				301	}
				302	if (is_reloc(value)) {
				303	eop->type = EOT_DB_NUMBER;
				304	eop->offset = reloc_value(value);
				305	eop->segment = reloc_seg(value);
				306	eop->wrt = reloc_wrt(value);
				307	} else {
				308	error (ERR_NONFATAL,
				309	"`%s' operand %d: expression is not simple"
				310	" or relocatable",
				311	insn_names[result->opcode], oper_num);
				312	}
				313	}
				314	}
				315	return result;
				316	}
				317
				318	/* right. Now we begin to parse the operands. There may be up to three
				319	* of these, separated by commas, and terminated by a zero token. */
				320
				321	for (operand = 0; operand < 3; operand++) {
				322	expr seg, value; /* used most of the time */
				323	int mref; /* is this going to be a memory ref? */
				324
				325	result->oprs[operand].addr_size = 0;/* have to zero this whatever */
				326	i = nexttoken();
				327	if (i == 0) break; /* end of operands: get out of here */
				328	result->oprs[operand].type = 0; /* so far, no override */
				329	while (i == TOKEN_SPECIAL) {/* size specifiers */
				330	switch ((int)tokval.t_integer) {
				331	case S_BYTE:
				332	result->oprs[operand].type \|= BITS8;
				333	break;
				334	case S_WORD:
				335	result->oprs[operand].type \|= BITS16;
				336	break;
				337	case S_DWORD:
				338	case S_LONG:
				339	result->oprs[operand].type \|= BITS32;
				340	break;
				341	case S_QWORD:
				342	result->oprs[operand].type \|= BITS64;
				343	break;
				344	case S_TWORD:
				345	result->oprs[operand].type \|= BITS80;
				346	break;
				347	case S_TO:
				348	result->oprs[operand].type \|= TO;
				349	break;
				350	case S_FAR:
				351	result->oprs[operand].type \|= FAR;
				352	break;
				353	case S_NEAR:
				354	result->oprs[operand].type \|= NEAR;
				355	break;
				356	case S_SHORT:
				357	result->oprs[operand].type \|= SHORT;
				358	break;
				359	}
				360	i = nexttoken();
				361	}
				362
				363	if (i == '[') { /* memory reference */
				364	i = nexttoken();
				365	mref = TRUE;
				366	if (i == TOKEN_SPECIAL) { /* check for address size override */
				367	switch ((int)tokval.t_integer) {
				368	case S_WORD:
				369	result->oprs[operand].addr_size = 16;
				370	break;
				371	case S_DWORD:
				372	case S_LONG:
				373	result->oprs[operand].addr_size = 32;
				374	break;
				375	default:
				376	error (ERR_NONFATAL, "invalid size specification in"
				377	" effective address");
				378	}
				379	i = nexttoken();
				380	}
				381	} else /* immediate operand, or register */
				382	mref = FALSE;
				383
				384	eval_reset();
				385
				386	value = evaluate (critical);
				387	if (!value) { /* error in evaluator */
				388	result->opcode = -1; /* unrecoverable parse error: */
				389	return result; /* ignore this instruction */
				390	}
				391	if (i == ':' && mref) { /* it was seg:offset */
				392	seg = value; /* so shift this into the segment */
				393	i = nexttoken(); /* then skip the colon */
				394	if (i == TOKEN_SPECIAL) { /* another check for size override */
				395	switch ((int)tokval.t_integer) {
				396	case S_WORD:
				397	result->oprs[operand].addr_size = 16;
				398	break;
				399	case S_DWORD:
				400	case S_LONG:
				401	result->oprs[operand].addr_size = 32;
				402	break;
				403	default:
				404	error (ERR_NONFATAL, "invalid size specification in"
				405	" effective address");
				406	}
				407	i = nexttoken();
				408	}
				409	value = evaluate (critical);
				410	/* and get the offset */
				411	if (!value) { /* but, error in evaluator */
				412	result->opcode = -1; /* unrecoverable parse error: */
				413	return result; /* ignore this instruction */
				414	}
				415	} else seg = NULL;
				416	if (mref) { /* find ] at the end */
				417	if (i != ']') {
				418	error (ERR_NONFATAL, "parser: expecting ]");
				419	do { /* error recovery again */
				420	i = nexttoken();
				421	} while (i != 0 && i != ',');
				422	} else /* we got the required ] */
				423	i = nexttoken();
				424	} else { /* immediate operand */
				425	if (i != 0 && i != ',' && i != ':') {
				426	error (ERR_NONFATAL, "comma or end of line expected");
				427	do { /* error recovery */
				428	i = nexttoken();
				429	} while (i != 0 && i != ',');
				430	} else if (i == ':') {
				431	result->oprs[operand].type \|= COLON;
				432	}
				433	}
				434
				435	/* now convert the exprs returned from evaluate() into operand
				436	* descriptions... */
				437
				438	if (mref) { /* it's a memory reference */
				439	expr *e = value;
				440	int b, i, s; /* basereg, indexreg, scale */
				441	long o; /* offset */
				442
				443	if (seg) { /* segment override */
				444	if (seg[1].type!=0 \|\| seg->value!=1 \|\|
				445	REG_SREG & ~reg_flags[seg->type])
				446	error (ERR_NONFATAL, "invalid segment override");
				447	else if (result->nprefix == MAXPREFIX)
				448	error (ERR_NONFATAL,
				449	"instruction has more than %d prefixes",
				450	MAXPREFIX);
				451	else
				452	result->prefixes[result->nprefix++] = seg->type;
				453	}
				454
				455	b = i = -1, o = s = 0;
				456
				457	if (e->type < EXPR_SIMPLE) { /* this bit's a register */
				458	if (e->value == 1) /* in fact it can be basereg */
				459	b = e->type;
				460	else /* no, it has to be indexreg */
				461	i = e->type, s = e->value;
				462	e++;
				463	}
				464	if (e->type && e->type < EXPR_SIMPLE) {/* it's a second register */
				465	if (e->value != 1) { /* it has to be indexreg */
				466	if (i != -1) { /* but it can't be */
				467	error(ERR_NONFATAL, "invalid effective address");
				468	result->opcode = -1;
				469	return result;
				470	} else
				471	i = e->type, s = e->value;
				472	} else { /* it can be basereg */
				473	if (b != -1) /* or can it? */
				474	i = e->type, s = 1;
				475	else
				476	b = e->type;
				477	}
				478	e++;
				479	}
				480	if (e->type != 0) { /* is there an offset? */
				481	if (e->type < EXPR_SIMPLE) {/* in fact, is there an error? */
				482	error (ERR_NONFATAL, "invalid effective address");
				483	result->opcode = -1;
				484	return result;
				485	} else {
				486	if (e->type == EXPR_SIMPLE) {
				487	o = e->value;
				488	e++;
				489	}
				490	if (e->type == EXPR_WRT) {
				491	result->oprs[operand].wrt = e->value;
				492	e++;
				493	} else
				494	result->oprs[operand].wrt = NO_SEG;
				495	if (e->type != 0) { /* is there a segment id? */
				496	if (e->type < EXPR_SEGBASE) {
				497	error (ERR_NONFATAL,
				498	"invalid effective address");
				499	result->opcode = -1;
				500	return result;
				501	} else
				502	result->oprs[operand].segment = (e->type -
				503	EXPR_SEGBASE);
				504	e++;
				505	} else
				506	result->oprs[operand].segment = NO_SEG;
				507	}
				508	} else {
				509	o = 0;
				510	result->oprs[operand].wrt = NO_SEG;
				511	result->oprs[operand].segment = NO_SEG;
				512	}
				513
				514	if (e->type != 0) { /* there'd better be nothing left! */
				515	error (ERR_NONFATAL, "invalid effective address");
				516	result->opcode = -1;
				517	return result;
				518	}
				519
				520	result->oprs[operand].type \|= MEMORY;
				521	if (b==-1 && (i==-1 \|\| s==0))
				522	result->oprs[operand].type \|= MEM_OFFS;
				523	result->oprs[operand].basereg = b;
				524	result->oprs[operand].indexreg = i;
				525	result->oprs[operand].scale = s;
				526	result->oprs[operand].offset = o;
				527	} else { /* it's not a memory reference */
				528	if (is_reloc(value)) { /* it's immediate */
				529	result->oprs[operand].type \|= IMMEDIATE;
				530	result->oprs[operand].offset = reloc_value(value);
				531	result->oprs[operand].segment = reloc_seg(value);
				532	result->oprs[operand].wrt = reloc_wrt(value);
				533	if (is_simple(value) && reloc_value(value)==1)
				534	result->oprs[operand].type \|= UNITY;
				535	} else { /* it's a register */
				536	if (value->type>=EXPR_SIMPLE \|\| value->value!=1) {
				537	error (ERR_NONFATAL, "invalid operand type");
				538	result->opcode = -1;
				539	return result;
				540	}
				541	/* clear overrides, except TO which applies to FPU regs */
				542	result->oprs[operand].type &= TO;
				543	result->oprs[operand].type \|= REGISTER;
				544	result->oprs[operand].type \|= reg_flags[value->type];
				545	result->oprs[operand].basereg = value->type;
				546	}
				547	}
				548	}
				549
				550	result->operands = operand; /* set operand count */
				551
				552	while (operand<3) /* clear remaining operands */
				553	result->oprs[operand++].type = 0;
				554
				555	/*
				556	* Transform RESW, RESD, RESQ, REST into RESB.
				557	*/
				558	switch (result->opcode) {
				559	case I_RESW: result->opcode=I_RESB; result->oprs[0].offset*=2; break;
				560	case I_RESD: result->opcode=I_RESB; result->oprs[0].offset*=4; break;
				561	case I_RESQ: result->opcode=I_RESB; result->oprs[0].offset*=8; break;
				562	case I_REST: result->opcode=I_RESB; result->oprs[0].offset*=10; break;
				563	}
				564
				565	return result;
				566	}
				567
				568	static int is_comma_next (void) {
				569	char *p;
				570
				571	p = bufptr;
				572	while (isspace(*p)) p++;
				573	return (p == ',' \|\| p == ';' \|\| !*p);
				574	}
				575
				576	/* isidstart matches any character that may start an identifier, and isidchar
				577	* matches any character that may appear at places other than the start of an
				578	* identifier. E.g. a period may only appear at the start of an identifier
				579	* (for local labels), whereas a number may appear anywhere but at the
				580	* start. */
				581
				582	#define isidstart(c) ( isalpha(c) \|\| (c)=='_' \|\| (c)=='.' \|\| (c)=='?' )
				583	#define isidchar(c) ( isidstart(c) \|\| isdigit(c) \|\| (c)=='$' \|\| (c)=='#' \
				584	\|\| (c)=='@' \|\| (c)=='~' )
				585
				586	/* Ditto for numeric constants. */
				587
				588	#define isnumstart(c) ( isdigit(c) \|\| (c)=='$' )
				589	#define isnumchar(c) ( isalnum(c) )
				590
				591	/* This returns the numeric value of a given 'digit'. */
				592
				593	#define numvalue(c) ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? (c)-'A'+10 : (c)-'0')
				594
				595	/*
				596	* This tokeniser routine has only one side effect, that of
				597	* updating `bufptr'. Hence by saving `bufptr', lookahead may be
				598	* performed.
				599	*/
				600
				601	static int nexttoken (void) {
				602	char ourcopy[256], r, s;
				603
				604	while (isspace(*bufptr)) bufptr++;
				605	if (!*bufptr) return 0;
				606
				607	/* we have a token; either an id, a number or a char */
				608	if (isidstart(*bufptr) \|\|
				609	(*bufptr == '$' && isidstart(bufptr[1]))) {
				610	/* now we've got an identifier */
				611	int i;
				612	int is_sym = FALSE;
				613
				614	if (*bufptr == '$') {
				615	is_sym = TRUE;
				616	bufptr++;
				617	}
				618
				619	tokval.t_charptr = q;
				620	q++ = bufptr++;
				621	while (isidchar(bufptr)) q++ = *bufptr++;
				622	*q++ = '\0';
				623	for (s=tokval.t_charptr, r=ourcopy; *s; s++)
				624	r++ = tolower (s);
				625	*r = '\0';
				626	if (is_sym)
				627	return TOKEN_ID; /* bypass all other checks */
				628	/* right, so we have an identifier sitting in temp storage. now,
				629	* is it actually a register or instruction name, or what? */
				630	if ((tokval.t_integer=bsi(ourcopy, reg_names,
				631	elements(reg_names)))>=0)
				632	return TOKEN_REG;
				633	if ((tokval.t_integer=bsi(ourcopy, insn_names,
				634	elements(insn_names)))>=0)
				635	return TOKEN_INSN;
				636	for (i=0; i<elements(icn); i++)
				637	if (!strncmp(ourcopy, icn[i], strlen(icn[i]))) {
				638	char *p = ourcopy + strlen(icn[i]);
				639	tokval.t_integer = ico[i];
				640	if ((tokval.t_inttwo=bsi(p, conditions,
				641	elements(conditions)))>=0)
				642	return TOKEN_INSN;
				643	}
				644	if ((tokval.t_integer=bsi(ourcopy, prefix_names,
				645	elements(prefix_names)))>=0) {
				646	tokval.t_integer += PREFIX_ENUM_START;
				647	return TOKEN_PREFIX;
				648	}
				649	if ((tokval.t_integer=bsi(ourcopy, special_names,
				650	elements(special_names)))>=0)
				651	return TOKEN_SPECIAL;
				652	if (!strcmp(ourcopy, "seg"))
				653	return TOKEN_SEG;
				654	if (!strcmp(ourcopy, "wrt"))
				655	return TOKEN_WRT;
				656	return TOKEN_ID;
				657	} else if (*bufptr == '$' && !isnumchar(bufptr[1])) {
				658	/*
				659	* It's a $ sign with no following hex number; this must
				660	* mean it's a Here token ($), evaluating to the current
				661	* assembly location, or a Base token ($$), evaluating to
				662	* the base of the current segment.
				663	*/
				664	bufptr++;
				665	if (*bufptr == '$') {
				666	bufptr++;
				667	return TOKEN_BASE;
				668	}
				669	return TOKEN_HERE;
				670	} else if (isnumstart(bufptr)) { / now we've got a number */
				671	char *r = q;
				672	int rn_error;
				673
				674	q++ = bufptr++;
				675	while (isnumchar(*bufptr)) {
				676	q++ = bufptr++;
				677	}
				678	if (*bufptr == '.') {
				679	/*
				680	* a floating point constant
				681	*/
				682	q++ = bufptr++;
				683	while (isnumchar(*bufptr)) {
				684	q++ = bufptr++;
				685	}
				686	*q++ = '\0';
				687	tokval.t_charptr = r;
				688	return TOKEN_FLOAT;
				689	}
				690	*q++ = '\0';
				691	tokval.t_integer = readnum(r, &rn_error);
				692	if (rn_error)
				693	return TOKEN_ERRNUM; /* some malformation occurred */
				694	tokval.t_charptr = NULL;
				695	return TOKEN_NUM;
				696	} else if (bufptr == '\'' \|\| bufptr == '"') {/* a char constant */
				697	char quote = bufptr++, r;
				698	r = tokval.t_charptr = bufptr;
				699	while (bufptr && bufptr != quote) bufptr++;
				700	tokval.t_inttwo = bufptr - r; /* store full version */
				701	if (!*bufptr)
				702	return TOKEN_ERRNUM; /* unmatched quotes */
				703	tokval.t_integer = 0;
				704	r = bufptr++; /* skip over final quote */
				705	while (quote != *--r) {
				706	tokval.t_integer = (tokval.t_integer<<8) + (unsigned char) *r;
				707	}
				708	return TOKEN_NUM;
				709	} else if (bufptr == ';') { / a comment has happened - stay */
				710	return 0;
				711	} else if ((bufptr == '>' \|\| bufptr == '<' \|\|
				712	bufptr == '/' \|\| bufptr == '%') && bufptr[1] == *bufptr) {
				713	bufptr += 2;
				714	return (bufptr[-2] == '>' ? TOKEN_SHR :
				715	bufptr[-2] == '<' ? TOKEN_SHL :
				716	bufptr[-2] == '/' ? TOKEN_SDIV :
				717	TOKEN_SMOD);
				718	} else /* just an ordinary char */
				719	return (unsigned char) (*bufptr++);
				720	}
				721
				722	/* return index of "string" in "array", or -1 if no match. */
				723	static int bsi (char string, char *array, int size) {
				724	int i = -1, j = size; /* always, i < index < j */
				725	while (j-i >= 2) {
				726	int k = (i+j)/2;
				727	int l = strcmp(string, array[k]);
				728	if (l<0) /* it's in the first half */
				729	j = k;
				730	else if (l>0) /* it's in the second half */
				731	i = k;
				732	else /* we've got it :) */
				733	return k;
				734	}
				735	return -1; /* we haven't got it :( */
				736	}
				737
				738	void cleanup_insn (insn *i) {
				739	extop *e;
				740
				741	while (i->eops) {
				742	e = i->eops;
				743	i->eops = i->eops->next;
				744	nasm_free (e);
				745	}
				746	}
				747
				748	/* ------------- Evaluator begins here ------------------ */
				749
				750	static expr exprtempstorage[1024], tempptr; / store exprs in here */
				751
				752	/*
				753	* Add two vector datatypes. We have some bizarre behaviour on far-
				754	* absolute segment types: we preserve them during addition _only_
				755	* if one of the segments is a truly pure scalar.
				756	*/
				757	static expr add_vectors(expr p, expr *q) {
				758	expr *r = tempptr;
				759	int preserve;
				760
				761	preserve = is_really_simple(p) \|\| is_really_simple(q);
				762
				763	while (p->type && q->type &&
				764	p->type < EXPR_SEGBASE+SEG_ABS &&
				765	q->type < EXPR_SEGBASE+SEG_ABS)
				766	if (p->type > q->type) {
				767	tempptr->type = q->type;
				768	tempptr->value = q->value;
				769	tempptr++, q++;
				770	} else if (p->type < q->type) {
				771	tempptr->type = p->type;
				772	tempptr->value = p->value;
				773	tempptr++, p++;
				774	} else { /* p and q have same type */
				775	tempptr->type = p->type;
				776	tempptr->value = p->value + q->value;
				777	tempptr++, p++, q++;
				778	}
				779	while (p->type &&
				780	(preserve \|\| p->type < EXPR_SEGBASE+SEG_ABS)) {
				781	tempptr->type = p->type;
				782	tempptr->value = p->value;
				783	tempptr++, p++;
				784	}
				785	while (q->type &&
				786	(preserve \|\| q->type < EXPR_SEGBASE+SEG_ABS)) {
				787	tempptr->type = q->type;
				788	tempptr->value = q->value;
				789	tempptr++, q++;
				790	}
				791	(tempptr++)->type = 0;
				792
				793	return r;
				794	}
				795
				796	/*
				797	* Multiply a vector by a scalar. Strip far-absolute segment part
				798	* if present.
				799	*/
				800	static expr scalar_mult(expr vect, long scalar) {
				801	expr *p = vect;
				802
				803	while (p->type && p->type < EXPR_SEGBASE+SEG_ABS) {
				804	p->value = scalar * (p->value);
				805	p++;
				806	}
				807	p->type = 0;
				808
				809	return vect;
				810	}
				811
				812	static expr *scalarvect (long scalar) {
				813	expr *p = tempptr;
				814	tempptr->type = EXPR_SIMPLE;
				815	tempptr->value = scalar;
				816	tempptr++;
				817	tempptr->type = 0;
				818	tempptr++;
				819	return p;
				820	}
				821
				822	/*
				823	* Return TRUE if the argument is a simple scalar. (Or a far-
				824	* absolute, which counts.)
				825	*/
				826	static int is_simple (expr *vect) {
				827	while (vect->type && !vect->value)
				828	vect++;
				829	if (!vect->type)
				830	return 1;
				831	if (vect->type != EXPR_SIMPLE)
				832	return 0;
				833	do {
				834	vect++;
				835	} while (vect->type && !vect->value);
				836	if (vect->type && vect->type < EXPR_SEGBASE+SEG_ABS) return 0;
				837	return 1;
				838	}
				839
				840	/*
				841	* Return TRUE if the argument is a simple scalar, _NOT_ a far-
				842	* absolute.
				843	*/
				844	static int is_really_simple (expr *vect) {
				845	while (vect->type && !vect->value)
				846	vect++;
				847	if (!vect->type)
				848	return 1;
				849	if (vect->type != EXPR_SIMPLE)
				850	return 0;
				851	do {
				852	vect++;
				853	} while (vect->type && !vect->value);
				854	if (vect->type) return 0;
				855	return 1;
				856	}
				857
				858	/*
				859	* Return TRUE if the argument is relocatable (i.e. a simple
				860	* scalar, plus at most one segment-base, plus possibly a WRT).
				861	*/
				862	static int is_reloc (expr *vect) {
				863	while (vect->type && !vect->value)
				864	vect++;
				865	if (!vect->type)
				866	return 1;
				867	if (vect->type < EXPR_SIMPLE)
				868	return 0;
				869	if (vect->type == EXPR_SIMPLE) {
				870	do {
				871	vect++;
				872	} while (vect->type && !vect->value);
				873	if (!vect->type)
				874	return 1;
				875	}
				876	do {
				877	vect++;
				878	} while (vect->type && (vect->type == EXPR_WRT \|\| !vect->value));
				879	if (!vect->type)
				880	return 1;
				881	return 1;
				882	}
				883
				884	/*
				885	* Return the scalar part of a relocatable vector. (Including
				886	* simple scalar vectors - those qualify as relocatable.)
				887	*/
				888	static long reloc_value (expr *vect) {
				889	while (vect->type && !vect->value)
				890	vect++;
				891	if (!vect->type) return 0;
				892	if (vect->type == EXPR_SIMPLE)
				893	return vect->value;
				894	else
				895	return 0;
				896	}
				897
				898	/*
				899	* Return the segment number of a relocatable vector, or NO_SEG for
				900	* simple scalars.
				901	*/
				902	static long reloc_seg (expr *vect) {
				903	while (vect->type && (vect->type == EXPR_WRT \|\| !vect->value))
				904	vect++;
				905	if (vect->type == EXPR_SIMPLE) {
				906	do {
				907	vect++;
				908	} while (vect->type && (vect->type == EXPR_WRT \|\| !vect->value));
				909	}
				910	if (!vect->type)
				911	return NO_SEG;
				912	else
				913	return vect->type - EXPR_SEGBASE;
				914	}
				915
				916	/*
				917	* Return the WRT segment number of a relocatable vector, or NO_SEG
				918	* if no WRT part is present.
				919	*/
				920	static long reloc_wrt (expr *vect) {
				921	while (vect->type && vect->type < EXPR_WRT)
				922	vect++;
				923	if (vect->type == EXPR_WRT) {
				924	return vect->value;
				925	} else
				926	return NO_SEG;
				927	}
				928
				929	static void eval_reset(void) {
				930	tempptr = exprtempstorage; /* initialise temporary storage */
				931	}
				932
				933	/*
				934	* The SEG operator: calculate the segment part of a relocatable
				935	* value. Return NULL, as usual, if an error occurs. Report the
				936	* error too.
				937	*/
				938	static expr segment_part (expr e) {
				939	long seg;
				940
				941	if (!is_reloc(e)) {
				942	error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value");
				943	return NULL;
				944	}
				945
				946	seg = reloc_seg(e);
				947	if (seg == NO_SEG) {
				948	error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value");
				949	return NULL;
				950	} else if (seg & SEG_ABS)
				951	return scalarvect(seg & ~SEG_ABS);
				952	else {
				953	expr *f = tempptr++;
				954	tempptr++->type = 0;
				955	f->type = EXPR_SEGBASE+outfmt->segbase(seg+1);
				956	f->value = 1;
				957	return f;
				958	}
				959	}
				960
				961	/*
				962	* Recursive-descent parser. Called with a single boolean operand,
				963	* which is TRUE if the evaluation is critical (i.e. unresolved
				964	* symbols are an error condition). Must update the global `i' to
				965	* reflect the token after the parsed string. May return NULL.
				966	*
				967	* evaluate() should report its own errors: on return it is assumed
				968	* that if NULL has been returned, the error has already been
				969	* reported.
				970	*/
				971
/*
 * Grammar parsed is:
 *
 * expr  : expr0 [ WRT expr6 ]
 * expr0 : expr1 [ {|} expr1]
 * expr1 : expr2 [ {^} expr2]
 * expr2 : expr3 [ {&} expr3]
 * expr3 : expr4 [ {<<,>>} expr4...]
 * expr4 : expr5 [ {+,-} expr5...]
 * expr5 : expr6 [ {*,/,%,//,%%} expr6...]
 * expr6 : { ~,+,-,SEG } expr6
 *       | (expr0)
 *       | symbol
 *       | $
 *       | number
 */
				988
				989	static expr expr0(int), expr1(int), expr2(int), expr3(int);
				990	static expr expr4(int), expr5(int), *expr6(int);
				991
				992	static expr *expr0(int critical) {
				993	expr e, f;
				994
				995	e = expr1(critical);
				996	if (!e)
				997	return NULL;
				998	while (i == '\|') {
				999	i = nexttoken();
				1000	f = expr1(critical);
				1001	if (!f)
				1002	return NULL;
				1003	if (!is_simple(e) \|\| !is_simple(f)) {
				1004	error(ERR_NONFATAL, "`\|' operator may only be applied to"
				1005	" scalar values");
				1006	}
				1007	e = scalarvect (reloc_value(e) \| reloc_value(f));
				1008	}
				1009	return e;
				1010	}
				1011
				1012	static expr *expr1(int critical) {
				1013	expr e, f;
				1014
				1015	e = expr2(critical);
				1016	if (!e)
				1017	return NULL;
				1018	while (i == '^') {
				1019	i = nexttoken();
				1020	f = expr2(critical);
				1021	if (!f)
				1022	return NULL;
				1023	if (!is_simple(e) \|\| !is_simple(f)) {
				1024	error(ERR_NONFATAL, "`^' operator may only be applied to"
				1025	" scalar values");
				1026	}
				1027	e = scalarvect (reloc_value(e) ^ reloc_value(f));
				1028	}
				1029	return e;
				1030	}
				1031
				1032	static expr *expr2(int critical) {
				1033	expr e, f;
				1034
				1035	e = expr3(critical);
				1036	if (!e)
				1037	return NULL;
				1038	while (i == '&') {
				1039	i = nexttoken();
				1040	f = expr3(critical);
				1041	if (!f)
				1042	return NULL;
				1043	if (!is_simple(e) \|\| !is_simple(f)) {
				1044	error(ERR_NONFATAL, "`&' operator may only be applied to"
				1045	" scalar values");
				1046	}
				1047	e = scalarvect (reloc_value(e) & reloc_value(f));
				1048	}
				1049	return e;
				1050	}
				1051
				1052	static expr *expr3(int critical) {
				1053	expr e, f;
				1054
				1055	e = expr4(critical);
				1056	if (!e)
				1057	return NULL;
				1058	while (i == TOKEN_SHL \|\| i == TOKEN_SHR) {
				1059	int j = i;
				1060	i = nexttoken();
				1061	f = expr4(critical);
				1062	if (!f)
				1063	return NULL;
				1064	if (!is_simple(e) \|\| !is_simple(f)) {
				1065	error(ERR_NONFATAL, "shift operator may only be applied to"
				1066	" scalar values");
				1067	}
				1068	switch (j) {
				1069	case TOKEN_SHL:
				1070	e = scalarvect (reloc_value(e) << reloc_value(f));
				1071	break;
				1072	case TOKEN_SHR:
				1073	e = scalarvect (((unsigned long)reloc_value(e)) >>
				1074	reloc_value(f));
				1075	break;
				1076	}
				1077	}
				1078	return e;
				1079	}
				1080
				1081	static expr *expr4(int critical) {
				1082	expr e, f;
				1083
				1084	e = expr5(critical);
				1085	if (!e)
				1086	return NULL;
				1087	while (i == '+' \|\| i == '-') {
				1088	int j = i;
				1089	i = nexttoken();
				1090	f = expr5(critical);
				1091	if (!f)
				1092	return NULL;
				1093	switch (j) {
				1094	case '+':
				1095	e = add_vectors (e, f);
				1096	break;
				1097	case '-':
				1098	e = add_vectors (e, scalar_mult(f, -1L));
				1099	break;
				1100	}
				1101	}
				1102	return e;
				1103	}
				1104
				1105	static expr *expr5(int critical) {
				1106	expr e, f;
				1107
				1108	e = expr6(critical);
				1109	if (!e)
				1110	return NULL;
				1111	while (i == '' \|\| i == '/' \|\| i == '' \|\|
				1112	i == TOKEN_SDIV \|\| i == TOKEN_SMOD) {
				1113	int j = i;
				1114	i = nexttoken();
				1115	f = expr6(critical);
				1116	if (!f)
				1117	return NULL;
				1118	if (j != '*' && (!is_simple(e) \|\| !is_simple(f))) {
				1119	error(ERR_NONFATAL, "division operator may only be applied to"
				1120	" scalar values");
				1121	return NULL;
				1122	}
				1123	if (j != '*' && reloc_value(f) == 0) {
				1124	error(ERR_NONFATAL, "division by zero");
				1125	return NULL;
				1126	}
				1127	switch (j) {
				1128	case '*':
				1129	if (is_simple(e))
				1130	e = scalar_mult (f, reloc_value(e));
				1131	else if (is_simple(f))
				1132	e = scalar_mult (e, reloc_value(f));
				1133	else {
				1134	error(ERR_NONFATAL, "unable to multiply two "
				1135	"non-scalar objects");
				1136	return NULL;
				1137	}
				1138	break;
				1139	case '/':
				1140	e = scalarvect (((unsigned long)reloc_value(e)) /
				1141	((unsigned long)reloc_value(f)));
				1142	break;
				1143	case '%':
				1144	e = scalarvect (((unsigned long)reloc_value(e)) %
				1145	((unsigned long)reloc_value(f)));
				1146	break;
				1147	case TOKEN_SDIV:
				1148	e = scalarvect (((signed long)reloc_value(e)) /
				1149	((signed long)reloc_value(f)));
				1150	break;
				1151	case TOKEN_SMOD:
				1152	e = scalarvect (((signed long)reloc_value(e)) %
				1153	((signed long)reloc_value(f)));
				1154	break;
				1155	}
				1156	}
				1157	return e;
				1158	}
				1159
				1160	static expr *expr6(int critical) {
				1161	expr *e;
				1162	long label_seg, label_ofs;
				1163
				1164	if (i == '-') {
				1165	i = nexttoken();
				1166	e = expr6(critical);
				1167	if (!e)
				1168	return NULL;
				1169	return scalar_mult (e, -1L);
				1170	} else if (i == '+') {
				1171	i = nexttoken();
				1172	return expr6(critical);
				1173	} else if (i == '~') {
				1174	i = nexttoken();
				1175	e = expr6(critical);
				1176	if (!e)
				1177	return NULL;
				1178	if (!is_simple(e)) {
				1179	error(ERR_NONFATAL, "`~' operator may only be applied to"
				1180	" scalar values");
				1181	return NULL;
				1182	}
				1183	return scalarvect(~reloc_value(e));
				1184	} else if (i == TOKEN_SEG) {
				1185	i = nexttoken();
				1186	e = expr6(critical);
				1187	if (!e)
				1188	return NULL;
				1189	return segment_part(e);
				1190	} else if (i == '(') {
				1191	i = nexttoken();
				1192	e = expr0(critical);
				1193	if (!e)
				1194	return NULL;
				1195	if (i != ')') {
				1196	error(ERR_NONFATAL, "expecting `)'");
				1197	return NULL;
				1198	}
				1199	i = nexttoken();
				1200	return e;
				1201	} else if (i == TOKEN_NUM \|\| i == TOKEN_REG \|\| i == TOKEN_ID \|\|
				1202	i == TOKEN_HERE \|\| i == TOKEN_BASE) {
				1203	e = tempptr;
				1204	switch (i) {
				1205	case TOKEN_NUM:
				1206	e->type = EXPR_SIMPLE;
				1207	e->value = tokval.t_integer;
				1208	break;
				1209	case TOKEN_REG:
				1210	e->type = tokval.t_integer;
				1211	e->value = 1;
				1212	break;
				1213	case TOKEN_ID:
				1214	case TOKEN_HERE:
				1215	case TOKEN_BASE:
				1216	/*
				1217	* Since the whole line is parsed before the label it
				1218	* defines is given to the label manager, we have
				1219	* problems with lines such as
				1220	*
				1221	* end: TIMES 512-(end-start) DB 0
				1222	*
				1223	* where `end' is not known on pass one, despite not
				1224	* really being a forward reference, and due to
				1225	* criticality it is _needed_. Hence we check our label
				1226	* against the currently defined one, and do our own
				1227	* resolution of it if we have to.
				1228	*/
				1229	if (i == TOKEN_BASE) {
				1230	label_seg = seg;
				1231	label_ofs = 0;
				1232	} else if (i == TOKEN_HERE \|\| !strcmp(tokval.t_charptr, label)) {
				1233	label_seg = seg;
				1234	label_ofs = ofs;
				1235	} else if (!labelfunc(tokval.t_charptr, &label_seg, &label_ofs)) {
				1236	if (critical == 2) {
				1237	error (ERR_NONFATAL, "symbol `%s' undefined",
				1238	tokval.t_charptr);
				1239	return NULL;
				1240	} else if (critical == 1) {
				1241	error (ERR_NONFATAL, "symbol `%s' not defined before use",
				1242	tokval.t_charptr);
				1243	return NULL;
				1244	} else {
				1245	label_seg = seg;
				1246	label_ofs = ofs;
				1247	}
				1248	}
				1249	e->type = EXPR_SIMPLE;
				1250	e->value = label_ofs;
				1251	if (label_seg!=NO_SEG) {
				1252	tempptr++;
				1253	tempptr->type = EXPR_SEGBASE + label_seg;
				1254	tempptr->value = 1;
				1255	}
				1256	break;
				1257	}
				1258	tempptr++;
				1259	tempptr->type = 0;
				1260	tempptr++;
				1261	i = nexttoken();
				1262	return e;
				1263	} else {
				1264	error(ERR_NONFATAL, "expression syntax error");
				1265	return NULL;
				1266	}
				1267	}
				1268
				1269	static expr *evaluate (int critical) {
				1270	expr *e;
				1271	expr *f = NULL;
				1272
				1273	e = expr0 (critical);
				1274	if (!e)
				1275	return NULL;
				1276
				1277	if (i == TOKEN_WRT) {
				1278	if (!is_reloc(e)) {
				1279	error(ERR_NONFATAL, "invalid left-hand operand to WRT");
				1280	return NULL;
				1281	}
				1282	i = nexttoken(); /* eat the WRT */
				1283	f = expr6 (critical);
				1284	if (!f)
				1285	return NULL;
				1286	}
				1287	e = scalar_mult (e, 1L); /* strip far-absolute segment part */
				1288	if (f) {
				1289	expr *g = tempptr++;
				1290	tempptr++->type = 0;
				1291	g->type = EXPR_WRT;
				1292	if (!is_reloc(f)) {
				1293	error(ERR_NONFATAL, "invalid right-hand operand to WRT");
				1294	return NULL;
				1295	}
				1296	g->value = reloc_seg(f);
				1297	if (g->value == NO_SEG)
				1298	g->value = reloc_value(f) \| SEG_ABS;
				1299	else if (!(g->value & SEG_ABS) && !(g->value % 2) && critical) {
				1300	error(ERR_NONFATAL, "invalid right-hand operand to WRT");
				1301	return NULL;
				1302	}
				1303	e = add_vectors (e, g);
				1304	}
				1305	return e;
				1306	}