/*
** This file contains all sources (including headers) to the LEMON
** LALR(1) parser generator.  The sources have been combined into a
** single file to make it easy to include LEMON in the source tree
** and Makefile of another program.
**
** The author of this program disclaims copyright.
*/
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>

/* Make sure __WIN32__ is defined whenever one of the other common
** Windows feature macros (_WIN32 or WIN32) is defined. */
#ifndef __WIN32__
#   if defined(_WIN32) || defined(WIN32)
#	define __WIN32__
#   endif
#endif

/* PRIVATE is placed in front of helper routines.  It currently expands
** to nothing; the "static" variant is kept, disabled, for reference. */
/* #define PRIVATE static */
#define PRIVATE

/* MAXRHS is 5 when TEST is defined and 1000 otherwise. */
#ifdef TEST
#define MAXRHS 5       /* Set low to exercise exception code */
#else
#define MAXRHS 1000
#endif

/* Old-style declarations (parameter lists left unspecified).  The
** definition of msort() is not in this part of the file. */
char *msort();
extern void *malloc();

/******** From the file "action.h" *************************************/
struct action *Action_new();
struct action *Action_sort();

/********* From the file "assert.h" ************************************/
/* Private replacement for the standard assert().  When NDEBUG is not
** defined, a failed assertion calls myassert() with the source file and
** line.  The macro is a single expression (not a bare "if") so that
** "if( a ) assert(b); else ..." binds the "else" to the outer "if", and
** so that it still forms a complete statement when NDEBUG is defined. */
void myassert();
#ifndef NDEBUG
#  define assert(X) ((X) ? (void)0 : myassert(__FILE__,__LINE__))
#else
#  define assert(X) ((void)0)
#endif

/********** From the file "build.h" ************************************/
/* These are declared without parameter lists because struct lemon has not
** been declared yet at this point in the file.  Each definition in this
** file takes a single "struct lemon *". */
void FindRulePrecedences();
void FindFirstSets();
void FindStates();
void FindLinks();
void FindFollowSets();
void FindActions();

/********* From the file "configlist.h" *********************************/
/* The comments inside the parentheses are hints only; the declarations
** themselves leave the parameter lists unspecified. */
void Configlist_init(/* void */);
struct config *Configlist_add(/* struct rule *, int */);
struct config *Configlist_addbasis(/* struct rule *, int */);
void Configlist_closure(/* called with a struct lemon * by getstate() */);
void Configlist_sort(/* void */);
void Configlist_sortbasis(/* void */);
struct config *Configlist_return(/* void */);
struct config *Configlist_basis(/* void */);
void Configlist_eat(/* struct config * */);
void Configlist_reset(/* void */);

/********* From the file "error.h" ***************************************/
void ErrorMsg(const char *, int,const char *, ...);

/****** From the file "option.h" ******************************************/
/* Description of one command-line option.  The meaning of the individual
** OPT_* kinds is defined by the option-processing code, which is not in
** this part of the file. */
struct s_options {
  enum { OPT_FLAG=1,  OPT_INT,  OPT_DBL,  OPT_STR,
         OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR} type;
  char *label;
  char *arg;
  char *message;
};
int    OptInit(/* char**,struct s_options*,FILE* */);
int    OptNArgs(/* void */);
char  *OptArg(/* int */);
void   OptErr(/* int */);
void   OptPrint(/* void */);

/******** From the file "parse.h" *****************************************/
void Parse(/* struct lemon *lemp */);

/********* From the file "plink.h" ***************************************/
struct plink *Plink_new(/* void */);
void Plink_add(/* struct plink **, struct config * */);
void Plink_copy(/* struct plink **, struct plink * */);
void Plink_delete(/* struct plink * */);

/********** From the file "report.h" *************************************/
void Reprint(/* struct lemon * */);
void ReportOutput(/* struct lemon * */);
void ReportTable(/* struct lemon * */);
void ReportHeader(/* struct lemon * */);
void CompressTables(/* struct lemon * */);

/********** From the file "set.h" ****************************************/
void  SetSize(/* int N */);             /* All sets will be of size N */
char *SetNew(/* void */);               /* A new set for element 0..N */
void  SetFree(/* char* */);             /* Deallocate a set */

int SetAdd(/* char*,int */);            /* Add element to a set */
int SetUnion(/* char *A,char *B */);    /* A <- A U B, thru element N */

/* True if Y is in set X.  Both arguments are parenthesized so that
** expression arguments such as SetFind(p+1,i) index what the caller
** wrote rather than re-associating with the subscript operator. */
#define SetFind(X,Y) ((X)[(Y)])

/********** From the file "struct.h" *************************************/
/*
** Principal data structures for the LEMON parser generator.
*/

/* Two-valued truth type; B_FALSE is 0 and B_TRUE is 1. */
typedef enum {B_FALSE=0, B_TRUE} Boolean;

115/* Symbols (terminals and nonterminals) of the grammar are stored
116** in the following: */
117struct symbol {
118 char *name; /* Name of the symbol */
119 int index; /* Index number for this symbol */
120 enum {
121 TERMINAL,
122 NONTERMINAL
123 } type; /* Symbols are all either TERMINALS or NTs */
124 struct rule *rule; /* Linked list of rules of this (if an NT) */
drh0bd1f4e2002-06-06 18:54:39 +0000125 struct symbol *fallback; /* fallback token in case this token doesn't parse */
drh75897232000-05-29 14:26:00 +0000126 int prec; /* Precedence if defined (-1 otherwise) */
127 enum e_assoc {
128 LEFT,
129 RIGHT,
130 NONE,
131 UNK
132 } assoc; /* Associativity if predecence is defined */
133 char *firstset; /* First-set for all rules of this symbol */
134 Boolean lambda; /* True if NT and can generate an empty string */
135 char *destructor; /* Code which executes whenever this symbol is
136 ** popped from the stack during error processing */
137 int destructorln; /* Line number of destructor code */
138 char *datatype; /* The data type of information held by this
139 ** object. Only used if type==NONTERMINAL */
140 int dtnum; /* The data type number. In the parser, the value
141 ** stack is a union. The .yy%d element of this
142 ** union is the correct data type for this object */
143};
144
145/* Each production rule in the grammar is stored in the following
146** structure. */
147struct rule {
148 struct symbol *lhs; /* Left-hand side of the rule */
149 char *lhsalias; /* Alias for the LHS (NULL if none) */
150 int ruleline; /* Line number for the rule */
151 int nrhs; /* Number of RHS symbols */
152 struct symbol **rhs; /* The RHS symbols */
153 char **rhsalias; /* An alias for each RHS symbol (NULL if none) */
154 int line; /* Line number at which code begins */
155 char *code; /* The code executed when this rule is reduced */
156 struct symbol *precsym; /* Precedence symbol for this rule */
157 int index; /* An index number for this rule */
158 Boolean canReduce; /* True if this rule is ever reduced */
159 struct rule *nextlhs; /* Next rule with the same LHS */
160 struct rule *next; /* Next rule in the global list */
161};
162
/* A configuration is a production rule of the grammar together with
** a mark (dot) showing how much of that rule has been processed so far.
** Configurations also contain a follow-set which is a list of terminal
** symbols which are allowed to immediately follow the end of the rule.
** Every configuration is recorded as an instance of the following: */
struct config {
  struct rule *rp;         /* The rule upon which the configuration is based */
  int dot;                 /* The parse point */
  char *fws;               /* Follow-set for this configuration only */
  struct plink *fplp;      /* Follow-set forward propagation links */
  struct plink *bplp;      /* Follow-set backwards propagation links */
  struct state *stp;       /* Pointer to state which contains this */
  enum {
    COMPLETE,              /* The status is used during followset and */
    INCOMPLETE             /*    shift computations */
  } status;
  struct config *next;     /* Next configuration in the state */
  struct config *bp;       /* The next basis configuration */
};

/* Every shift or reduce operation is stored as one of the following */
struct action {
  struct symbol *sp;       /* The look-ahead symbol */
  enum e_action {
    SHIFT,
    ACCEPT,
    REDUCE,
    ERROR,
    CONFLICT,                /* Was a reduce, but part of a conflict */
    SH_RESOLVED,             /* Was a shift.  Precedence resolved conflict */
    RD_RESOLVED,             /* Was reduce.  Precedence resolved conflict */
    NOT_USED                 /* Deleted by compression */
  } type;
  union {
    struct state *stp;     /* The new state, if a shift */
    struct rule *rp;       /* The rule, if a reduce */
  } x;
  struct action *next;     /* Next action for this state */
  struct action *collide;  /* Next action with the same hash */
};

/* Each state of the generated parser's finite state machine
** is encoded as an instance of the following structure. */
struct state {
  struct config *bp;       /* The basis configurations for this state */
  struct config *cfp;      /* All configurations in this set */
  int index;               /* Sequential number for this state */
  struct action *ap;       /* Array of actions for this state */
  int nTknAct, nNtAct;     /* Number of actions on terminals and nonterminals */
  int iTknOfst, iNtOfst;   /* yy_action[] offset for terminals and nonterms */
  int iDflt;               /* Default action */
};
#define NO_OFFSET (-2147483647)

/* A followset propagation link indicates that the contents of one
** configuration followset should be propagated to another whenever
** the first changes. */
struct plink {
  struct config *cfp;      /* The configuration to which linked */
  struct plink *next;      /* The next propagate link */
};

/* The state vector for the entire parser generator is recorded as
** follows.  (LEMON uses no global variables and makes little use of
** static variables.  Fields in the following structure can be thought
** of as being global variables in the program.) */
struct lemon {
  struct state **sorted;   /* Table of states sorted by state number */
  struct rule *rule;       /* List of all rules */
  int nstate;              /* Number of states */
  int nrule;               /* Number of rules */
  int nsymbol;             /* Number of terminal and nonterminal symbols */
  int nterminal;           /* Number of terminal symbols */
  struct symbol **symbols; /* Sorted array of pointers to symbols */
  int errorcnt;            /* Number of errors */
  struct symbol *errsym;   /* The error symbol */
  char *name;              /* Name of the generated parser */
  char *arg;               /* Declaration of the 3rd argument to parser */
  char *tokentype;         /* Type of terminal symbols in the parser stack */
  char *vartype;           /* The default type of non-terminal symbols */
  char *start;             /* Name of the start symbol for the grammar */
  char *stacksize;         /* Size of the parser stack */
  char *include;           /* Code to put at the start of the C file */
  int  includeln;          /* Line number for start of include code */
  char *error;             /* Code to execute when an error is seen */
  int  errorln;            /* Line number for start of error code */
  char *overflow;          /* Code to execute on a stack overflow */
  int  overflowln;         /* Line number for start of overflow code */
  char *failure;           /* Code to execute on parser failure */
  int  failureln;          /* Line number for start of failure code */
  char *accept;            /* Code to execute when the parser accepts */
  int  acceptln;           /* Line number for the start of accept code */
  char *extracode;         /* Code appended to the generated file */
  int  extracodeln;        /* Line number for the start of the extra code */
  char *tokendest;         /* Code to execute to destroy token data */
  int  tokendestln;        /* Line number for token destroyer code */
  char *vardest;           /* Code for the default non-terminal destructor */
  int  vardestln;          /* Line number for default non-term destructor code*/
  char *filename;          /* Name of the input file */
  char *outname;           /* Name of the current output file */
  char *tokenprefix;       /* A prefix added to token names in the .h file */
  int nconflict;           /* Number of parsing conflicts */
  int tablesize;           /* Size of the parse tables */
  int basisflag;           /* Print only basis configurations */
  int has_fallback;        /* True if any %fallback is seen in the grammar */
  char *argv0;             /* Name of the program */
};

/* Call memory_error() when X compares equal to 0 (for example a failed
** allocation).  The body is wrapped in do{...}while(0) so the macro acts
** as one statement: "if( c ) MemoryCheck(p); else ..." parses as written.
** Invoke it with a trailing semicolon, as getstate() does. */
#define MemoryCheck(X) do{ \
  if( (X)==0 ){ \
    extern void memory_error(); \
    memory_error(); \
  } \
}while(0)

/**************** From the file "table.h" *********************************/
/*
** All code in this file has been automatically generated
** from a specification in the file
**              "table.q"
** by the associative array code building program "aagen".
** Do not edit this file!  Instead, edit the specification
** file, then rerun aagen.
*/
/*
** Code for processing tables in the LEMON parser generator.
*/

/* Routines for handling strings */

char *Strsafe();

void Strsafe_init(/* void */);
int Strsafe_insert(/* char * */);
char *Strsafe_find(/* char * */);

/* Routines for handling symbols of the grammar */

struct symbol *Symbol_new();
int Symbolcmpp(/* struct symbol **, struct symbol ** */);
void Symbol_init(/* void */);
int Symbol_insert(/* struct symbol *, char * */);
struct symbol *Symbol_find(/* char * */);
struct symbol *Symbol_Nth(/* int */);
int Symbol_count(/*  */);
struct symbol **Symbol_arrayof(/*  */);

/* Routines to manage the state table */

int Configcmp(/* struct config *, struct config * */);
struct state *State_new();
void State_init(/* void */);
int State_insert(/* struct state *, struct config * */);
struct state *State_find(/* struct config * */);
struct state **State_arrayof(/*  */);

/* Routines used for efficiency in Configlist_add */

void Configtable_init(/* void */);
int Configtable_insert(/* struct config * */);
struct config *Configtable_find(/* struct config * */);
void Configtable_clear(/* int(*)(struct config *) */);
323/****************** From the file "action.c" *******************************/
324/*
325** Routines processing parser actions in the LEMON parser generator.
326*/
327
328/* Allocate a new parser action */
329struct action *Action_new(){
330 static struct action *freelist = 0;
331 struct action *new;
332
333 if( freelist==0 ){
334 int i;
335 int amt = 100;
336 freelist = (struct action *)malloc( sizeof(struct action)*amt );
337 if( freelist==0 ){
338 fprintf(stderr,"Unable to allocate memory for a new parser action.");
339 exit(1);
340 }
341 for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1];
342 freelist[amt-1].next = 0;
343 }
344 new = freelist;
345 freelist = freelist->next;
346 return new;
347}
348
349/* Compare two actions */
350static int actioncmp(ap1,ap2)
351struct action *ap1;
352struct action *ap2;
353{
354 int rc;
355 rc = ap1->sp->index - ap2->sp->index;
356 if( rc==0 ) rc = (int)ap1->type - (int)ap2->type;
357 if( rc==0 ){
drh61bc2722000-08-20 11:42:46 +0000358 assert( ap1->type==REDUCE || ap1->type==RD_RESOLVED || ap1->type==CONFLICT);
359 assert( ap2->type==REDUCE || ap2->type==RD_RESOLVED || ap2->type==CONFLICT);
drh75897232000-05-29 14:26:00 +0000360 rc = ap1->x.rp->index - ap2->x.rp->index;
361 }
362 return rc;
363}
364
365/* Sort parser actions */
366struct action *Action_sort(ap)
367struct action *ap;
368{
drh218dc692004-05-31 23:13:45 +0000369 ap = (struct action *)msort((char *)ap,(char **)&ap->next,actioncmp);
drh75897232000-05-29 14:26:00 +0000370 return ap;
371}
372
373void Action_add(app,type,sp,arg)
374struct action **app;
375enum e_action type;
376struct symbol *sp;
377char *arg;
378{
379 struct action *new;
380 new = Action_new();
381 new->next = *app;
382 *app = new;
383 new->type = type;
384 new->sp = sp;
385 if( type==SHIFT ){
386 new->x.stp = (struct state *)arg;
387 }else{
388 new->x.rp = (struct rule *)arg;
389 }
390}
/********************** New code to implement the "acttab" module ***********/
/*
** This module implements routines used to construct the yy_action[] table.
*/

/*
** The state of the yy_action table under construction is an instance of
** the following structure
*/
typedef struct acttab acttab;
struct acttab {
  int nAction;                 /* Number of used slots in aAction[] */
  int nActionAlloc;            /* Slots allocated for aAction[] */
  struct {
    int lookahead;             /* Value of the lookahead token */
    int action;                /* Action to take on the given lookahead */
  } *aAction,                  /* The yy_action[] table under construction */
    *aLookahead;               /* A single new transaction set */
  int mnLookahead;             /* Minimum aLookahead[].lookahead */
  int mnAction;                /* Action associated with mnLookahead */
  int mxLookahead;             /* Maximum aLookahead[].lookahead */
  int nLookahead;              /* Used slots in aLookahead[] */
  int nLookaheadAlloc;         /* Slots allocated in aLookahead[] */
};

/* Return the number of entries in the yy_action table */
#define acttab_size(X) ((X)->nAction)

/* The value for the N-th entry in yy_action */
#define acttab_yyaction(X,N)  ((X)->aAction[N].action)

/* The value for the N-th entry in yy_lookahead */
#define acttab_yylookahead(X,N)  ((X)->aAction[N].lookahead)

425/* Free all memory associated with the given acttab */
426void acttab_free(acttab *p){
427 free( p->aAction );
428 free( p->aLookahead );
429 free( p );
430}
431
432/* Allocate a new acttab structure */
433acttab *acttab_alloc(void){
434 acttab *p = malloc( sizeof(*p) );
435 if( p==0 ){
436 fprintf(stderr,"Unable to allocate memory for a new acttab.");
437 exit(1);
438 }
439 memset(p, 0, sizeof(*p));
440 return p;
441}
442
443/* Add a new action to the current transaction set
444*/
445void acttab_action(acttab *p, int lookahead, int action){
446 if( p->nLookahead>=p->nLookaheadAlloc ){
447 p->nLookaheadAlloc += 25;
448 p->aLookahead = realloc( p->aLookahead,
449 sizeof(p->aLookahead[0])*p->nLookaheadAlloc );
450 if( p->aLookahead==0 ){
451 fprintf(stderr,"malloc failed\n");
452 exit(1);
453 }
454 }
455 if( p->nLookahead==0 ){
456 p->mxLookahead = lookahead;
457 p->mnLookahead = lookahead;
458 p->mnAction = action;
459 }else{
460 if( p->mxLookahead<lookahead ) p->mxLookahead = lookahead;
461 if( p->mnLookahead>lookahead ){
462 p->mnLookahead = lookahead;
463 p->mnAction = action;
464 }
465 }
466 p->aLookahead[p->nLookahead].lookahead = lookahead;
467 p->aLookahead[p->nLookahead].action = action;
468 p->nLookahead++;
469}
470
471/*
472** Add the transaction set built up with prior calls to acttab_action()
473** into the current action table. Then reset the transaction set back
474** to an empty set in preparation for a new round of acttab_action() calls.
475**
476** Return the offset into the action table of the new transaction.
477*/
478int acttab_insert(acttab *p){
479 int i, j, k, n;
480 assert( p->nLookahead>0 );
481
482 /* Make sure we have enough space to hold the expanded action table
483 ** in the worst case. The worst case occurs if the transaction set
484 ** must be appended to the current action table
485 */
drh784d86f2004-02-19 18:41:53 +0000486 n = p->mxLookahead + 1;
drh8b582012003-10-21 13:16:03 +0000487 if( p->nAction + n >= p->nActionAlloc ){
drhfdbf9282003-10-21 16:34:41 +0000488 int oldAlloc = p->nActionAlloc;
drh8b582012003-10-21 13:16:03 +0000489 p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20;
490 p->aAction = realloc( p->aAction,
491 sizeof(p->aAction[0])*p->nActionAlloc);
492 if( p->aAction==0 ){
493 fprintf(stderr,"malloc failed\n");
494 exit(1);
495 }
drhfdbf9282003-10-21 16:34:41 +0000496 for(i=oldAlloc; i<p->nActionAlloc; i++){
drh8b582012003-10-21 13:16:03 +0000497 p->aAction[i].lookahead = -1;
498 p->aAction[i].action = -1;
499 }
500 }
501
502 /* Scan the existing action table looking for an offset where we can
503 ** insert the current transaction set. Fall out of the loop when that
504 ** offset is found. In the worst case, we fall out of the loop when
505 ** i reaches p->nAction, which means we append the new transaction set.
506 **
507 ** i is the index in p->aAction[] where p->mnLookahead is inserted.
508 */
drh784d86f2004-02-19 18:41:53 +0000509 for(i=0; i<p->nAction+p->mnLookahead; i++){
drh8b582012003-10-21 13:16:03 +0000510 if( p->aAction[i].lookahead<0 ){
511 for(j=0; j<p->nLookahead; j++){
512 k = p->aLookahead[j].lookahead - p->mnLookahead + i;
513 if( k<0 ) break;
514 if( p->aAction[k].lookahead>=0 ) break;
515 }
drhfdbf9282003-10-21 16:34:41 +0000516 if( j<p->nLookahead ) continue;
517 for(j=0; j<p->nAction; j++){
518 if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break;
519 }
520 if( j==p->nAction ){
521 break; /* Fits in empty slots */
522 }
drh8b582012003-10-21 13:16:03 +0000523 }else if( p->aAction[i].lookahead==p->mnLookahead ){
524 if( p->aAction[i].action!=p->mnAction ) continue;
525 for(j=0; j<p->nLookahead; j++){
526 k = p->aLookahead[j].lookahead - p->mnLookahead + i;
527 if( k<0 || k>=p->nAction ) break;
528 if( p->aLookahead[j].lookahead!=p->aAction[k].lookahead ) break;
529 if( p->aLookahead[j].action!=p->aAction[k].action ) break;
530 }
531 if( j<p->nLookahead ) continue;
532 n = 0;
533 for(j=0; j<p->nAction; j++){
drhfdbf9282003-10-21 16:34:41 +0000534 if( p->aAction[j].lookahead<0 ) continue;
535 if( p->aAction[j].lookahead==j+p->mnLookahead-i ) n++;
drh8b582012003-10-21 13:16:03 +0000536 }
drhfdbf9282003-10-21 16:34:41 +0000537 if( n==p->nLookahead ){
538 break; /* Same as a prior transaction set */
539 }
drh8b582012003-10-21 13:16:03 +0000540 }
541 }
542 /* Insert transaction set at index i. */
543 for(j=0; j<p->nLookahead; j++){
544 k = p->aLookahead[j].lookahead - p->mnLookahead + i;
545 p->aAction[k] = p->aLookahead[j];
546 if( k>=p->nAction ) p->nAction = k+1;
547 }
548 p->nLookahead = 0;
549
550 /* Return the offset that is added to the lookahead in order to get the
551 ** index into yy_action of the action */
552 return i - p->mnLookahead;
553}
554
/********************** From the file "assert.c" ****************************/
/*
** A more efficient way of handling assertions.
**
** Report a failed assertion on stderr, naming the source line and file,
** then terminate the program with exit status 1.  This is the routine
** invoked by this file's assert() macro.
*/
void myassert(char *file, int line)
{
  fprintf(stderr,"Assertion failed on line %d of file \"%s\"\n",line,file);
  exit(1);
}
566/********************** From the file "build.c" *****************************/
567/*
568** Routines to construction the finite state machine for the LEMON
569** parser generator.
570*/
571
572/* Find a precedence symbol of every rule in the grammar.
573**
574** Those rules which have a precedence symbol coded in the input
575** grammar using the "[symbol]" construct will already have the
576** rp->precsym field filled. Other rules take as their precedence
577** symbol the first RHS symbol with a defined precedence. If there
578** are not RHS symbols with a defined precedence, the precedence
579** symbol field is left blank.
580*/
581void FindRulePrecedences(xp)
582struct lemon *xp;
583{
584 struct rule *rp;
585 for(rp=xp->rule; rp; rp=rp->next){
586 if( rp->precsym==0 ){
587 int i;
588 for(i=0; i<rp->nrhs; i++){
589 if( rp->rhs[i]->prec>=0 ){
590 rp->precsym = rp->rhs[i];
591 break;
592 }
593 }
594 }
595 }
596 return;
597}
598
599/* Find all nonterminals which will generate the empty string.
600** Then go back and compute the first sets of every nonterminal.
601** The first set is the set of all terminal symbols which can begin
602** a string generated by that nonterminal.
603*/
604void FindFirstSets(lemp)
605struct lemon *lemp;
606{
607 int i;
608 struct rule *rp;
609 int progress;
610
611 for(i=0; i<lemp->nsymbol; i++){
drhb27b83a2002-08-14 23:18:57 +0000612 lemp->symbols[i]->lambda = B_FALSE;
drh75897232000-05-29 14:26:00 +0000613 }
614 for(i=lemp->nterminal; i<lemp->nsymbol; i++){
615 lemp->symbols[i]->firstset = SetNew();
616 }
617
618 /* First compute all lambdas */
619 do{
620 progress = 0;
621 for(rp=lemp->rule; rp; rp=rp->next){
622 if( rp->lhs->lambda ) continue;
623 for(i=0; i<rp->nrhs; i++){
drhb27b83a2002-08-14 23:18:57 +0000624 if( rp->rhs[i]->lambda==B_FALSE ) break;
drh75897232000-05-29 14:26:00 +0000625 }
626 if( i==rp->nrhs ){
drhb27b83a2002-08-14 23:18:57 +0000627 rp->lhs->lambda = B_TRUE;
drh75897232000-05-29 14:26:00 +0000628 progress = 1;
629 }
630 }
631 }while( progress );
632
633 /* Now compute all first sets */
634 do{
635 struct symbol *s1, *s2;
636 progress = 0;
637 for(rp=lemp->rule; rp; rp=rp->next){
638 s1 = rp->lhs;
639 for(i=0; i<rp->nrhs; i++){
640 s2 = rp->rhs[i];
641 if( s2->type==TERMINAL ){
642 progress += SetAdd(s1->firstset,s2->index);
643 break;
644 }else if( s1==s2 ){
drhb27b83a2002-08-14 23:18:57 +0000645 if( s1->lambda==B_FALSE ) break;
drh75897232000-05-29 14:26:00 +0000646 }else{
647 progress += SetUnion(s1->firstset,s2->firstset);
drhb27b83a2002-08-14 23:18:57 +0000648 if( s2->lambda==B_FALSE ) break;
drh75897232000-05-29 14:26:00 +0000649 }
650 }
651 }
652 }while( progress );
653 return;
654}
655
656/* Compute all LR(0) states for the grammar. Links
657** are added to between some states so that the LR(1) follow sets
658** can be computed later.
659*/
660PRIVATE struct state *getstate(/* struct lemon * */); /* forward reference */
661void FindStates(lemp)
662struct lemon *lemp;
663{
664 struct symbol *sp;
665 struct rule *rp;
666
667 Configlist_init();
668
669 /* Find the start symbol */
670 if( lemp->start ){
671 sp = Symbol_find(lemp->start);
672 if( sp==0 ){
673 ErrorMsg(lemp->filename,0,
674"The specified start symbol \"%s\" is not \
675in a nonterminal of the grammar. \"%s\" will be used as the start \
676symbol instead.",lemp->start,lemp->rule->lhs->name);
677 lemp->errorcnt++;
678 sp = lemp->rule->lhs;
679 }
680 }else{
681 sp = lemp->rule->lhs;
682 }
683
684 /* Make sure the start symbol doesn't occur on the right-hand side of
685 ** any rule. Report an error if it does. (YACC would generate a new
686 ** start symbol in this case.) */
687 for(rp=lemp->rule; rp; rp=rp->next){
688 int i;
689 for(i=0; i<rp->nrhs; i++){
690 if( rp->rhs[i]==sp ){
691 ErrorMsg(lemp->filename,0,
692"The start symbol \"%s\" occurs on the \
693right-hand side of a rule. This will result in a parser which \
694does not work properly.",sp->name);
695 lemp->errorcnt++;
696 }
697 }
698 }
699
700 /* The basis configuration set for the first state
701 ** is all rules which have the start symbol as their
702 ** left-hand side */
703 for(rp=sp->rule; rp; rp=rp->nextlhs){
704 struct config *newcfp;
705 newcfp = Configlist_addbasis(rp,0);
706 SetAdd(newcfp->fws,0);
707 }
708
709 /* Compute the first state. All other states will be
710 ** computed automatically during the computation of the first one.
711 ** The returned pointer to the first state is not used. */
712 (void)getstate(lemp);
713 return;
714}
715
716/* Return a pointer to a state which is described by the configuration
717** list which has been built from calls to Configlist_add.
718*/
719PRIVATE void buildshifts(/* struct lemon *, struct state * */); /* Forwd ref */
720PRIVATE struct state *getstate(lemp)
721struct lemon *lemp;
722{
723 struct config *cfp, *bp;
724 struct state *stp;
725
726 /* Extract the sorted basis of the new state. The basis was constructed
727 ** by prior calls to "Configlist_addbasis()". */
728 Configlist_sortbasis();
729 bp = Configlist_basis();
730
731 /* Get a state with the same basis */
732 stp = State_find(bp);
733 if( stp ){
734 /* A state with the same basis already exists! Copy all the follow-set
735 ** propagation links from the state under construction into the
736 ** preexisting state, then return a pointer to the preexisting state */
737 struct config *x, *y;
738 for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){
739 Plink_copy(&y->bplp,x->bplp);
740 Plink_delete(x->fplp);
741 x->fplp = x->bplp = 0;
742 }
743 cfp = Configlist_return();
744 Configlist_eat(cfp);
745 }else{
746 /* This really is a new state. Construct all the details */
747 Configlist_closure(lemp); /* Compute the configuration closure */
748 Configlist_sort(); /* Sort the configuration closure */
749 cfp = Configlist_return(); /* Get a pointer to the config list */
750 stp = State_new(); /* A new state structure */
751 MemoryCheck(stp);
752 stp->bp = bp; /* Remember the configuration basis */
753 stp->cfp = cfp; /* Remember the configuration closure */
754 stp->index = lemp->nstate++; /* Every state gets a sequence number */
755 stp->ap = 0; /* No actions, yet. */
756 State_insert(stp,stp->bp); /* Add to the state table */
757 buildshifts(lemp,stp); /* Recursively compute successor states */
758 }
759 return stp;
760}
761
762/* Construct all successor states to the given state. A "successor"
763** state is any state which can be reached by a shift action.
764*/
765PRIVATE void buildshifts(lemp,stp)
766struct lemon *lemp;
767struct state *stp; /* The state from which successors are computed */
768{
769 struct config *cfp; /* For looping thru the config closure of "stp" */
770 struct config *bcfp; /* For the inner loop on config closure of "stp" */
771 struct config *new; /* */
772 struct symbol *sp; /* Symbol following the dot in configuration "cfp" */
773 struct symbol *bsp; /* Symbol following the dot in configuration "bcfp" */
774 struct state *newstp; /* A pointer to a successor state */
775
776 /* Each configuration becomes complete after it contibutes to a successor
777 ** state. Initially, all configurations are incomplete */
778 for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE;
779
780 /* Loop through all configurations of the state "stp" */
781 for(cfp=stp->cfp; cfp; cfp=cfp->next){
782 if( cfp->status==COMPLETE ) continue; /* Already used by inner loop */
783 if( cfp->dot>=cfp->rp->nrhs ) continue; /* Can't shift this config */
784 Configlist_reset(); /* Reset the new config set */
785 sp = cfp->rp->rhs[cfp->dot]; /* Symbol after the dot */
786
787 /* For every configuration in the state "stp" which has the symbol "sp"
788 ** following its dot, add the same configuration to the basis set under
789 ** construction but with the dot shifted one symbol to the right. */
790 for(bcfp=cfp; bcfp; bcfp=bcfp->next){
791 if( bcfp->status==COMPLETE ) continue; /* Already used */
792 if( bcfp->dot>=bcfp->rp->nrhs ) continue; /* Can't shift this one */
793 bsp = bcfp->rp->rhs[bcfp->dot]; /* Get symbol after dot */
794 if( bsp!=sp ) continue; /* Must be same as for "cfp" */
795 bcfp->status = COMPLETE; /* Mark this config as used */
796 new = Configlist_addbasis(bcfp->rp,bcfp->dot+1);
797 Plink_add(&new->bplp,bcfp);
798 }
799
800 /* Get a pointer to the state described by the basis configuration set
801 ** constructed in the preceding loop */
802 newstp = getstate(lemp);
803
804 /* The state "newstp" is reached from the state "stp" by a shift action
805 ** on the symbol "sp" */
drh218dc692004-05-31 23:13:45 +0000806 Action_add(&stp->ap,SHIFT,sp,(char *)newstp);
drh75897232000-05-29 14:26:00 +0000807 }
808}
809
810/*
811** Construct the propagation links
812*/
813void FindLinks(lemp)
814struct lemon *lemp;
815{
816 int i;
817 struct config *cfp, *other;
818 struct state *stp;
819 struct plink *plp;
820
821 /* Housekeeping detail:
822 ** Add to every propagate link a pointer back to the state to
823 ** which the link is attached. */
824 for(i=0; i<lemp->nstate; i++){
825 stp = lemp->sorted[i];
826 for(cfp=stp->cfp; cfp; cfp=cfp->next){
827 cfp->stp = stp;
828 }
829 }
830
831 /* Convert all backlinks into forward links. Only the forward
832 ** links are used in the follow-set computation. */
833 for(i=0; i<lemp->nstate; i++){
834 stp = lemp->sorted[i];
835 for(cfp=stp->cfp; cfp; cfp=cfp->next){
836 for(plp=cfp->bplp; plp; plp=plp->next){
837 other = plp->cfp;
838 Plink_add(&other->fplp,cfp);
839 }
840 }
841 }
842}
843
844/* Compute all followsets.
845**
846** A followset is the set of all symbols which can come immediately
847** after a configuration.
848*/
849void FindFollowSets(lemp)
850struct lemon *lemp;
851{
852 int i;
853 struct config *cfp;
854 struct plink *plp;
855 int progress;
856 int change;
857
858 for(i=0; i<lemp->nstate; i++){
859 for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){
860 cfp->status = INCOMPLETE;
861 }
862 }
863
864 do{
865 progress = 0;
866 for(i=0; i<lemp->nstate; i++){
867 for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){
868 if( cfp->status==COMPLETE ) continue;
869 for(plp=cfp->fplp; plp; plp=plp->next){
870 change = SetUnion(plp->cfp->fws,cfp->fws);
871 if( change ){
872 plp->cfp->status = INCOMPLETE;
873 progress = 1;
874 }
875 }
876 cfp->status = COMPLETE;
877 }
878 }
879 }while( progress );
880}
881
882static int resolve_conflict();
883
884/* Compute the reduce actions, and resolve conflicts.
885*/
886void FindActions(lemp)
887struct lemon *lemp;
888{
889 int i,j;
890 struct config *cfp;
891 struct state *stp;
892 struct symbol *sp;
893 struct rule *rp;
894
895 /* Add all of the reduce actions
896 ** A reduce action is added for each element of the followset of
897 ** a configuration which has its dot at the extreme right.
898 */
899 for(i=0; i<lemp->nstate; i++){ /* Loop over all states */
900 stp = lemp->sorted[i];
901 for(cfp=stp->cfp; cfp; cfp=cfp->next){ /* Loop over all configurations */
902 if( cfp->rp->nrhs==cfp->dot ){ /* Is dot at extreme right? */
903 for(j=0; j<lemp->nterminal; j++){
904 if( SetFind(cfp->fws,j) ){
905 /* Add a reduce action to the state "stp" which will reduce by the
906 ** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */
drh218dc692004-05-31 23:13:45 +0000907 Action_add(&stp->ap,REDUCE,lemp->symbols[j],(char *)cfp->rp);
drh75897232000-05-29 14:26:00 +0000908 }
909 }
910 }
911 }
912 }
913
914 /* Add the accepting token */
915 if( lemp->start ){
916 sp = Symbol_find(lemp->start);
917 if( sp==0 ) sp = lemp->rule->lhs;
918 }else{
919 sp = lemp->rule->lhs;
920 }
921 /* Add to the first state (which is always the starting state of the
922 ** finite state machine) an action to ACCEPT if the lookahead is the
923 ** start nonterminal. */
924 Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0);
925
926 /* Resolve conflicts */
927 for(i=0; i<lemp->nstate; i++){
928 struct action *ap, *nap;
929 struct state *stp;
930 stp = lemp->sorted[i];
931 assert( stp->ap );
932 stp->ap = Action_sort(stp->ap);
drhb59499c2002-02-23 18:45:13 +0000933 for(ap=stp->ap; ap && ap->next; ap=ap->next){
drh75897232000-05-29 14:26:00 +0000934 for(nap=ap->next; nap && nap->sp==ap->sp; nap=nap->next){
935 /* The two actions "ap" and "nap" have the same lookahead.
936 ** Figure out which one should be used */
937 lemp->nconflict += resolve_conflict(ap,nap,lemp->errsym);
938 }
939 }
940 }
941
942 /* Report an error for each rule that can never be reduced. */
drhb27b83a2002-08-14 23:18:57 +0000943 for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = B_FALSE;
drh75897232000-05-29 14:26:00 +0000944 for(i=0; i<lemp->nstate; i++){
945 struct action *ap;
946 for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){
drhb27b83a2002-08-14 23:18:57 +0000947 if( ap->type==REDUCE ) ap->x.rp->canReduce = B_TRUE;
drh75897232000-05-29 14:26:00 +0000948 }
949 }
950 for(rp=lemp->rule; rp; rp=rp->next){
951 if( rp->canReduce ) continue;
952 ErrorMsg(lemp->filename,rp->ruleline,"This rule can not be reduced.\n");
953 lemp->errorcnt++;
954 }
955}
956
957/* Resolve a conflict between the two given actions. If the
958** conflict can't be resolve, return non-zero.
959**
960** NO LONGER TRUE:
961** To resolve a conflict, first look to see if either action
962** is on an error rule. In that case, take the action which
963** is not associated with the error rule. If neither or both
964** actions are associated with an error rule, then try to
965** use precedence to resolve the conflict.
966**
967** If either action is a SHIFT, then it must be apx. This
968** function won't work if apx->type==REDUCE and apy->type==SHIFT.
969*/
970static int resolve_conflict(apx,apy,errsym)
971struct action *apx;
972struct action *apy;
973struct symbol *errsym; /* The error symbol (if defined. NULL otherwise) */
974{
975 struct symbol *spx, *spy;
976 int errcnt = 0;
977 assert( apx->sp==apy->sp ); /* Otherwise there would be no conflict */
978 if( apx->type==SHIFT && apy->type==REDUCE ){
979 spx = apx->sp;
980 spy = apy->x.rp->precsym;
981 if( spy==0 || spx->prec<0 || spy->prec<0 ){
982 /* Not enough precedence information. */
983 apy->type = CONFLICT;
984 errcnt++;
985 }else if( spx->prec>spy->prec ){ /* Lower precedence wins */
986 apy->type = RD_RESOLVED;
987 }else if( spx->prec<spy->prec ){
988 apx->type = SH_RESOLVED;
989 }else if( spx->prec==spy->prec && spx->assoc==RIGHT ){ /* Use operator */
990 apy->type = RD_RESOLVED; /* associativity */
991 }else if( spx->prec==spy->prec && spx->assoc==LEFT ){ /* to break tie */
992 apx->type = SH_RESOLVED;
993 }else{
994 assert( spx->prec==spy->prec && spx->assoc==NONE );
995 apy->type = CONFLICT;
996 errcnt++;
997 }
998 }else if( apx->type==REDUCE && apy->type==REDUCE ){
999 spx = apx->x.rp->precsym;
1000 spy = apy->x.rp->precsym;
1001 if( spx==0 || spy==0 || spx->prec<0 ||
1002 spy->prec<0 || spx->prec==spy->prec ){
1003 apy->type = CONFLICT;
1004 errcnt++;
1005 }else if( spx->prec>spy->prec ){
1006 apy->type = RD_RESOLVED;
1007 }else if( spx->prec<spy->prec ){
1008 apx->type = RD_RESOLVED;
1009 }
1010 }else{
drhb59499c2002-02-23 18:45:13 +00001011 assert(
1012 apx->type==SH_RESOLVED ||
1013 apx->type==RD_RESOLVED ||
1014 apx->type==CONFLICT ||
1015 apy->type==SH_RESOLVED ||
1016 apy->type==RD_RESOLVED ||
1017 apy->type==CONFLICT
1018 );
1019 /* The REDUCE/SHIFT case cannot happen because SHIFTs come before
1020 ** REDUCEs on the list. If we reach this point it must be because
1021 ** the parser conflict had already been resolved. */
drh75897232000-05-29 14:26:00 +00001022 }
1023 return errcnt;
1024}
1025/********************* From the file "configlist.c" *************************/
/*
** Routines for processing a configuration list and building a state
** in the LEMON parser generator.
*/
1030
1031static struct config *freelist = 0; /* List of free configurations */
1032static struct config *current = 0; /* Top of list of configurations */
1033static struct config **currentend = 0; /* Last on list of configs */
1034static struct config *basis = 0; /* Top of list of basis configs */
1035static struct config **basisend = 0; /* End of list of basis configs */
1036
1037/* Return a pointer to a new configuration */
1038PRIVATE struct config *newconfig(){
1039 struct config *new;
1040 if( freelist==0 ){
1041 int i;
1042 int amt = 3;
1043 freelist = (struct config *)malloc( sizeof(struct config)*amt );
1044 if( freelist==0 ){
1045 fprintf(stderr,"Unable to allocate memory for a new configuration.");
1046 exit(1);
1047 }
1048 for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1];
1049 freelist[amt-1].next = 0;
1050 }
1051 new = freelist;
1052 freelist = freelist->next;
1053 return new;
1054}
1055
1056/* The configuration "old" is no longer used */
1057PRIVATE void deleteconfig(old)
1058struct config *old;
1059{
1060 old->next = freelist;
1061 freelist = old;
1062}
1063
1064/* Initialized the configuration list builder */
1065void Configlist_init(){
1066 current = 0;
1067 currentend = &current;
1068 basis = 0;
1069 basisend = &basis;
1070 Configtable_init();
1071 return;
1072}
1073
1074/* Initialized the configuration list builder */
1075void Configlist_reset(){
1076 current = 0;
1077 currentend = &current;
1078 basis = 0;
1079 basisend = &basis;
1080 Configtable_clear(0);
1081 return;
1082}
1083
1084/* Add another configuration to the configuration list */
1085struct config *Configlist_add(rp,dot)
1086struct rule *rp; /* The rule */
1087int dot; /* Index into the RHS of the rule where the dot goes */
1088{
1089 struct config *cfp, model;
1090
1091 assert( currentend!=0 );
1092 model.rp = rp;
1093 model.dot = dot;
1094 cfp = Configtable_find(&model);
1095 if( cfp==0 ){
1096 cfp = newconfig();
1097 cfp->rp = rp;
1098 cfp->dot = dot;
1099 cfp->fws = SetNew();
1100 cfp->stp = 0;
1101 cfp->fplp = cfp->bplp = 0;
1102 cfp->next = 0;
1103 cfp->bp = 0;
1104 *currentend = cfp;
1105 currentend = &cfp->next;
1106 Configtable_insert(cfp);
1107 }
1108 return cfp;
1109}
1110
1111/* Add a basis configuration to the configuration list */
1112struct config *Configlist_addbasis(rp,dot)
1113struct rule *rp;
1114int dot;
1115{
1116 struct config *cfp, model;
1117
1118 assert( basisend!=0 );
1119 assert( currentend!=0 );
1120 model.rp = rp;
1121 model.dot = dot;
1122 cfp = Configtable_find(&model);
1123 if( cfp==0 ){
1124 cfp = newconfig();
1125 cfp->rp = rp;
1126 cfp->dot = dot;
1127 cfp->fws = SetNew();
1128 cfp->stp = 0;
1129 cfp->fplp = cfp->bplp = 0;
1130 cfp->next = 0;
1131 cfp->bp = 0;
1132 *currentend = cfp;
1133 currentend = &cfp->next;
1134 *basisend = cfp;
1135 basisend = &cfp->bp;
1136 Configtable_insert(cfp);
1137 }
1138 return cfp;
1139}
1140
1141/* Compute the closure of the configuration list */
1142void Configlist_closure(lemp)
1143struct lemon *lemp;
1144{
1145 struct config *cfp, *newcfp;
1146 struct rule *rp, *newrp;
1147 struct symbol *sp, *xsp;
1148 int i, dot;
1149
1150 assert( currentend!=0 );
1151 for(cfp=current; cfp; cfp=cfp->next){
1152 rp = cfp->rp;
1153 dot = cfp->dot;
1154 if( dot>=rp->nrhs ) continue;
1155 sp = rp->rhs[dot];
1156 if( sp->type==NONTERMINAL ){
1157 if( sp->rule==0 && sp!=lemp->errsym ){
1158 ErrorMsg(lemp->filename,rp->line,"Nonterminal \"%s\" has no rules.",
1159 sp->name);
1160 lemp->errorcnt++;
1161 }
1162 for(newrp=sp->rule; newrp; newrp=newrp->nextlhs){
1163 newcfp = Configlist_add(newrp,0);
1164 for(i=dot+1; i<rp->nrhs; i++){
1165 xsp = rp->rhs[i];
1166 if( xsp->type==TERMINAL ){
1167 SetAdd(newcfp->fws,xsp->index);
1168 break;
1169 }else{
1170 SetUnion(newcfp->fws,xsp->firstset);
drhb27b83a2002-08-14 23:18:57 +00001171 if( xsp->lambda==B_FALSE ) break;
drh75897232000-05-29 14:26:00 +00001172 }
1173 }
1174 if( i==rp->nrhs ) Plink_add(&cfp->fplp,newcfp);
1175 }
1176 }
1177 }
1178 return;
1179}
1180
1181/* Sort the configuration list */
1182void Configlist_sort(){
drh218dc692004-05-31 23:13:45 +00001183 current = (struct config *)msort((char *)current,(char **)&(current->next),Configcmp);
drh75897232000-05-29 14:26:00 +00001184 currentend = 0;
1185 return;
1186}
1187
1188/* Sort the basis configuration list */
1189void Configlist_sortbasis(){
drh218dc692004-05-31 23:13:45 +00001190 basis = (struct config *)msort((char *)current,(char **)&(current->bp),Configcmp);
drh75897232000-05-29 14:26:00 +00001191 basisend = 0;
1192 return;
1193}
1194
1195/* Return a pointer to the head of the configuration list and
1196** reset the list */
1197struct config *Configlist_return(){
1198 struct config *old;
1199 old = current;
1200 current = 0;
1201 currentend = 0;
1202 return old;
1203}
1204
1205/* Return a pointer to the head of the configuration list and
1206** reset the list */
1207struct config *Configlist_basis(){
1208 struct config *old;
1209 old = basis;
1210 basis = 0;
1211 basisend = 0;
1212 return old;
1213}
1214
1215/* Free all elements of the given configuration list */
1216void Configlist_eat(cfp)
1217struct config *cfp;
1218{
1219 struct config *nextcfp;
1220 for(; cfp; cfp=nextcfp){
1221 nextcfp = cfp->next;
1222 assert( cfp->fplp==0 );
1223 assert( cfp->bplp==0 );
1224 if( cfp->fws ) SetFree(cfp->fws);
1225 deleteconfig(cfp);
1226 }
1227 return;
1228}
1229/***************** From the file "error.c" *********************************/
1230/*
1231** Code for printing error message.
1232*/
1233
/* Find a good place to break "msg" so that its length is at least "min"
** but no more than "max".  Make the point as close to max as possible.
**
** Scans msg[min..max].  A space is a break point; a '-' allows a break
** just after it; a newline (rewritten to a space) or the end of the
** string ends the scan at once.  Tabs are rewritten to spaces in place.
** Returns the index of the chosen break, or "min" when nothing suitable
** was seen.
*/
static int findbreak(msg,min,max)
char *msg;
int min;
int max;
{
  int i,spot;
  char c;
  for(i=spot=min; i<=max; i++){
    c = msg[i];
    if( c=='\t' ) msg[i] = ' ';
    if( c=='\n' ){ msg[i] = ' '; spot = i; break; }
    if( c==0 ){ spot = i; break; }
    if( c=='-' && i<max-1 ) spot = i+1;
    if( c==' ' ) spot = i;
  }
  return spot;
}

/*
** Format an error message printf-style and print it on standard output,
** each output line prefixed by "filename:lineno: " (or "filename: " when
** lineno is not positive).
**
** The error message is split across multiple lines if necessary.  The
** splits occur at a space, if there is a space available near the end
** of the line.  A run of text with no break point at all is cut at the
** available width so that every pass through the output loop consumes
** at least one character.
**
** Messages longer than ERRMSGSIZE-1 characters are truncated.
*/
#define ERRMSGSIZE  10000 /* Size of the message buffer; longer text is cut */
#define LINEWIDTH      79 /* Max width of any output line */
#define PREFIXLIMIT    30 /* Max width of the prefix on each line */
void ErrorMsg(const char *filename, int lineno, const char *format, ...){
  char errmsg[ERRMSGSIZE];
  char prefix[PREFIXLIMIT+10];
  int errmsgsize;
  int prefixsize;
  int availablewidth;
  va_list ap;
  int end, restart, base;

  /* Prepare a prefix to be prepended to every output line */
  if( lineno>0 ){
    snprintf(prefix,sizeof(prefix),"%.*s:%d: ",PREFIXLIMIT-10,filename,lineno);
  }else{
    snprintf(prefix,sizeof(prefix),"%.*s: ",PREFIXLIMIT-10,filename);
  }
  prefixsize = (int)strlen(prefix);
  availablewidth = LINEWIDTH - prefixsize;

  /* Generate the error message, never writing past the buffer */
  va_start(ap, format);
  vsnprintf(errmsg,sizeof(errmsg),format,ap);
  va_end(ap);
  errmsgsize = (int)strlen(errmsg);
  /* Remove trailing '\n's from the error message. */
  while( errmsgsize>0 && errmsg[errmsgsize-1]=='\n' ){
    errmsg[--errmsgsize] = 0;
  }

  /* Print the error message */
  base = 0;
  while( errmsg[base]!=0 ){
    end = restart = findbreak(&errmsg[base],0,availablewidth);
    if( end==0 && errmsg[base]!=' ' ){
      /* No break point in this stretch.  findbreak() saw no terminator
      ** in [0..availablewidth], so cutting at availablewidth stays
      ** inside the string and guarantees forward progress. */
      end = restart = availablewidth;
    }
    restart += base;
    while( errmsg[restart]==' ' ) restart++;
    fprintf(stdout,"%s%.*s\n",prefix,end,&errmsg[base]);
    base = restart;
  }
}
1301/**************** From the file "main.c" ************************************/
1302/*
1303** Main program file for the LEMON parser generator.
1304*/
1305
/* Report an out-of-memory condition on standard error and terminate
** the program with exit status 1.  This function is used mostly by the
** "MemoryCheck" macro in struct.h
*/
void memory_error(){
  fputs("Out of memory. Aborting...\n",stderr);
  exit(1);
}
1313
static int nDefine = 0;      /* Number of -D options on the command line */
static char **azDefine = 0;  /* Name of the -D macros */

/* This routine is called with the argument to each -D command-line option.
** Add the macro defined to the azDefine array.
**
** A private copy of "z" is stored; if the text contains an '=' the copy
** is cut off there, so only the macro name is kept.  The caller's string
** is not modified.  Prints "out of memory" and exits with status 1 if
** either allocation fails.
*/
static void handle_D_option(char *z){
  size_t len = strlen(z);
  char *copy;
  char *eq;

  azDefine = realloc(azDefine, sizeof(azDefine[0])*(nDefine+1));
  if( azDefine==0 ){
    fprintf(stderr,"out of memory\n");
    exit(1);
  }
  copy = malloc( len+1 );
  if( copy==0 ){
    fprintf(stderr,"out of memory\n");
    exit(1);
  }
  memcpy(copy, z, len+1);
  eq = strchr(copy,'=');
  if( eq ) *eq = 0;           /* Keep the name, drop "=value" */
  azDefine[nDefine++] = copy;
}
1338
drh75897232000-05-29 14:26:00 +00001339
1340/* The main program. Parse the command line and do it... */
1341int main(argc,argv)
1342int argc;
1343char **argv;
1344{
1345 static int version = 0;
1346 static int rpflag = 0;
1347 static int basisflag = 0;
1348 static int compress = 0;
1349 static int quiet = 0;
1350 static int statistics = 0;
1351 static int mhflag = 0;
1352 static struct s_options options[] = {
1353 {OPT_FLAG, "b", (char*)&basisflag, "Print only the basis in report."},
1354 {OPT_FLAG, "c", (char*)&compress, "Don't compress the action table."},
drh6d08b4d2004-07-20 12:45:22 +00001355 {OPT_FSTR, "D", (char*)handle_D_option, "Define an %ifdef macro."},
drh75897232000-05-29 14:26:00 +00001356 {OPT_FLAG, "g", (char*)&rpflag, "Print grammar without actions."},
1357 {OPT_FLAG, "m", (char*)&mhflag, "Output a makeheaders compatible file"},
1358 {OPT_FLAG, "q", (char*)&quiet, "(Quiet) Don't print the report file."},
drh6d08b4d2004-07-20 12:45:22 +00001359 {OPT_FLAG, "s", (char*)&statistics,
1360 "Print parser stats to standard output."},
drh75897232000-05-29 14:26:00 +00001361 {OPT_FLAG, "x", (char*)&version, "Print the version number."},
1362 {OPT_FLAG,0,0,0}
1363 };
1364 int i;
1365 struct lemon lem;
1366
drhb0c86772000-06-02 23:21:26 +00001367 OptInit(argv,options,stderr);
drh75897232000-05-29 14:26:00 +00001368 if( version ){
drhb19a2bc2001-09-16 00:13:26 +00001369 printf("Lemon version 1.0\n");
drh75897232000-05-29 14:26:00 +00001370 exit(0);
1371 }
drhb0c86772000-06-02 23:21:26 +00001372 if( OptNArgs()!=1 ){
drh75897232000-05-29 14:26:00 +00001373 fprintf(stderr,"Exactly one filename argument is required.\n");
1374 exit(1);
1375 }
1376 lem.errorcnt = 0;
1377
1378 /* Initialize the machine */
1379 Strsafe_init();
1380 Symbol_init();
1381 State_init();
1382 lem.argv0 = argv[0];
drhb0c86772000-06-02 23:21:26 +00001383 lem.filename = OptArg(0);
drh75897232000-05-29 14:26:00 +00001384 lem.basisflag = basisflag;
drh0bd1f4e2002-06-06 18:54:39 +00001385 lem.has_fallback = 0;
drh75897232000-05-29 14:26:00 +00001386 lem.nconflict = 0;
1387 lem.name = lem.include = lem.arg = lem.tokentype = lem.start = 0;
drh960e8c62001-04-03 16:53:21 +00001388 lem.vartype = 0;
drh75897232000-05-29 14:26:00 +00001389 lem.stacksize = 0;
1390 lem.error = lem.overflow = lem.failure = lem.accept = lem.tokendest =
1391 lem.tokenprefix = lem.outname = lem.extracode = 0;
drh960e8c62001-04-03 16:53:21 +00001392 lem.vardest = 0;
drh75897232000-05-29 14:26:00 +00001393 lem.tablesize = 0;
1394 Symbol_new("$");
1395 lem.errsym = Symbol_new("error");
1396
1397 /* Parse the input file */
1398 Parse(&lem);
1399 if( lem.errorcnt ) exit(lem.errorcnt);
1400 if( lem.rule==0 ){
1401 fprintf(stderr,"Empty grammar.\n");
1402 exit(1);
1403 }
1404
1405 /* Count and index the symbols of the grammar */
1406 lem.nsymbol = Symbol_count();
1407 Symbol_new("{default}");
1408 lem.symbols = Symbol_arrayof();
drh60d31652004-02-22 00:08:04 +00001409 for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i;
drh75897232000-05-29 14:26:00 +00001410 qsort(lem.symbols,lem.nsymbol+1,sizeof(struct symbol*),
1411 (int(*)())Symbolcmpp);
1412 for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i;
1413 for(i=1; isupper(lem.symbols[i]->name[0]); i++);
1414 lem.nterminal = i;
1415
1416 /* Generate a reprint of the grammar, if requested on the command line */
1417 if( rpflag ){
1418 Reprint(&lem);
1419 }else{
1420 /* Initialize the size for all follow and first sets */
1421 SetSize(lem.nterminal);
1422
1423 /* Find the precedence for every production rule (that has one) */
1424 FindRulePrecedences(&lem);
1425
1426 /* Compute the lambda-nonterminals and the first-sets for every
1427 ** nonterminal */
1428 FindFirstSets(&lem);
1429
1430 /* Compute all LR(0) states. Also record follow-set propagation
1431 ** links so that the follow-set can be computed later */
1432 lem.nstate = 0;
1433 FindStates(&lem);
1434 lem.sorted = State_arrayof();
1435
1436 /* Tie up loose ends on the propagation links */
1437 FindLinks(&lem);
1438
1439 /* Compute the follow set of every reducible configuration */
1440 FindFollowSets(&lem);
1441
1442 /* Compute the action tables */
1443 FindActions(&lem);
1444
1445 /* Compress the action tables */
1446 if( compress==0 ) CompressTables(&lem);
1447
1448 /* Generate a report of the parser generated. (the "y.output" file) */
1449 if( !quiet ) ReportOutput(&lem);
1450
1451 /* Generate the source code for the parser */
1452 ReportTable(&lem, mhflag);
1453
1454 /* Produce a header file for use by the scanner. (This step is
1455 ** omitted if the "-m" option is used because makeheaders will
1456 ** generate the file for us.) */
1457 if( !mhflag ) ReportHeader(&lem);
1458 }
1459 if( statistics ){
1460 printf("Parser statistics: %d terminals, %d nonterminals, %d rules\n",
1461 lem.nterminal, lem.nsymbol - lem.nterminal, lem.nrule);
1462 printf(" %d states, %d parser table entries, %d conflicts\n",
1463 lem.nstate, lem.tablesize, lem.nconflict);
1464 }
1465 if( lem.nconflict ){
1466 fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict);
1467 }
1468 exit(lem.errorcnt + lem.nconflict);
drh218dc692004-05-31 23:13:45 +00001469 return (lem.errorcnt + lem.nconflict);
drh75897232000-05-29 14:26:00 +00001470}
1471/******************** From the file "msort.c" *******************************/
1472/*
1473** A generic merge-sort program.
1474**
1475** USAGE:
1476** Let "ptr" be a pointer to some structure which is at the head of
1477** a null-terminated list. Then to sort the list call:
1478**
1479** ptr = msort(ptr,&(ptr->next),cmpfnc);
1480**
1481** In the above, "cmpfnc" is a pointer to a function which compares
1482** two instances of the structure and returns an integer, as in
1483** strcmp. The second argument is a pointer to the pointer to the
1484** second element of the linked list. This address is used to compute
1485** the offset to the "next" field within the structure. The offset to
1486** the "next" field must be constant for all structures in the list.
1487**
1488** The function returns a new pointer which is the head of the list
1489** after sorting.
1490**
1491** ALGORITHM:
1492** Merge-sort.
1493*/
1494
/*
** Return a pointer to the next structure in the linked list.
**
** "A" is the address of a list element and "offset" (a variable that
** must be in scope at the point of use) is the byte offset of its link
** field.  The arithmetic is done on char pointers so that it is correct
** whatever the relative sizes of pointers and integer types.
*/
#define NEXT(A) (*(char**)(((char*)(A))+offset))

/*
** Inputs:
**   a:       A sorted, null-terminated linked list.  (May be null).
**   b:       A sorted, null-terminated linked list.  (May be null).
**   cmp:     A pointer to the comparison function.
**   offset:  Offset in the structure to the "next" field.
**
** Return Value:
**   A pointer to the head of a sorted list containing the elements
**   of both a and b.
**
** Side effects:
**   The "next" pointers for elements in the lists a and b are
**   changed.
*/
static char *merge(a,b,cmp,offset)
char *a;
char *b;
int (*cmp)();
size_t offset;
{
  char *ptr, *head;

  if( a==0 ){
    head = b;
  }else if( b==0 ){
    head = a;
  }else{
    if( (*cmp)(a,b)<0 ){
      ptr = a;
      a = NEXT(a);
    }else{
      ptr = b;
      b = NEXT(b);
    }
    head = ptr;
    while( a && b ){
      if( (*cmp)(a,b)<0 ){
        NEXT(ptr) = a;
        ptr = a;
        a = NEXT(a);
      }else{
        NEXT(ptr) = b;
        ptr = b;
        b = NEXT(b);
      }
    }
    if( a ) NEXT(ptr) = a;
    else    NEXT(ptr) = b;
  }
  return head;
}

/*
** Inputs:
**   list:      Pointer to a singly-linked list of structures.
**   next:      Pointer to pointer to the second element of the list.
**   cmp:       A comparison function.
**
** Return Value:
**   A pointer to the head of a sorted list containing the elements
**   originally in list.  An empty (null) list yields null.
**
** Side effects:
**   The "next" pointers for elements in list are changed.
**
** Elements are peeled off one at a time and merged into an array of
** buckets, carrying upward while a bucket is occupied.  The last
** bucket absorbs whatever reaches it, so no element is ever dropped.
*/
#define LISTSIZE 30
char *msort(list,next,cmp)
char *list;
char **next;
int (*cmp)();
{
  size_t offset;
  char *ep;
  char *set[LISTSIZE];
  int i;
  if( list==0 ) return 0;     /* Nothing to sort, and no offset to compute */
  offset = (size_t)((char*)next - list);
  for(i=0; i<LISTSIZE; i++) set[i] = 0;
  while( list ){
    ep = list;
    list = NEXT(list);
    NEXT(ep) = 0;
    for(i=0; i<LISTSIZE-1 && set[i]!=0; i++){
      ep = merge(ep,set[i],cmp,offset);
      set[i] = 0;
    }
    /* Only the final bucket can still be occupied here; merge into it
    ** rather than overwriting its contents. */
    if( set[i] ) ep = merge(ep,set[i],cmp,offset);
    set[i] = ep;
  }
  ep = 0;
  for(i=0; i<LISTSIZE; i++) if( set[i] ) ep = merge(ep,set[i],cmp,offset);
  return ep;
}
1592/************************ From the file "option.c" **************************/
static char **argv;              /* Command line saved by OptInit() */
static struct s_options *op;     /* Option table saved by OptInit() */
static FILE *errstream;          /* Error stream saved by OptInit() */

/* True if X looks like an option: starts with '-' or '+', or contains '=' */
#define ISOPT(X) ((X)[0]=='-'||(X)[0]=='+'||strchr((X),'=')!=0)

/*
** Print the command line with a caret pointing to the k-th character
** of the n-th field.
**
** The whole command line is written to "err" on one line, followed by
** a second line holding a "^-- here" or "here --^" marker whose caret
** sits under the requested character.  Does nothing when there is no
** saved command line.
*/
static void errline(n,k,err)
int n;
int k;
FILE *err;
{
  int spcnt, i;
  if( argv==0 || argv[0]==0 ) return;
  fprintf(err,"%s",argv[0]);
  spcnt = (int)strlen(argv[0]) + 1;
  for(i=1; i<n && argv[i]; i++){
    fprintf(err," %s",argv[i]);
    /* Each earlier field takes its own length plus the separating space */
    spcnt += (int)strlen(argv[i]) + 1;
  }
  spcnt += k;
  for(; argv[i]; i++) fprintf(err," %s",argv[i]);
  if( spcnt<20 ){
    fprintf(err,"\n%*s^-- here\n",spcnt,"");
  }else{
    fprintf(err,"\n%*shere --^\n",spcnt-7,"");
  }
}
1624
1625/*
1626** Return the index of the N-th non-switch argument. Return -1
1627** if N is out of range.
1628*/
1629static int argindex(n)
1630int n;
1631{
1632 int i;
1633 int dashdash = 0;
1634 if( argv!=0 && *argv!=0 ){
1635 for(i=1; argv[i]; i++){
1636 if( dashdash || !ISOPT(argv[i]) ){
1637 if( n==0 ) return i;
1638 n--;
1639 }
1640 if( strcmp(argv[i],"--")==0 ) dashdash = 1;
1641 }
1642 }
1643 return -1;
1644}
1645
1646static char emsg[] = "Command line syntax error: ";
1647
1648/*
1649** Process a flag command line argument.
1650*/
1651static int handleflags(i,err)
1652int i;
1653FILE *err;
1654{
1655 int v;
1656 int errcnt = 0;
1657 int j;
1658 for(j=0; op[j].label; j++){
drh6d08b4d2004-07-20 12:45:22 +00001659 if( strncmp(&argv[i][1],op[j].label,strlen(op[j].label))==0 ) break;
drh75897232000-05-29 14:26:00 +00001660 }
1661 v = argv[i][0]=='-' ? 1 : 0;
1662 if( op[j].label==0 ){
1663 if( err ){
1664 fprintf(err,"%sundefined option.\n",emsg);
1665 errline(i,1,err);
1666 }
1667 errcnt++;
1668 }else if( op[j].type==OPT_FLAG ){
1669 *((int*)op[j].arg) = v;
1670 }else if( op[j].type==OPT_FFLAG ){
1671 (*(void(*)())(op[j].arg))(v);
drh6d08b4d2004-07-20 12:45:22 +00001672 }else if( op[j].type==OPT_FSTR ){
1673 (*(void(*)())(op[j].arg))(&argv[i][2]);
drh75897232000-05-29 14:26:00 +00001674 }else{
1675 if( err ){
1676 fprintf(err,"%smissing argument on switch.\n",emsg);
1677 errline(i,1,err);
1678 }
1679 errcnt++;
1680 }
1681 return errcnt;
1682}
1683
1684/*
1685** Process a command line switch which has an argument.
1686*/
1687static int handleswitch(i,err)
1688int i;
1689FILE *err;
1690{
1691 int lv = 0;
1692 double dv = 0.0;
1693 char *sv = 0, *end;
1694 char *cp;
1695 int j;
1696 int errcnt = 0;
1697 cp = strchr(argv[i],'=');
1698 *cp = 0;
1699 for(j=0; op[j].label; j++){
1700 if( strcmp(argv[i],op[j].label)==0 ) break;
1701 }
1702 *cp = '=';
1703 if( op[j].label==0 ){
1704 if( err ){
1705 fprintf(err,"%sundefined option.\n",emsg);
1706 errline(i,0,err);
1707 }
1708 errcnt++;
1709 }else{
1710 cp++;
1711 switch( op[j].type ){
1712 case OPT_FLAG:
1713 case OPT_FFLAG:
1714 if( err ){
1715 fprintf(err,"%soption requires an argument.\n",emsg);
1716 errline(i,0,err);
1717 }
1718 errcnt++;
1719 break;
1720 case OPT_DBL:
1721 case OPT_FDBL:
1722 dv = strtod(cp,&end);
1723 if( *end ){
1724 if( err ){
1725 fprintf(err,"%sillegal character in floating-point argument.\n",emsg);
drhba99af52001-10-25 20:37:16 +00001726 errline(i,((unsigned long)end)-(unsigned long)argv[i],err);
drh75897232000-05-29 14:26:00 +00001727 }
1728 errcnt++;
1729 }
1730 break;
1731 case OPT_INT:
1732 case OPT_FINT:
1733 lv = strtol(cp,&end,0);
1734 if( *end ){
1735 if( err ){
1736 fprintf(err,"%sillegal character in integer argument.\n",emsg);
drhba99af52001-10-25 20:37:16 +00001737 errline(i,((unsigned long)end)-(unsigned long)argv[i],err);
drh75897232000-05-29 14:26:00 +00001738 }
1739 errcnt++;
1740 }
1741 break;
1742 case OPT_STR:
1743 case OPT_FSTR:
1744 sv = cp;
1745 break;
1746 }
1747 switch( op[j].type ){
1748 case OPT_FLAG:
1749 case OPT_FFLAG:
1750 break;
1751 case OPT_DBL:
1752 *(double*)(op[j].arg) = dv;
1753 break;
1754 case OPT_FDBL:
1755 (*(void(*)())(op[j].arg))(dv);
1756 break;
1757 case OPT_INT:
1758 *(int*)(op[j].arg) = lv;
1759 break;
1760 case OPT_FINT:
1761 (*(void(*)())(op[j].arg))((int)lv);
1762 break;
1763 case OPT_STR:
1764 *(char**)(op[j].arg) = sv;
1765 break;
1766 case OPT_FSTR:
1767 (*(void(*)())(op[j].arg))(sv);
1768 break;
1769 }
1770 }
1771 return errcnt;
1772}
1773
drhb0c86772000-06-02 23:21:26 +00001774int OptInit(a,o,err)
drh75897232000-05-29 14:26:00 +00001775char **a;
1776struct s_options *o;
1777FILE *err;
1778{
1779 int errcnt = 0;
1780 argv = a;
1781 op = o;
1782 errstream = err;
1783 if( argv && *argv && op ){
1784 int i;
1785 for(i=1; argv[i]; i++){
1786 if( argv[i][0]=='+' || argv[i][0]=='-' ){
1787 errcnt += handleflags(i,err);
1788 }else if( strchr(argv[i],'=') ){
1789 errcnt += handleswitch(i,err);
1790 }
1791 }
1792 }
1793 if( errcnt>0 ){
1794 fprintf(err,"Valid command line options for \"%s\" are:\n",*a);
drhb0c86772000-06-02 23:21:26 +00001795 OptPrint();
drh75897232000-05-29 14:26:00 +00001796 exit(1);
1797 }
1798 return 0;
1799}
1800
drhb0c86772000-06-02 23:21:26 +00001801int OptNArgs(){
drh75897232000-05-29 14:26:00 +00001802 int cnt = 0;
1803 int dashdash = 0;
1804 int i;
1805 if( argv!=0 && argv[0]!=0 ){
1806 for(i=1; argv[i]; i++){
1807 if( dashdash || !ISOPT(argv[i]) ) cnt++;
1808 if( strcmp(argv[i],"--")==0 ) dashdash = 1;
1809 }
1810 }
1811 return cnt;
1812}
1813
drhb0c86772000-06-02 23:21:26 +00001814char *OptArg(n)
drh75897232000-05-29 14:26:00 +00001815int n;
1816{
1817 int i;
1818 i = argindex(n);
1819 return i>=0 ? argv[i] : 0;
1820}
1821
drhb0c86772000-06-02 23:21:26 +00001822void OptErr(n)
drh75897232000-05-29 14:26:00 +00001823int n;
1824{
1825 int i;
1826 i = argindex(n);
1827 if( i>=0 ) errline(i,0,errstream);
1828}
1829
drhb0c86772000-06-02 23:21:26 +00001830void OptPrint(){
drh75897232000-05-29 14:26:00 +00001831 int i;
1832 int max, len;
1833 max = 0;
1834 for(i=0; op[i].label; i++){
1835 len = strlen(op[i].label) + 1;
1836 switch( op[i].type ){
1837 case OPT_FLAG:
1838 case OPT_FFLAG:
1839 break;
1840 case OPT_INT:
1841 case OPT_FINT:
1842 len += 9; /* length of "<integer>" */
1843 break;
1844 case OPT_DBL:
1845 case OPT_FDBL:
1846 len += 6; /* length of "<real>" */
1847 break;
1848 case OPT_STR:
1849 case OPT_FSTR:
1850 len += 8; /* length of "<string>" */
1851 break;
1852 }
1853 if( len>max ) max = len;
1854 }
1855 for(i=0; op[i].label; i++){
1856 switch( op[i].type ){
1857 case OPT_FLAG:
1858 case OPT_FFLAG:
1859 fprintf(errstream," -%-*s %s\n",max,op[i].label,op[i].message);
1860 break;
1861 case OPT_INT:
1862 case OPT_FINT:
1863 fprintf(errstream," %s=<integer>%*s %s\n",op[i].label,
drh8b582012003-10-21 13:16:03 +00001864 (int)(max-strlen(op[i].label)-9),"",op[i].message);
drh75897232000-05-29 14:26:00 +00001865 break;
1866 case OPT_DBL:
1867 case OPT_FDBL:
1868 fprintf(errstream," %s=<real>%*s %s\n",op[i].label,
drh8b582012003-10-21 13:16:03 +00001869 (int)(max-strlen(op[i].label)-6),"",op[i].message);
drh75897232000-05-29 14:26:00 +00001870 break;
1871 case OPT_STR:
1872 case OPT_FSTR:
1873 fprintf(errstream," %s=<string>%*s %s\n",op[i].label,
drh8b582012003-10-21 13:16:03 +00001874 (int)(max-strlen(op[i].label)-8),"",op[i].message);
drh75897232000-05-29 14:26:00 +00001875 break;
1876 }
1877 }
1878}
1879/*********************** From the file "parse.c" ****************************/
1880/*
1881** Input file parser for the LEMON parser generator.
1882*/
1883
1884/* The state of the parser */
1885struct pstate {
1886 char *filename; /* Name of the input file */
1887 int tokenlineno; /* Linenumber at which current token starts */
1888 int errorcnt; /* Number of errors so far */
1889 char *tokenstart; /* Text of current token */
1890 struct lemon *gp; /* Global state vector */
1891 enum e_state {
1892 INITIALIZE,
1893 WAITING_FOR_DECL_OR_RULE,
1894 WAITING_FOR_DECL_KEYWORD,
1895 WAITING_FOR_DECL_ARG,
1896 WAITING_FOR_PRECEDENCE_SYMBOL,
1897 WAITING_FOR_ARROW,
1898 IN_RHS,
1899 LHS_ALIAS_1,
1900 LHS_ALIAS_2,
1901 LHS_ALIAS_3,
1902 RHS_ALIAS_1,
1903 RHS_ALIAS_2,
1904 PRECEDENCE_MARK_1,
1905 PRECEDENCE_MARK_2,
1906 RESYNC_AFTER_RULE_ERROR,
1907 RESYNC_AFTER_DECL_ERROR,
1908 WAITING_FOR_DESTRUCTOR_SYMBOL,
drh0bd1f4e2002-06-06 18:54:39 +00001909 WAITING_FOR_DATATYPE_SYMBOL,
1910 WAITING_FOR_FALLBACK_ID
drh75897232000-05-29 14:26:00 +00001911 } state; /* The state of the parser */
drh0bd1f4e2002-06-06 18:54:39 +00001912 struct symbol *fallback; /* The fallback token */
drh75897232000-05-29 14:26:00 +00001913 struct symbol *lhs; /* Left-hand side of current rule */
1914 char *lhsalias; /* Alias for the LHS */
1915 int nrhs; /* Number of right-hand side symbols seen */
1916 struct symbol *rhs[MAXRHS]; /* RHS symbols */
1917 char *alias[MAXRHS]; /* Aliases for each RHS symbol (or NULL) */
1918 struct rule *prevrule; /* Previous rule parsed */
1919 char *declkeyword; /* Keyword of a declaration */
1920 char **declargslot; /* Where the declaration argument should be put */
1921 int *decllnslot; /* Where the declaration linenumber is put */
1922 enum e_assoc declassoc; /* Assign this association to decl arguments */
1923 int preccounter; /* Assign this precedence to decl arguments */
1924 struct rule *firstrule; /* Pointer to first rule in the grammar */
1925 struct rule *lastrule; /* Pointer to the most recently parsed rule */
1926};
1927
1928/* Parse a single token */
1929static void parseonetoken(psp)
1930struct pstate *psp;
1931{
1932 char *x;
1933 x = Strsafe(psp->tokenstart); /* Save the token permanently */
1934#if 0
1935 printf("%s:%d: Token=[%s] state=%d\n",psp->filename,psp->tokenlineno,
1936 x,psp->state);
1937#endif
1938 switch( psp->state ){
1939 case INITIALIZE:
1940 psp->prevrule = 0;
1941 psp->preccounter = 0;
1942 psp->firstrule = psp->lastrule = 0;
1943 psp->gp->nrule = 0;
1944 /* Fall thru to next case */
1945 case WAITING_FOR_DECL_OR_RULE:
1946 if( x[0]=='%' ){
1947 psp->state = WAITING_FOR_DECL_KEYWORD;
1948 }else if( islower(x[0]) ){
1949 psp->lhs = Symbol_new(x);
1950 psp->nrhs = 0;
1951 psp->lhsalias = 0;
1952 psp->state = WAITING_FOR_ARROW;
1953 }else if( x[0]=='{' ){
1954 if( psp->prevrule==0 ){
1955 ErrorMsg(psp->filename,psp->tokenlineno,
1956"There is not prior rule opon which to attach the code \
1957fragment which begins on this line.");
1958 psp->errorcnt++;
1959 }else if( psp->prevrule->code!=0 ){
1960 ErrorMsg(psp->filename,psp->tokenlineno,
1961"Code fragment beginning on this line is not the first \
1962to follow the previous rule.");
1963 psp->errorcnt++;
1964 }else{
1965 psp->prevrule->line = psp->tokenlineno;
1966 psp->prevrule->code = &x[1];
1967 }
1968 }else if( x[0]=='[' ){
1969 psp->state = PRECEDENCE_MARK_1;
1970 }else{
1971 ErrorMsg(psp->filename,psp->tokenlineno,
1972 "Token \"%s\" should be either \"%%\" or a nonterminal name.",
1973 x);
1974 psp->errorcnt++;
1975 }
1976 break;
1977 case PRECEDENCE_MARK_1:
1978 if( !isupper(x[0]) ){
1979 ErrorMsg(psp->filename,psp->tokenlineno,
1980 "The precedence symbol must be a terminal.");
1981 psp->errorcnt++;
1982 }else if( psp->prevrule==0 ){
1983 ErrorMsg(psp->filename,psp->tokenlineno,
1984 "There is no prior rule to assign precedence \"[%s]\".",x);
1985 psp->errorcnt++;
1986 }else if( psp->prevrule->precsym!=0 ){
1987 ErrorMsg(psp->filename,psp->tokenlineno,
1988"Precedence mark on this line is not the first \
1989to follow the previous rule.");
1990 psp->errorcnt++;
1991 }else{
1992 psp->prevrule->precsym = Symbol_new(x);
1993 }
1994 psp->state = PRECEDENCE_MARK_2;
1995 break;
1996 case PRECEDENCE_MARK_2:
1997 if( x[0]!=']' ){
1998 ErrorMsg(psp->filename,psp->tokenlineno,
1999 "Missing \"]\" on precedence mark.");
2000 psp->errorcnt++;
2001 }
2002 psp->state = WAITING_FOR_DECL_OR_RULE;
2003 break;
2004 case WAITING_FOR_ARROW:
2005 if( x[0]==':' && x[1]==':' && x[2]=='=' ){
2006 psp->state = IN_RHS;
2007 }else if( x[0]=='(' ){
2008 psp->state = LHS_ALIAS_1;
2009 }else{
2010 ErrorMsg(psp->filename,psp->tokenlineno,
2011 "Expected to see a \":\" following the LHS symbol \"%s\".",
2012 psp->lhs->name);
2013 psp->errorcnt++;
2014 psp->state = RESYNC_AFTER_RULE_ERROR;
2015 }
2016 break;
2017 case LHS_ALIAS_1:
2018 if( isalpha(x[0]) ){
2019 psp->lhsalias = x;
2020 psp->state = LHS_ALIAS_2;
2021 }else{
2022 ErrorMsg(psp->filename,psp->tokenlineno,
2023 "\"%s\" is not a valid alias for the LHS \"%s\"\n",
2024 x,psp->lhs->name);
2025 psp->errorcnt++;
2026 psp->state = RESYNC_AFTER_RULE_ERROR;
2027 }
2028 break;
2029 case LHS_ALIAS_2:
2030 if( x[0]==')' ){
2031 psp->state = LHS_ALIAS_3;
2032 }else{
2033 ErrorMsg(psp->filename,psp->tokenlineno,
2034 "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias);
2035 psp->errorcnt++;
2036 psp->state = RESYNC_AFTER_RULE_ERROR;
2037 }
2038 break;
2039 case LHS_ALIAS_3:
2040 if( x[0]==':' && x[1]==':' && x[2]=='=' ){
2041 psp->state = IN_RHS;
2042 }else{
2043 ErrorMsg(psp->filename,psp->tokenlineno,
2044 "Missing \"->\" following: \"%s(%s)\".",
2045 psp->lhs->name,psp->lhsalias);
2046 psp->errorcnt++;
2047 psp->state = RESYNC_AFTER_RULE_ERROR;
2048 }
2049 break;
2050 case IN_RHS:
2051 if( x[0]=='.' ){
2052 struct rule *rp;
2053 rp = (struct rule *)malloc( sizeof(struct rule) +
2054 sizeof(struct symbol*)*psp->nrhs + sizeof(char*)*psp->nrhs );
2055 if( rp==0 ){
2056 ErrorMsg(psp->filename,psp->tokenlineno,
2057 "Can't allocate enough memory for this rule.");
2058 psp->errorcnt++;
2059 psp->prevrule = 0;
2060 }else{
2061 int i;
2062 rp->ruleline = psp->tokenlineno;
2063 rp->rhs = (struct symbol**)&rp[1];
2064 rp->rhsalias = (char**)&(rp->rhs[psp->nrhs]);
2065 for(i=0; i<psp->nrhs; i++){
2066 rp->rhs[i] = psp->rhs[i];
2067 rp->rhsalias[i] = psp->alias[i];
2068 }
2069 rp->lhs = psp->lhs;
2070 rp->lhsalias = psp->lhsalias;
2071 rp->nrhs = psp->nrhs;
2072 rp->code = 0;
2073 rp->precsym = 0;
2074 rp->index = psp->gp->nrule++;
2075 rp->nextlhs = rp->lhs->rule;
2076 rp->lhs->rule = rp;
2077 rp->next = 0;
2078 if( psp->firstrule==0 ){
2079 psp->firstrule = psp->lastrule = rp;
2080 }else{
2081 psp->lastrule->next = rp;
2082 psp->lastrule = rp;
2083 }
2084 psp->prevrule = rp;
2085 }
2086 psp->state = WAITING_FOR_DECL_OR_RULE;
2087 }else if( isalpha(x[0]) ){
2088 if( psp->nrhs>=MAXRHS ){
2089 ErrorMsg(psp->filename,psp->tokenlineno,
2090 "Too many symbol on RHS or rule beginning at \"%s\".",
2091 x);
2092 psp->errorcnt++;
2093 psp->state = RESYNC_AFTER_RULE_ERROR;
2094 }else{
2095 psp->rhs[psp->nrhs] = Symbol_new(x);
2096 psp->alias[psp->nrhs] = 0;
2097 psp->nrhs++;
2098 }
2099 }else if( x[0]=='(' && psp->nrhs>0 ){
2100 psp->state = RHS_ALIAS_1;
2101 }else{
2102 ErrorMsg(psp->filename,psp->tokenlineno,
2103 "Illegal character on RHS of rule: \"%s\".",x);
2104 psp->errorcnt++;
2105 psp->state = RESYNC_AFTER_RULE_ERROR;
2106 }
2107 break;
2108 case RHS_ALIAS_1:
2109 if( isalpha(x[0]) ){
2110 psp->alias[psp->nrhs-1] = x;
2111 psp->state = RHS_ALIAS_2;
2112 }else{
2113 ErrorMsg(psp->filename,psp->tokenlineno,
2114 "\"%s\" is not a valid alias for the RHS symbol \"%s\"\n",
2115 x,psp->rhs[psp->nrhs-1]->name);
2116 psp->errorcnt++;
2117 psp->state = RESYNC_AFTER_RULE_ERROR;
2118 }
2119 break;
2120 case RHS_ALIAS_2:
2121 if( x[0]==')' ){
2122 psp->state = IN_RHS;
2123 }else{
2124 ErrorMsg(psp->filename,psp->tokenlineno,
2125 "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias);
2126 psp->errorcnt++;
2127 psp->state = RESYNC_AFTER_RULE_ERROR;
2128 }
2129 break;
2130 case WAITING_FOR_DECL_KEYWORD:
2131 if( isalpha(x[0]) ){
2132 psp->declkeyword = x;
2133 psp->declargslot = 0;
2134 psp->decllnslot = 0;
2135 psp->state = WAITING_FOR_DECL_ARG;
2136 if( strcmp(x,"name")==0 ){
2137 psp->declargslot = &(psp->gp->name);
2138 }else if( strcmp(x,"include")==0 ){
2139 psp->declargslot = &(psp->gp->include);
2140 psp->decllnslot = &psp->gp->includeln;
2141 }else if( strcmp(x,"code")==0 ){
2142 psp->declargslot = &(psp->gp->extracode);
2143 psp->decllnslot = &psp->gp->extracodeln;
2144 }else if( strcmp(x,"token_destructor")==0 ){
2145 psp->declargslot = &psp->gp->tokendest;
2146 psp->decllnslot = &psp->gp->tokendestln;
drh960e8c62001-04-03 16:53:21 +00002147 }else if( strcmp(x,"default_destructor")==0 ){
2148 psp->declargslot = &psp->gp->vardest;
2149 psp->decllnslot = &psp->gp->vardestln;
drh75897232000-05-29 14:26:00 +00002150 }else if( strcmp(x,"token_prefix")==0 ){
2151 psp->declargslot = &psp->gp->tokenprefix;
2152 }else if( strcmp(x,"syntax_error")==0 ){
2153 psp->declargslot = &(psp->gp->error);
2154 psp->decllnslot = &psp->gp->errorln;
2155 }else if( strcmp(x,"parse_accept")==0 ){
2156 psp->declargslot = &(psp->gp->accept);
2157 psp->decllnslot = &psp->gp->acceptln;
2158 }else if( strcmp(x,"parse_failure")==0 ){
2159 psp->declargslot = &(psp->gp->failure);
2160 psp->decllnslot = &psp->gp->failureln;
2161 }else if( strcmp(x,"stack_overflow")==0 ){
2162 psp->declargslot = &(psp->gp->overflow);
2163 psp->decllnslot = &psp->gp->overflowln;
2164 }else if( strcmp(x,"extra_argument")==0 ){
2165 psp->declargslot = &(psp->gp->arg);
2166 }else if( strcmp(x,"token_type")==0 ){
2167 psp->declargslot = &(psp->gp->tokentype);
drh960e8c62001-04-03 16:53:21 +00002168 }else if( strcmp(x,"default_type")==0 ){
2169 psp->declargslot = &(psp->gp->vartype);
drh75897232000-05-29 14:26:00 +00002170 }else if( strcmp(x,"stack_size")==0 ){
2171 psp->declargslot = &(psp->gp->stacksize);
2172 }else if( strcmp(x,"start_symbol")==0 ){
2173 psp->declargslot = &(psp->gp->start);
2174 }else if( strcmp(x,"left")==0 ){
2175 psp->preccounter++;
2176 psp->declassoc = LEFT;
2177 psp->state = WAITING_FOR_PRECEDENCE_SYMBOL;
2178 }else if( strcmp(x,"right")==0 ){
2179 psp->preccounter++;
2180 psp->declassoc = RIGHT;
2181 psp->state = WAITING_FOR_PRECEDENCE_SYMBOL;
2182 }else if( strcmp(x,"nonassoc")==0 ){
2183 psp->preccounter++;
2184 psp->declassoc = NONE;
2185 psp->state = WAITING_FOR_PRECEDENCE_SYMBOL;
2186 }else if( strcmp(x,"destructor")==0 ){
2187 psp->state = WAITING_FOR_DESTRUCTOR_SYMBOL;
2188 }else if( strcmp(x,"type")==0 ){
2189 psp->state = WAITING_FOR_DATATYPE_SYMBOL;
drh0bd1f4e2002-06-06 18:54:39 +00002190 }else if( strcmp(x,"fallback")==0 ){
2191 psp->fallback = 0;
2192 psp->state = WAITING_FOR_FALLBACK_ID;
drh75897232000-05-29 14:26:00 +00002193 }else{
2194 ErrorMsg(psp->filename,psp->tokenlineno,
2195 "Unknown declaration keyword: \"%%%s\".",x);
2196 psp->errorcnt++;
2197 psp->state = RESYNC_AFTER_DECL_ERROR;
2198 }
2199 }else{
2200 ErrorMsg(psp->filename,psp->tokenlineno,
2201 "Illegal declaration keyword: \"%s\".",x);
2202 psp->errorcnt++;
2203 psp->state = RESYNC_AFTER_DECL_ERROR;
2204 }
2205 break;
2206 case WAITING_FOR_DESTRUCTOR_SYMBOL:
2207 if( !isalpha(x[0]) ){
2208 ErrorMsg(psp->filename,psp->tokenlineno,
2209 "Symbol name missing after %destructor keyword");
2210 psp->errorcnt++;
2211 psp->state = RESYNC_AFTER_DECL_ERROR;
2212 }else{
2213 struct symbol *sp = Symbol_new(x);
2214 psp->declargslot = &sp->destructor;
2215 psp->decllnslot = &sp->destructorln;
2216 psp->state = WAITING_FOR_DECL_ARG;
2217 }
2218 break;
2219 case WAITING_FOR_DATATYPE_SYMBOL:
2220 if( !isalpha(x[0]) ){
2221 ErrorMsg(psp->filename,psp->tokenlineno,
2222 "Symbol name missing after %destructor keyword");
2223 psp->errorcnt++;
2224 psp->state = RESYNC_AFTER_DECL_ERROR;
2225 }else{
2226 struct symbol *sp = Symbol_new(x);
2227 psp->declargslot = &sp->datatype;
2228 psp->decllnslot = 0;
2229 psp->state = WAITING_FOR_DECL_ARG;
2230 }
2231 break;
2232 case WAITING_FOR_PRECEDENCE_SYMBOL:
2233 if( x[0]=='.' ){
2234 psp->state = WAITING_FOR_DECL_OR_RULE;
2235 }else if( isupper(x[0]) ){
2236 struct symbol *sp;
2237 sp = Symbol_new(x);
2238 if( sp->prec>=0 ){
2239 ErrorMsg(psp->filename,psp->tokenlineno,
2240 "Symbol \"%s\" has already be given a precedence.",x);
2241 psp->errorcnt++;
2242 }else{
2243 sp->prec = psp->preccounter;
2244 sp->assoc = psp->declassoc;
2245 }
2246 }else{
2247 ErrorMsg(psp->filename,psp->tokenlineno,
2248 "Can't assign a precedence to \"%s\".",x);
2249 psp->errorcnt++;
2250 }
2251 break;
2252 case WAITING_FOR_DECL_ARG:
2253 if( (x[0]=='{' || x[0]=='\"' || isalnum(x[0])) ){
2254 if( *(psp->declargslot)!=0 ){
2255 ErrorMsg(psp->filename,psp->tokenlineno,
2256 "The argument \"%s\" to declaration \"%%%s\" is not the first.",
2257 x[0]=='\"' ? &x[1] : x,psp->declkeyword);
2258 psp->errorcnt++;
2259 psp->state = RESYNC_AFTER_DECL_ERROR;
2260 }else{
2261 *(psp->declargslot) = (x[0]=='\"' || x[0]=='{') ? &x[1] : x;
2262 if( psp->decllnslot ) *psp->decllnslot = psp->tokenlineno;
2263 psp->state = WAITING_FOR_DECL_OR_RULE;
2264 }
2265 }else{
2266 ErrorMsg(psp->filename,psp->tokenlineno,
2267 "Illegal argument to %%%s: %s",psp->declkeyword,x);
2268 psp->errorcnt++;
2269 psp->state = RESYNC_AFTER_DECL_ERROR;
2270 }
2271 break;
drh0bd1f4e2002-06-06 18:54:39 +00002272 case WAITING_FOR_FALLBACK_ID:
2273 if( x[0]=='.' ){
2274 psp->state = WAITING_FOR_DECL_OR_RULE;
2275 }else if( !isupper(x[0]) ){
2276 ErrorMsg(psp->filename, psp->tokenlineno,
2277 "%%fallback argument \"%s\" should be a token", x);
2278 psp->errorcnt++;
2279 }else{
2280 struct symbol *sp = Symbol_new(x);
2281 if( psp->fallback==0 ){
2282 psp->fallback = sp;
2283 }else if( sp->fallback ){
2284 ErrorMsg(psp->filename, psp->tokenlineno,
2285 "More than one fallback assigned to token %s", x);
2286 psp->errorcnt++;
2287 }else{
2288 sp->fallback = psp->fallback;
2289 psp->gp->has_fallback = 1;
2290 }
2291 }
2292 break;
drh75897232000-05-29 14:26:00 +00002293 case RESYNC_AFTER_RULE_ERROR:
2294/* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE;
2295** break; */
2296 case RESYNC_AFTER_DECL_ERROR:
2297 if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE;
2298 if( x[0]=='%' ) psp->state = WAITING_FOR_DECL_KEYWORD;
2299 break;
2300 }
2301}
2302
drh6d08b4d2004-07-20 12:45:22 +00002303/* Run the proprocessor over the input file text. The global variables
2304** azDefine[0] through azDefine[nDefine-1] contains the names of all defined
2305** macros. This routine looks for "%ifdef" and "%ifndef" and "%endif" and
2306** comments them out. Text in between is also commented out as appropriate.
2307*/
2308static preprocess_input(char *z){
2309 int i, j, k, n;
2310 int exclude = 0;
2311 int start;
2312 int lineno = 1;
2313 int start_lineno;
2314 for(i=0; z[i]; i++){
2315 if( z[i]=='\n' ) lineno++;
2316 if( z[i]!='%' || (i>0 && z[i-1]!='\n') ) continue;
2317 if( strncmp(&z[i],"%endif",6)==0 && isspace(z[i+6]) ){
2318 if( exclude ){
2319 exclude--;
2320 if( exclude==0 ){
2321 for(j=start; j<i; j++) if( z[j]!='\n' ) z[j] = ' ';
2322 }
2323 }
2324 for(j=i; z[j] && z[j]!='\n'; j++) z[j] = ' ';
2325 }else if( (strncmp(&z[i],"%ifdef",6)==0 && isspace(z[i+6]))
2326 || (strncmp(&z[i],"%ifndef",7)==0 && isspace(z[i+7])) ){
2327 if( exclude ){
2328 exclude++;
2329 }else{
2330 for(j=i+7; isspace(z[j]); j++){}
2331 for(n=0; z[j+n] && !isspace(z[j+n]); n++){}
2332 exclude = 1;
2333 for(k=0; k<nDefine; k++){
2334 if( strncmp(azDefine[k],&z[j],n)==0 && strlen(azDefine[k])==n ){
2335 exclude = 0;
2336 break;
2337 }
2338 }
2339 if( z[i+3]=='n' ) exclude = !exclude;
2340 if( exclude ){
2341 start = i;
2342 start_lineno = lineno;
2343 }
2344 }
2345 for(j=i; z[j] && z[j]!='\n'; j++) z[j] = ' ';
2346 }
2347 }
2348 if( exclude ){
2349 fprintf(stderr,"unterminated %%ifdef starting on line %d\n", start_lineno);
2350 exit(1);
2351 }
2352}
2353
drh75897232000-05-29 14:26:00 +00002354/* In spite of its name, this function is really a scanner. It read
2355** in the entire input file (all at once) then tokenizes it. Each
2356** token is passed to the function "parseonetoken" which builds all
2357** the appropriate data structures in the global state vector "gp".
2358*/
2359void Parse(gp)
2360struct lemon *gp;
2361{
2362 struct pstate ps;
2363 FILE *fp;
2364 char *filebuf;
2365 int filesize;
2366 int lineno;
2367 int c;
2368 char *cp, *nextcp;
2369 int startline = 0;
2370
2371 ps.gp = gp;
2372 ps.filename = gp->filename;
2373 ps.errorcnt = 0;
2374 ps.state = INITIALIZE;
2375
2376 /* Begin by reading the input file */
2377 fp = fopen(ps.filename,"rb");
2378 if( fp==0 ){
2379 ErrorMsg(ps.filename,0,"Can't open this file for reading.");
2380 gp->errorcnt++;
2381 return;
2382 }
2383 fseek(fp,0,2);
2384 filesize = ftell(fp);
2385 rewind(fp);
2386 filebuf = (char *)malloc( filesize+1 );
2387 if( filebuf==0 ){
2388 ErrorMsg(ps.filename,0,"Can't allocate %d of memory to hold this file.",
2389 filesize+1);
2390 gp->errorcnt++;
2391 return;
2392 }
2393 if( fread(filebuf,1,filesize,fp)!=filesize ){
2394 ErrorMsg(ps.filename,0,"Can't read in all %d bytes of this file.",
2395 filesize);
2396 free(filebuf);
2397 gp->errorcnt++;
2398 return;
2399 }
2400 fclose(fp);
2401 filebuf[filesize] = 0;
2402
drh6d08b4d2004-07-20 12:45:22 +00002403 /* Make an initial pass through the file to handle %ifdef and %ifndef */
2404 preprocess_input(filebuf);
2405
drh75897232000-05-29 14:26:00 +00002406 /* Now scan the text of the input file */
2407 lineno = 1;
2408 for(cp=filebuf; (c= *cp)!=0; ){
2409 if( c=='\n' ) lineno++; /* Keep track of the line number */
2410 if( isspace(c) ){ cp++; continue; } /* Skip all white space */
2411 if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments */
2412 cp+=2;
2413 while( (c= *cp)!=0 && c!='\n' ) cp++;
2414 continue;
2415 }
2416 if( c=='/' && cp[1]=='*' ){ /* Skip C style comments */
2417 cp+=2;
2418 while( (c= *cp)!=0 && (c!='/' || cp[-1]!='*') ){
2419 if( c=='\n' ) lineno++;
2420 cp++;
2421 }
2422 if( c ) cp++;
2423 continue;
2424 }
2425 ps.tokenstart = cp; /* Mark the beginning of the token */
2426 ps.tokenlineno = lineno; /* Linenumber on which token begins */
2427 if( c=='\"' ){ /* String literals */
2428 cp++;
2429 while( (c= *cp)!=0 && c!='\"' ){
2430 if( c=='\n' ) lineno++;
2431 cp++;
2432 }
2433 if( c==0 ){
2434 ErrorMsg(ps.filename,startline,
2435"String starting on this line is not terminated before the end of the file.");
2436 ps.errorcnt++;
2437 nextcp = cp;
2438 }else{
2439 nextcp = cp+1;
2440 }
2441 }else if( c=='{' ){ /* A block of C code */
2442 int level;
2443 cp++;
2444 for(level=1; (c= *cp)!=0 && (level>1 || c!='}'); cp++){
2445 if( c=='\n' ) lineno++;
2446 else if( c=='{' ) level++;
2447 else if( c=='}' ) level--;
2448 else if( c=='/' && cp[1]=='*' ){ /* Skip comments */
2449 int prevc;
2450 cp = &cp[2];
2451 prevc = 0;
2452 while( (c= *cp)!=0 && (c!='/' || prevc!='*') ){
2453 if( c=='\n' ) lineno++;
2454 prevc = c;
2455 cp++;
2456 }
2457 }else if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments too */
2458 cp = &cp[2];
2459 while( (c= *cp)!=0 && c!='\n' ) cp++;
2460 if( c ) lineno++;
2461 }else if( c=='\'' || c=='\"' ){ /* String a character literals */
2462 int startchar, prevc;
2463 startchar = c;
2464 prevc = 0;
2465 for(cp++; (c= *cp)!=0 && (c!=startchar || prevc=='\\'); cp++){
2466 if( c=='\n' ) lineno++;
2467 if( prevc=='\\' ) prevc = 0;
2468 else prevc = c;
2469 }
2470 }
2471 }
2472 if( c==0 ){
drh960e8c62001-04-03 16:53:21 +00002473 ErrorMsg(ps.filename,ps.tokenlineno,
drh75897232000-05-29 14:26:00 +00002474"C code starting on this line is not terminated before the end of the file.");
2475 ps.errorcnt++;
2476 nextcp = cp;
2477 }else{
2478 nextcp = cp+1;
2479 }
2480 }else if( isalnum(c) ){ /* Identifiers */
2481 while( (c= *cp)!=0 && (isalnum(c) || c=='_') ) cp++;
2482 nextcp = cp;
2483 }else if( c==':' && cp[1]==':' && cp[2]=='=' ){ /* The operator "::=" */
2484 cp += 3;
2485 nextcp = cp;
2486 }else{ /* All other (one character) operators */
2487 cp++;
2488 nextcp = cp;
2489 }
2490 c = *cp;
2491 *cp = 0; /* Null terminate the token */
2492 parseonetoken(&ps); /* Parse the token */
2493 *cp = c; /* Restore the buffer */
2494 cp = nextcp;
2495 }
2496 free(filebuf); /* Release the buffer after parsing */
2497 gp->rule = ps.firstrule;
2498 gp->errorcnt = ps.errorcnt;
2499}
2500/*************************** From the file "plink.c" *********************/
2501/*
2502** Routines processing configuration follow-set propagation links
2503** in the LEMON parser generator.
2504*/
2505static struct plink *plink_freelist = 0;
2506
2507/* Allocate a new plink */
2508struct plink *Plink_new(){
2509 struct plink *new;
2510
2511 if( plink_freelist==0 ){
2512 int i;
2513 int amt = 100;
2514 plink_freelist = (struct plink *)malloc( sizeof(struct plink)*amt );
2515 if( plink_freelist==0 ){
2516 fprintf(stderr,
2517 "Unable to allocate memory for a new follow-set propagation link.\n");
2518 exit(1);
2519 }
2520 for(i=0; i<amt-1; i++) plink_freelist[i].next = &plink_freelist[i+1];
2521 plink_freelist[amt-1].next = 0;
2522 }
2523 new = plink_freelist;
2524 plink_freelist = plink_freelist->next;
2525 return new;
2526}
2527
2528/* Add a plink to a plink list */
2529void Plink_add(plpp,cfp)
2530struct plink **plpp;
2531struct config *cfp;
2532{
2533 struct plink *new;
2534 new = Plink_new();
2535 new->next = *plpp;
2536 *plpp = new;
2537 new->cfp = cfp;
2538}
2539
2540/* Transfer every plink on the list "from" to the list "to" */
2541void Plink_copy(to,from)
2542struct plink **to;
2543struct plink *from;
2544{
2545 struct plink *nextpl;
2546 while( from ){
2547 nextpl = from->next;
2548 from->next = *to;
2549 *to = from;
2550 from = nextpl;
2551 }
2552}
2553
2554/* Delete every plink on the list */
2555void Plink_delete(plp)
2556struct plink *plp;
2557{
2558 struct plink *nextpl;
2559
2560 while( plp ){
2561 nextpl = plp->next;
2562 plp->next = plink_freelist;
2563 plink_freelist = plp;
2564 plp = nextpl;
2565 }
2566}
2567/*********************** From the file "report.c" **************************/
2568/*
2569** Procedures for generating reports and tables in the LEMON parser generator.
2570*/
2571
2572/* Generate a filename with the given suffix. Space to hold the
2573** name comes from malloc() and must be freed by the calling
2574** function.
2575*/
2576PRIVATE char *file_makename(lemp,suffix)
2577struct lemon *lemp;
2578char *suffix;
2579{
2580 char *name;
2581 char *cp;
2582
2583 name = malloc( strlen(lemp->filename) + strlen(suffix) + 5 );
2584 if( name==0 ){
2585 fprintf(stderr,"Can't allocate space for a filename.\n");
2586 exit(1);
2587 }
2588 strcpy(name,lemp->filename);
2589 cp = strrchr(name,'.');
2590 if( cp ) *cp = 0;
2591 strcat(name,suffix);
2592 return name;
2593}
2594
2595/* Open a file with a name based on the name of the input file,
2596** but with a different (specified) suffix, and return a pointer
2597** to the stream */
2598PRIVATE FILE *file_open(lemp,suffix,mode)
2599struct lemon *lemp;
2600char *suffix;
2601char *mode;
2602{
2603 FILE *fp;
2604
2605 if( lemp->outname ) free(lemp->outname);
2606 lemp->outname = file_makename(lemp, suffix);
2607 fp = fopen(lemp->outname,mode);
2608 if( fp==0 && *mode=='w' ){
2609 fprintf(stderr,"Can't open file \"%s\".\n",lemp->outname);
2610 lemp->errorcnt++;
2611 return 0;
2612 }
2613 return fp;
2614}
2615
2616/* Duplicate the input file without comments and without actions
2617** on rules */
2618void Reprint(lemp)
2619struct lemon *lemp;
2620{
2621 struct rule *rp;
2622 struct symbol *sp;
2623 int i, j, maxlen, len, ncolumns, skip;
2624 printf("// Reprint of input file \"%s\".\n// Symbols:\n",lemp->filename);
2625 maxlen = 10;
2626 for(i=0; i<lemp->nsymbol; i++){
2627 sp = lemp->symbols[i];
2628 len = strlen(sp->name);
2629 if( len>maxlen ) maxlen = len;
2630 }
2631 ncolumns = 76/(maxlen+5);
2632 if( ncolumns<1 ) ncolumns = 1;
2633 skip = (lemp->nsymbol + ncolumns - 1)/ncolumns;
2634 for(i=0; i<skip; i++){
2635 printf("//");
2636 for(j=i; j<lemp->nsymbol; j+=skip){
2637 sp = lemp->symbols[j];
2638 assert( sp->index==j );
2639 printf(" %3d %-*.*s",j,maxlen,maxlen,sp->name);
2640 }
2641 printf("\n");
2642 }
2643 for(rp=lemp->rule; rp; rp=rp->next){
2644 printf("%s",rp->lhs->name);
2645/* if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */
2646 printf(" ::=");
2647 for(i=0; i<rp->nrhs; i++){
2648 printf(" %s",rp->rhs[i]->name);
2649/* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */
2650 }
2651 printf(".");
2652 if( rp->precsym ) printf(" [%s]",rp->precsym->name);
2653/* if( rp->code ) printf("\n %s",rp->code); */
2654 printf("\n");
2655 }
2656}
2657
2658void ConfigPrint(fp,cfp)
2659FILE *fp;
2660struct config *cfp;
2661{
2662 struct rule *rp;
2663 int i;
2664 rp = cfp->rp;
2665 fprintf(fp,"%s ::=",rp->lhs->name);
2666 for(i=0; i<=rp->nrhs; i++){
2667 if( i==cfp->dot ) fprintf(fp," *");
2668 if( i==rp->nrhs ) break;
2669 fprintf(fp," %s",rp->rhs[i]->name);
2670 }
2671}
2672
2673/* #define TEST */
2674#ifdef TEST
2675/* Print a set */
2676PRIVATE void SetPrint(out,set,lemp)
2677FILE *out;
2678char *set;
2679struct lemon *lemp;
2680{
2681 int i;
2682 char *spacer;
2683 spacer = "";
2684 fprintf(out,"%12s[","");
2685 for(i=0; i<lemp->nterminal; i++){
2686 if( SetFind(set,i) ){
2687 fprintf(out,"%s%s",spacer,lemp->symbols[i]->name);
2688 spacer = " ";
2689 }
2690 }
2691 fprintf(out,"]\n");
2692}
2693
2694/* Print a plink chain */
2695PRIVATE void PlinkPrint(out,plp,tag)
2696FILE *out;
2697struct plink *plp;
2698char *tag;
2699{
2700 while( plp ){
2701 fprintf(out,"%12s%s (state %2d) ","",tag,plp->cfp->stp->index);
2702 ConfigPrint(out,plp->cfp);
2703 fprintf(out,"\n");
2704 plp = plp->next;
2705 }
2706}
2707#endif
2708
2709/* Print an action to the given file descriptor. Return FALSE if
2710** nothing was actually printed.
2711*/
2712int PrintAction(struct action *ap, FILE *fp, int indent){
2713 int result = 1;
2714 switch( ap->type ){
2715 case SHIFT:
2716 fprintf(fp,"%*s shift %d",indent,ap->sp->name,ap->x.stp->index);
2717 break;
2718 case REDUCE:
2719 fprintf(fp,"%*s reduce %d",indent,ap->sp->name,ap->x.rp->index);
2720 break;
2721 case ACCEPT:
2722 fprintf(fp,"%*s accept",indent,ap->sp->name);
2723 break;
2724 case ERROR:
2725 fprintf(fp,"%*s error",indent,ap->sp->name);
2726 break;
2727 case CONFLICT:
2728 fprintf(fp,"%*s reduce %-3d ** Parsing conflict **",
2729 indent,ap->sp->name,ap->x.rp->index);
2730 break;
2731 case SH_RESOLVED:
2732 case RD_RESOLVED:
2733 case NOT_USED:
2734 result = 0;
2735 break;
2736 }
2737 return result;
2738}
2739
2740/* Generate the "y.output" log file */
2741void ReportOutput(lemp)
2742struct lemon *lemp;
2743{
2744 int i;
2745 struct state *stp;
2746 struct config *cfp;
2747 struct action *ap;
2748 FILE *fp;
2749
2750 fp = file_open(lemp,".out","w");
2751 if( fp==0 ) return;
2752 fprintf(fp," \b");
2753 for(i=0; i<lemp->nstate; i++){
2754 stp = lemp->sorted[i];
2755 fprintf(fp,"State %d:\n",stp->index);
2756 if( lemp->basisflag ) cfp=stp->bp;
2757 else cfp=stp->cfp;
2758 while( cfp ){
2759 char buf[20];
2760 if( cfp->dot==cfp->rp->nrhs ){
2761 sprintf(buf,"(%d)",cfp->rp->index);
2762 fprintf(fp," %5s ",buf);
2763 }else{
2764 fprintf(fp," ");
2765 }
2766 ConfigPrint(fp,cfp);
2767 fprintf(fp,"\n");
2768#ifdef TEST
2769 SetPrint(fp,cfp->fws,lemp);
2770 PlinkPrint(fp,cfp->fplp,"To ");
2771 PlinkPrint(fp,cfp->bplp,"From");
2772#endif
2773 if( lemp->basisflag ) cfp=cfp->bp;
2774 else cfp=cfp->next;
2775 }
2776 fprintf(fp,"\n");
2777 for(ap=stp->ap; ap; ap=ap->next){
2778 if( PrintAction(ap,fp,30) ) fprintf(fp,"\n");
2779 }
2780 fprintf(fp,"\n");
2781 }
2782 fclose(fp);
2783 return;
2784}
2785
/* Search for the file "name" which is in the same directory as
** the executable.
**
** If "argv0" contains a directory separator, the result is simply the
** directory part of argv0 joined with "name"; no check is made that the
** file exists.  Otherwise each directory of the PATH environment variable
** (or of ".:/bin:/usr/bin" when PATH is unset or empty) is tried in turn
** and the first candidate for which access(candidate,modemask) succeeds
** is returned.  If none succeeds, the last candidate tried is returned.
**
** The result is obtained from malloc() and is 0 only if that allocation
** fails.  Neither argv0 nor the PATH string is modified.
*/
PRIVATE char *pathsearch(argv0,name,modemask)
char *argv0;
char *name;
int modemask;
{
  char *path;
  char *cp;
  extern int access();

#ifdef __WIN32__
  cp = strrchr(argv0,'\\');
#else
  cp = strrchr(argv0,'/');
#endif
  if( cp ){
    int ndir = (int)(cp - argv0);     /* Length of the directory part */
    path = (char *)malloc( ndir + strlen(name) + 2 );
    if( path ) sprintf(path,"%.*s/%s",ndir,argv0,name);
  }else{
    extern char *getenv();
    const char *pathlist;
    const char *end;
    pathlist = getenv("PATH");
    if( pathlist==0 || pathlist[0]==0 ) pathlist = ".:/bin:/usr/bin";
    /* No single directory is longer than the whole list, so this is
    ** enough room for any "<directory>/<name>" candidate. */
    path = (char *)malloc( strlen(pathlist)+strlen(name)+2 );
    if( path!=0 ){
      while( *pathlist ){
        end = strchr(pathlist,':');
        if( end==0 ) end = pathlist + strlen(pathlist);
        /* A precision bounds the copy, so the list itself (which may be
        ** a string literal) never has to be written to. */
        sprintf(path,"%.*s/%s",(int)(end-pathlist),pathlist,name);
        pathlist = *end ? end+1 : end;
        if( access(path,modemask)==0 ) break;
      }
    }
  }
  return path;
}
2830
2831/* Given an action, compute the integer value for that action
2832** which is to be put in the action table of the generated machine.
2833** Return negative if no action should be generated.
2834*/
2835PRIVATE int compute_action(lemp,ap)
2836struct lemon *lemp;
2837struct action *ap;
2838{
2839 int act;
2840 switch( ap->type ){
2841 case SHIFT: act = ap->x.stp->index; break;
2842 case REDUCE: act = ap->x.rp->index + lemp->nstate; break;
2843 case ERROR: act = lemp->nstate + lemp->nrule; break;
2844 case ACCEPT: act = lemp->nstate + lemp->nrule + 1; break;
2845 default: act = -1; break;
2846 }
2847 return act;
2848}
2849
#define LINESIZE 1000
/* The next cluster of routines are for reading the template file
** and writing the results to the generated parser */
/* The first function transfers data from "in" to "out" until
** a line is seen which begins with "%%".  The line number is
** tracked: *lineno is incremented once for every line transferred.
** The "%%" line itself is consumed but not written.
**
** if name!=0, then any word that begins with "Parse" is changed to
** begin with *name instead.  "Parse" only counts as the start of a word
** when it is not immediately preceded by a letter.
**
** Input is read in pieces of at most LINESIZE-1 characters, so a template
** line longer than that is processed (and counted) as several lines.
*/
PRIVATE void tplt_xfer(name,in,out,lineno)
char *name;
FILE *in;
FILE *out;
int *lineno;
{
  int i, iStart;
  char line[LINESIZE];
  while( fgets(line,LINESIZE,in) && (line[0]!='%' || line[1]!='%') ){
    (*lineno)++;
    iStart = 0;      /* Index of the first character not yet written */
    if( name ){
      for(i=0; line[i]; i++){
        /* The cast keeps isalpha() well defined for characters with the
        ** high bit set when plain char is a signed type. */
        if( line[i]=='P' && strncmp(&line[i],"Parse",5)==0
          && (i==0 || !isalpha((unsigned char)line[i-1]))
        ){
          if( i>iStart ) fprintf(out,"%.*s",i-iStart,&line[iStart]);
          fprintf(out,"%s",name);
          i += 4;               /* Skip the remainder of "Parse" */
          iStart = i+1;
        }
      }
    }
    fprintf(out,"%s",&line[iStart]);
  }
}
2886
2887/* The next function finds the template file and opens it, returning
2888** a pointer to the opened file. */
2889PRIVATE FILE *tplt_open(lemp)
2890struct lemon *lemp;
2891{
2892 static char templatename[] = "lempar.c";
2893 char buf[1000];
2894 FILE *in;
2895 char *tpltname;
2896 char *cp;
2897
2898 cp = strrchr(lemp->filename,'.');
2899 if( cp ){
drh8b582012003-10-21 13:16:03 +00002900 sprintf(buf,"%.*s.lt",(int)(cp-lemp->filename),lemp->filename);
drh75897232000-05-29 14:26:00 +00002901 }else{
2902 sprintf(buf,"%s.lt",lemp->filename);
2903 }
2904 if( access(buf,004)==0 ){
2905 tpltname = buf;
drh960e8c62001-04-03 16:53:21 +00002906 }else if( access(templatename,004)==0 ){
2907 tpltname = templatename;
drh75897232000-05-29 14:26:00 +00002908 }else{
2909 tpltname = pathsearch(lemp->argv0,templatename,0);
2910 }
2911 if( tpltname==0 ){
2912 fprintf(stderr,"Can't find the parser driver template file \"%s\".\n",
2913 templatename);
2914 lemp->errorcnt++;
2915 return 0;
2916 }
2917 in = fopen(tpltname,"r");
2918 if( in==0 ){
2919 fprintf(stderr,"Can't open the template file \"%s\".\n",templatename);
2920 lemp->errorcnt++;
2921 return 0;
2922 }
2923 return in;
2924}
2925
2926/* Print a string to the file and keep the linenumber up to date */
2927PRIVATE void tplt_print(out,lemp,str,strln,lineno)
2928FILE *out;
2929struct lemon *lemp;
2930char *str;
2931int strln;
2932int *lineno;
2933{
2934 if( str==0 ) return;
2935 fprintf(out,"#line %d \"%s\"\n",strln,lemp->filename); (*lineno)++;
2936 while( *str ){
2937 if( *str=='\n' ) (*lineno)++;
2938 putc(*str,out);
2939 str++;
2940 }
2941 fprintf(out,"\n#line %d \"%s\"\n",*lineno+2,lemp->outname); (*lineno)+=2;
2942 return;
2943}
2944
2945/*
2946** The following routine emits code for the destructor for the
2947** symbol sp
2948*/
2949void emit_destructor_code(out,sp,lemp,lineno)
2950FILE *out;
2951struct symbol *sp;
2952struct lemon *lemp;
2953int *lineno;
2954{
drhcc83b6e2004-04-23 23:38:42 +00002955 char *cp = 0;
drh75897232000-05-29 14:26:00 +00002956
2957 int linecnt = 0;
2958 if( sp->type==TERMINAL ){
2959 cp = lemp->tokendest;
2960 if( cp==0 ) return;
2961 fprintf(out,"#line %d \"%s\"\n{",lemp->tokendestln,lemp->filename);
drh960e8c62001-04-03 16:53:21 +00002962 }else if( sp->destructor ){
drh75897232000-05-29 14:26:00 +00002963 cp = sp->destructor;
drh75897232000-05-29 14:26:00 +00002964 fprintf(out,"#line %d \"%s\"\n{",sp->destructorln,lemp->filename);
drh960e8c62001-04-03 16:53:21 +00002965 }else if( lemp->vardest ){
2966 cp = lemp->vardest;
2967 if( cp==0 ) return;
2968 fprintf(out,"#line %d \"%s\"\n{",lemp->vardestln,lemp->filename);
drhcc83b6e2004-04-23 23:38:42 +00002969 }else{
2970 assert( 0 ); /* Cannot happen */
drh75897232000-05-29 14:26:00 +00002971 }
2972 for(; *cp; cp++){
2973 if( *cp=='$' && cp[1]=='$' ){
2974 fprintf(out,"(yypminor->yy%d)",sp->dtnum);
2975 cp++;
2976 continue;
2977 }
2978 if( *cp=='\n' ) linecnt++;
2979 fputc(*cp,out);
2980 }
2981 (*lineno) += 3 + linecnt;
2982 fprintf(out,"}\n#line %d \"%s\"\n",*lineno,lemp->outname);
2983 return;
2984}
2985
2986/*
drh960e8c62001-04-03 16:53:21 +00002987** Return TRUE (non-zero) if the given symbol has a destructor.
drh75897232000-05-29 14:26:00 +00002988*/
2989int has_destructor(sp, lemp)
2990struct symbol *sp;
2991struct lemon *lemp;
2992{
2993 int ret;
2994 if( sp->type==TERMINAL ){
2995 ret = lemp->tokendest!=0;
2996 }else{
drh960e8c62001-04-03 16:53:21 +00002997 ret = lemp->vardest!=0 || sp->destructor!=0;
drh75897232000-05-29 14:26:00 +00002998 }
2999 return ret;
3000}
3001
/*
** Append text to a dynamically allocated string.  If zText is 0 then
** reset the string to be empty again.  Always return the complete text
** of the string (which is overwritten with each call).
**
** Parameters:
**   zText   Text to append, or 0 to reset.
**   n       Number of characters of zText to append.  If n<=0 the whole
**           NUL-terminated string is appended.
**   p1, p2  Each "%d" that lies entirely within the appended characters
**           is replaced by an integer: the first by p1, later ones by p2.
**
** The reset call returns the text accumulated so far and marks the
** buffer as empty.  A reset made while the buffer is already empty
** also terminates it, so a reset followed directly by another reset
** yields "" rather than stale text.  An empty string is returned (never
** NULL) when nothing has been allocated or when memory runs out.
*/
PRIVATE char *append_str(char *zText, int n, int p1, int p2){
  static char empty[1] = { 0 };  /* Returned when there is no buffer */
  static char *z = 0;            /* The accumulated text */
  static int alloced = 0;        /* Bytes allocated for z[] */
  static int used = 0;           /* Bytes of z[] in use, excluding the NUL */
  int c;
  char zInt[40];

  if( zText==0 ){
    if( z==0 ) return empty;
    if( used==0 ) z[0] = 0;
    used = 0;
    return z;
  }
  if( n<=0 ) n = (int)strlen(zText);
  if( (int)(n+sizeof(zInt)*2+used) >= alloced ){
    int want = (int)(n + sizeof(zInt)*2 + used + 200);
    char *zNew = realloc(z, want);
    /* On failure keep the old buffer and bookkeeping intact */
    if( zNew==0 ) return empty;
    z = zNew;
    alloced = want;
  }
  while( n-- > 0 ){
    c = *(zText++);
    /* Only expand "%d" when the 'd' is one of the n requested characters */
    if( c=='%' && n>0 && zText[0]=='d' ){
      sprintf(zInt, "%d", p1);
      p1 = p2;
      strcpy(&z[used], zInt);
      used += (int)strlen(&z[used]);
      zText++;
      n--;
    }else{
      z[used++] = c;
    }
  }
  z[used] = 0;
  return z;
}
3040
3041/*
3042** zCode is a string that is the action associated with a rule. Expand
3043** the symbols in this string so that the refer to elements of the parser
3044** stack. Return a new string stored in space obtained from malloc.
3045*/
3046PRIVATE char *translate_code(struct lemon *lemp, struct rule *rp){
3047 char *cp, *xp;
3048 int i;
3049 char lhsused = 0; /* True if the LHS element has been used */
3050 char used[MAXRHS]; /* True for each RHS element which is used */
3051
3052 for(i=0; i<rp->nrhs; i++) used[i] = 0;
3053 lhsused = 0;
3054
3055 append_str(0,0,0,0);
3056 for(cp=rp->code; *cp; cp++){
3057 if( isalpha(*cp) && (cp==rp->code || (!isalnum(cp[-1]) && cp[-1]!='_')) ){
3058 char saved;
3059 for(xp= &cp[1]; isalnum(*xp) || *xp=='_'; xp++);
3060 saved = *xp;
3061 *xp = 0;
3062 if( rp->lhsalias && strcmp(cp,rp->lhsalias)==0 ){
3063 append_str("yygotominor.yy%d",-1,rp->lhs->dtnum,0);
3064 cp = xp;
3065 lhsused = 1;
3066 }else{
3067 for(i=0; i<rp->nrhs; i++){
3068 if( rp->rhsalias[i] && strcmp(cp,rp->rhsalias[i])==0 ){
3069 append_str("yymsp[%d].minor.yy%d",-1,
3070 i-rp->nrhs+1,rp->rhs[i]->dtnum);
3071 cp = xp;
3072 used[i] = 1;
3073 break;
3074 }
3075 }
3076 }
3077 *xp = saved;
3078 }
3079 append_str(cp, 1, 0, 0);
3080 } /* End loop */
3081
3082 /* Check to make sure the LHS has been used */
3083 if( rp->lhsalias && !lhsused ){
3084 ErrorMsg(lemp->filename,rp->ruleline,
3085 "Label \"%s\" for \"%s(%s)\" is never used.",
3086 rp->lhsalias,rp->lhs->name,rp->lhsalias);
3087 lemp->errorcnt++;
3088 }
3089
3090 /* Generate destructor code for RHS symbols which are not used in the
3091 ** reduce code */
3092 for(i=0; i<rp->nrhs; i++){
3093 if( rp->rhsalias[i] && !used[i] ){
3094 ErrorMsg(lemp->filename,rp->ruleline,
3095 "Label %s for \"%s(%s)\" is never used.",
3096 rp->rhsalias[i],rp->rhs[i]->name,rp->rhsalias[i]);
3097 lemp->errorcnt++;
3098 }else if( rp->rhsalias[i]==0 ){
3099 if( has_destructor(rp->rhs[i],lemp) ){
3100 append_str(" yy_destructor(%d,&yymsp[%d].minor);\n", -1,
3101 rp->rhs[i]->index,i-rp->nrhs+1);
3102 }else{
3103 /* No destructor defined for this term */
3104 }
3105 }
3106 }
3107 cp = append_str(0,0,0,0);
3108 rp->code = Strsafe(cp);
3109}
3110
drh75897232000-05-29 14:26:00 +00003111/*
3112** Generate code which executes when the rule "rp" is reduced. Write
3113** the code to "out". Make sure lineno stays up-to-date.
3114*/
3115PRIVATE void emit_code(out,rp,lemp,lineno)
3116FILE *out;
3117struct rule *rp;
3118struct lemon *lemp;
3119int *lineno;
3120{
drh0bb132b2004-07-20 14:06:51 +00003121 char *cp;
drh75897232000-05-29 14:26:00 +00003122 int linecnt = 0;
drh75897232000-05-29 14:26:00 +00003123
3124 /* Generate code to do the reduce action */
3125 if( rp->code ){
3126 fprintf(out,"#line %d \"%s\"\n{",rp->line,lemp->filename);
drh0bb132b2004-07-20 14:06:51 +00003127 fprintf(out,"%s",rp->code);
drh75897232000-05-29 14:26:00 +00003128 for(cp=rp->code; *cp; cp++){
drh75897232000-05-29 14:26:00 +00003129 if( *cp=='\n' ) linecnt++;
drh75897232000-05-29 14:26:00 +00003130 } /* End loop */
3131 (*lineno) += 3 + linecnt;
3132 fprintf(out,"}\n#line %d \"%s\"\n",*lineno,lemp->outname);
3133 } /* End if( rp->code ) */
3134
drh75897232000-05-29 14:26:00 +00003135 return;
3136}
3137
3138/*
3139** Print the definition of the union used for the parser's data stack.
3140** This union contains fields for every possible data type for tokens
3141** and nonterminals. In the process of computing and printing this
3142** union, also set the ".dtnum" field of every terminal and nonterminal
3143** symbol.
3144*/
3145void print_stack_union(out,lemp,plineno,mhflag)
3146FILE *out; /* The output stream */
3147struct lemon *lemp; /* The main info structure for this parser */
3148int *plineno; /* Pointer to the line number */
3149int mhflag; /* True if generating makeheaders output */
3150{
3151 int lineno = *plineno; /* The line number of the output */
3152 char **types; /* A hash table of datatypes */
3153 int arraysize; /* Size of the "types" array */
3154 int maxdtlength; /* Maximum length of any ".datatype" field. */
3155 char *stddt; /* Standardized name for a datatype */
3156 int i,j; /* Loop counters */
3157 int hash; /* For hashing the name of a type */
3158 char *name; /* Name of the parser */
3159
3160 /* Allocate and initialize types[] and allocate stddt[] */
3161 arraysize = lemp->nsymbol * 2;
3162 types = (char**)malloc( arraysize * sizeof(char*) );
3163 for(i=0; i<arraysize; i++) types[i] = 0;
3164 maxdtlength = 0;
drh960e8c62001-04-03 16:53:21 +00003165 if( lemp->vartype ){
3166 maxdtlength = strlen(lemp->vartype);
3167 }
drh75897232000-05-29 14:26:00 +00003168 for(i=0; i<lemp->nsymbol; i++){
3169 int len;
3170 struct symbol *sp = lemp->symbols[i];
3171 if( sp->datatype==0 ) continue;
3172 len = strlen(sp->datatype);
3173 if( len>maxdtlength ) maxdtlength = len;
3174 }
3175 stddt = (char*)malloc( maxdtlength*2 + 1 );
3176 if( types==0 || stddt==0 ){
3177 fprintf(stderr,"Out of memory.\n");
3178 exit(1);
3179 }
3180
3181 /* Build a hash table of datatypes. The ".dtnum" field of each symbol
3182 ** is filled in with the hash index plus 1. A ".dtnum" value of 0 is
drh960e8c62001-04-03 16:53:21 +00003183 ** used for terminal symbols. If there is no %default_type defined then
3184 ** 0 is also used as the .dtnum value for nonterminals which do not specify
3185 ** a datatype using the %type directive.
3186 */
drh75897232000-05-29 14:26:00 +00003187 for(i=0; i<lemp->nsymbol; i++){
3188 struct symbol *sp = lemp->symbols[i];
3189 char *cp;
3190 if( sp==lemp->errsym ){
3191 sp->dtnum = arraysize+1;
3192 continue;
3193 }
drh960e8c62001-04-03 16:53:21 +00003194 if( sp->type!=NONTERMINAL || (sp->datatype==0 && lemp->vartype==0) ){
drh75897232000-05-29 14:26:00 +00003195 sp->dtnum = 0;
3196 continue;
3197 }
3198 cp = sp->datatype;
drh960e8c62001-04-03 16:53:21 +00003199 if( cp==0 ) cp = lemp->vartype;
drh75897232000-05-29 14:26:00 +00003200 j = 0;
3201 while( isspace(*cp) ) cp++;
3202 while( *cp ) stddt[j++] = *cp++;
3203 while( j>0 && isspace(stddt[j-1]) ) j--;
3204 stddt[j] = 0;
3205 hash = 0;
3206 for(j=0; stddt[j]; j++){
3207 hash = hash*53 + stddt[j];
3208 }
drh3b2129c2003-05-13 00:34:21 +00003209 hash = (hash & 0x7fffffff)%arraysize;
drh75897232000-05-29 14:26:00 +00003210 while( types[hash] ){
3211 if( strcmp(types[hash],stddt)==0 ){
3212 sp->dtnum = hash + 1;
3213 break;
3214 }
3215 hash++;
3216 if( hash>=arraysize ) hash = 0;
3217 }
3218 if( types[hash]==0 ){
3219 sp->dtnum = hash + 1;
3220 types[hash] = (char*)malloc( strlen(stddt)+1 );
3221 if( types[hash]==0 ){
3222 fprintf(stderr,"Out of memory.\n");
3223 exit(1);
3224 }
3225 strcpy(types[hash],stddt);
3226 }
3227 }
3228
3229 /* Print out the definition of YYTOKENTYPE and YYMINORTYPE */
3230 name = lemp->name ? lemp->name : "Parse";
3231 lineno = *plineno;
3232 if( mhflag ){ fprintf(out,"#if INTERFACE\n"); lineno++; }
3233 fprintf(out,"#define %sTOKENTYPE %s\n",name,
3234 lemp->tokentype?lemp->tokentype:"void*"); lineno++;
3235 if( mhflag ){ fprintf(out,"#endif\n"); lineno++; }
3236 fprintf(out,"typedef union {\n"); lineno++;
3237 fprintf(out," %sTOKENTYPE yy0;\n",name); lineno++;
3238 for(i=0; i<arraysize; i++){
3239 if( types[i]==0 ) continue;
3240 fprintf(out," %s yy%d;\n",types[i],i+1); lineno++;
3241 free(types[i]);
3242 }
3243 fprintf(out," int yy%d;\n",lemp->errsym->dtnum); lineno++;
3244 free(stddt);
3245 free(types);
3246 fprintf(out,"} YYMINORTYPE;\n"); lineno++;
3247 *plineno = lineno;
3248}
3249
/*
** Return the name of a C datatype able to represent values between
** lwr and upr, inclusive.
**
** Unsigned types are chosen when lwr is not negative.  Note that an
** upper bound of exactly 65535 (unsigned) or 32767 (signed) selects
** the next wider type.
*/
static const char *minimum_size_type(int lwr, int upr){
  if( lwr<0 ){
    if( lwr>=-127 && upr<=127 ) return "signed char";
    if( lwr>=-32767 && upr<32767 ) return "short";
    return "int";
  }
  if( upr<=255 ) return "unsigned char";
  if( upr<65535 ) return "unsigned short int";
  return "unsigned int";
}
3271
/*
** Each state contains a set of token transactions and a set of
** nonterminal transactions.  Each of these sets makes an instance
** of the following structure.  An array of these structures is used
** to order the creation of entries in the yy_action[] table.
*/
struct axset {
  struct state *stp;   /* A pointer to a state */
  int isTkn;           /* True to use tokens.  False for non-terminals */
  int nAction;         /* Number of actions */
};

/*
** Compare two axset structures for sorting purposes.  Entries with
** more actions sort first.
*/
static int axset_compare(const void *a, const void *b){
  const struct axset *pLeft = a;
  const struct axset *pRight = b;
  return pRight->nAction - pLeft->nAction;
}
3292
drh75897232000-05-29 14:26:00 +00003293/* Generate C source code for the parser */
3294void ReportTable(lemp, mhflag)
3295struct lemon *lemp;
3296int mhflag; /* Output in makeheaders format if true */
3297{
3298 FILE *out, *in;
3299 char line[LINESIZE];
3300 int lineno;
3301 struct state *stp;
3302 struct action *ap;
3303 struct rule *rp;
drh8b582012003-10-21 13:16:03 +00003304 struct acttab *pActtab;
3305 int i, j, n;
drh75897232000-05-29 14:26:00 +00003306 char *name;
drh8b582012003-10-21 13:16:03 +00003307 int mnTknOfst, mxTknOfst;
3308 int mnNtOfst, mxNtOfst;
drhfdbf9282003-10-21 16:34:41 +00003309 struct axset *ax;
drh75897232000-05-29 14:26:00 +00003310
3311 in = tplt_open(lemp);
3312 if( in==0 ) return;
3313 out = file_open(lemp,".c","w");
3314 if( out==0 ){
3315 fclose(in);
3316 return;
3317 }
3318 lineno = 1;
3319 tplt_xfer(lemp->name,in,out,&lineno);
3320
3321 /* Generate the include code, if any */
3322 tplt_print(out,lemp,lemp->include,lemp->includeln,&lineno);
3323 if( mhflag ){
3324 char *name = file_makename(lemp, ".h");
3325 fprintf(out,"#include \"%s\"\n", name); lineno++;
3326 free(name);
3327 }
3328 tplt_xfer(lemp->name,in,out,&lineno);
3329
3330 /* Generate #defines for all tokens */
3331 if( mhflag ){
3332 char *prefix;
3333 fprintf(out,"#if INTERFACE\n"); lineno++;
3334 if( lemp->tokenprefix ) prefix = lemp->tokenprefix;
3335 else prefix = "";
3336 for(i=1; i<lemp->nterminal; i++){
3337 fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
3338 lineno++;
3339 }
3340 fprintf(out,"#endif\n"); lineno++;
3341 }
3342 tplt_xfer(lemp->name,in,out,&lineno);
3343
3344 /* Generate the defines */
3345 fprintf(out,"/* \001 */\n");
3346 fprintf(out,"#define YYCODETYPE %s\n",
drh8b582012003-10-21 13:16:03 +00003347 minimum_size_type(0, lemp->nsymbol+5)); lineno++;
drh75897232000-05-29 14:26:00 +00003348 fprintf(out,"#define YYNOCODE %d\n",lemp->nsymbol+1); lineno++;
3349 fprintf(out,"#define YYACTIONTYPE %s\n",
drh8b582012003-10-21 13:16:03 +00003350 minimum_size_type(0, lemp->nstate+lemp->nrule+5)); lineno++;
drh75897232000-05-29 14:26:00 +00003351 print_stack_union(out,lemp,&lineno,mhflag);
3352 if( lemp->stacksize ){
3353 if( atoi(lemp->stacksize)<=0 ){
3354 ErrorMsg(lemp->filename,0,
3355"Illegal stack size: [%s]. The stack size should be an integer constant.",
3356 lemp->stacksize);
3357 lemp->errorcnt++;
3358 lemp->stacksize = "100";
3359 }
3360 fprintf(out,"#define YYSTACKDEPTH %s\n",lemp->stacksize); lineno++;
3361 }else{
3362 fprintf(out,"#define YYSTACKDEPTH 100\n"); lineno++;
3363 }
3364 if( mhflag ){
3365 fprintf(out,"#if INTERFACE\n"); lineno++;
3366 }
3367 name = lemp->name ? lemp->name : "Parse";
3368 if( lemp->arg && lemp->arg[0] ){
3369 int i;
3370 i = strlen(lemp->arg);
drhb1edd012000-06-02 18:52:12 +00003371 while( i>=1 && isspace(lemp->arg[i-1]) ) i--;
3372 while( i>=1 && (isalnum(lemp->arg[i-1]) || lemp->arg[i-1]=='_') ) i--;
drh1f245e42002-03-11 13:55:50 +00003373 fprintf(out,"#define %sARG_SDECL %s;\n",name,lemp->arg); lineno++;
3374 fprintf(out,"#define %sARG_PDECL ,%s\n",name,lemp->arg); lineno++;
3375 fprintf(out,"#define %sARG_FETCH %s = yypParser->%s\n",
3376 name,lemp->arg,&lemp->arg[i]); lineno++;
3377 fprintf(out,"#define %sARG_STORE yypParser->%s = %s\n",
3378 name,&lemp->arg[i],&lemp->arg[i]); lineno++;
drh75897232000-05-29 14:26:00 +00003379 }else{
drh1f245e42002-03-11 13:55:50 +00003380 fprintf(out,"#define %sARG_SDECL\n",name); lineno++;
3381 fprintf(out,"#define %sARG_PDECL\n",name); lineno++;
3382 fprintf(out,"#define %sARG_FETCH\n",name); lineno++;
3383 fprintf(out,"#define %sARG_STORE\n",name); lineno++;
drh75897232000-05-29 14:26:00 +00003384 }
3385 if( mhflag ){
3386 fprintf(out,"#endif\n"); lineno++;
3387 }
3388 fprintf(out,"#define YYNSTATE %d\n",lemp->nstate); lineno++;
3389 fprintf(out,"#define YYNRULE %d\n",lemp->nrule); lineno++;
3390 fprintf(out,"#define YYERRORSYMBOL %d\n",lemp->errsym->index); lineno++;
3391 fprintf(out,"#define YYERRSYMDT yy%d\n",lemp->errsym->dtnum); lineno++;
drh0bd1f4e2002-06-06 18:54:39 +00003392 if( lemp->has_fallback ){
3393 fprintf(out,"#define YYFALLBACK 1\n"); lineno++;
3394 }
drh75897232000-05-29 14:26:00 +00003395 tplt_xfer(lemp->name,in,out,&lineno);
3396
drh8b582012003-10-21 13:16:03 +00003397 /* Generate the action table and its associates:
drh75897232000-05-29 14:26:00 +00003398 **
drh8b582012003-10-21 13:16:03 +00003399 ** yy_action[] A single table containing all actions.
3400 ** yy_lookahead[] A table containing the lookahead for each entry in
3401 ** yy_action. Used to detect hash collisions.
3402 ** yy_shift_ofst[] For each state, the offset into yy_action for
3403 ** shifting terminals.
3404 ** yy_reduce_ofst[] For each state, the offset into yy_action for
3405 ** shifting non-terminals after a reduce.
3406 ** yy_default[] Default action for each state.
drh75897232000-05-29 14:26:00 +00003407 */
drh75897232000-05-29 14:26:00 +00003408
drh8b582012003-10-21 13:16:03 +00003409 /* Compute the actions on all states and count them up */
drhfdbf9282003-10-21 16:34:41 +00003410 ax = malloc( sizeof(ax[0])*lemp->nstate*2 );
3411 if( ax==0 ){
3412 fprintf(stderr,"malloc failed\n");
3413 exit(1);
3414 }
drh75897232000-05-29 14:26:00 +00003415 for(i=0; i<lemp->nstate; i++){
drh75897232000-05-29 14:26:00 +00003416 stp = lemp->sorted[i];
drh8b582012003-10-21 13:16:03 +00003417 stp->nTknAct = stp->nNtAct = 0;
3418 stp->iDflt = lemp->nstate + lemp->nrule;
3419 stp->iTknOfst = NO_OFFSET;
3420 stp->iNtOfst = NO_OFFSET;
3421 for(ap=stp->ap; ap; ap=ap->next){
3422 if( compute_action(lemp,ap)>=0 ){
3423 if( ap->sp->index<lemp->nterminal ){
3424 stp->nTknAct++;
3425 }else if( ap->sp->index<lemp->nsymbol ){
3426 stp->nNtAct++;
3427 }else{
3428 stp->iDflt = compute_action(lemp, ap);
3429 }
3430 }
3431 }
drhfdbf9282003-10-21 16:34:41 +00003432 ax[i*2].stp = stp;
3433 ax[i*2].isTkn = 1;
3434 ax[i*2].nAction = stp->nTknAct;
3435 ax[i*2+1].stp = stp;
3436 ax[i*2+1].isTkn = 0;
3437 ax[i*2+1].nAction = stp->nNtAct;
drh75897232000-05-29 14:26:00 +00003438 }
drh8b582012003-10-21 13:16:03 +00003439 mxTknOfst = mnTknOfst = 0;
3440 mxNtOfst = mnNtOfst = 0;
3441
drhfdbf9282003-10-21 16:34:41 +00003442 /* Compute the action table. In order to try to keep the size of the
3443 ** action table to a minimum, the heuristic of placing the largest action
3444 ** sets first is used.
drh8b582012003-10-21 13:16:03 +00003445 */
drhfdbf9282003-10-21 16:34:41 +00003446 qsort(ax, lemp->nstate*2, sizeof(ax[0]), axset_compare);
drh8b582012003-10-21 13:16:03 +00003447 pActtab = acttab_alloc();
drhfdbf9282003-10-21 16:34:41 +00003448 for(i=0; i<lemp->nstate*2 && ax[i].nAction>0; i++){
3449 stp = ax[i].stp;
3450 if( ax[i].isTkn ){
3451 for(ap=stp->ap; ap; ap=ap->next){
3452 int action;
3453 if( ap->sp->index>=lemp->nterminal ) continue;
3454 action = compute_action(lemp, ap);
3455 if( action<0 ) continue;
3456 acttab_action(pActtab, ap->sp->index, action);
drh8b582012003-10-21 13:16:03 +00003457 }
drhfdbf9282003-10-21 16:34:41 +00003458 stp->iTknOfst = acttab_insert(pActtab);
3459 if( stp->iTknOfst<mnTknOfst ) mnTknOfst = stp->iTknOfst;
3460 if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst;
3461 }else{
3462 for(ap=stp->ap; ap; ap=ap->next){
3463 int action;
3464 if( ap->sp->index<lemp->nterminal ) continue;
3465 if( ap->sp->index==lemp->nsymbol ) continue;
3466 action = compute_action(lemp, ap);
3467 if( action<0 ) continue;
3468 acttab_action(pActtab, ap->sp->index, action);
drh8b582012003-10-21 13:16:03 +00003469 }
drhfdbf9282003-10-21 16:34:41 +00003470 stp->iNtOfst = acttab_insert(pActtab);
3471 if( stp->iNtOfst<mnNtOfst ) mnNtOfst = stp->iNtOfst;
3472 if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst;
drh8b582012003-10-21 13:16:03 +00003473 }
3474 }
drhfdbf9282003-10-21 16:34:41 +00003475 free(ax);
drh8b582012003-10-21 13:16:03 +00003476
3477 /* Output the yy_action table */
3478 fprintf(out,"static YYACTIONTYPE yy_action[] = {\n"); lineno++;
3479 n = acttab_size(pActtab);
3480 for(i=j=0; i<n; i++){
3481 int action = acttab_yyaction(pActtab, i);
3482 if( action<0 ) action = lemp->nsymbol + lemp->nrule + 2;
drhfdbf9282003-10-21 16:34:41 +00003483 if( j==0 ) fprintf(out," /* %5d */ ", i);
drh8b582012003-10-21 13:16:03 +00003484 fprintf(out, " %4d,", action);
3485 if( j==9 || i==n-1 ){
3486 fprintf(out, "\n"); lineno++;
3487 j = 0;
3488 }else{
3489 j++;
3490 }
3491 }
3492 fprintf(out, "};\n"); lineno++;
3493
3494 /* Output the yy_lookahead table */
3495 fprintf(out,"static YYCODETYPE yy_lookahead[] = {\n"); lineno++;
3496 for(i=j=0; i<n; i++){
3497 int la = acttab_yylookahead(pActtab, i);
3498 if( la<0 ) la = lemp->nsymbol;
drhfdbf9282003-10-21 16:34:41 +00003499 if( j==0 ) fprintf(out," /* %5d */ ", i);
drh8b582012003-10-21 13:16:03 +00003500 fprintf(out, " %4d,", la);
3501 if( j==9 || i==n-1 ){
3502 fprintf(out, "\n"); lineno++;
3503 j = 0;
3504 }else{
3505 j++;
3506 }
3507 }
3508 fprintf(out, "};\n"); lineno++;
3509
3510 /* Output the yy_shift_ofst[] table */
3511 fprintf(out, "#define YY_SHIFT_USE_DFLT (%d)\n", mnTknOfst-1); lineno++;
3512 fprintf(out, "static %s yy_shift_ofst[] = {\n",
3513 minimum_size_type(mnTknOfst-1, mxTknOfst)); lineno++;
3514 n = lemp->nstate;
3515 for(i=j=0; i<n; i++){
3516 int ofst;
3517 stp = lemp->sorted[i];
3518 ofst = stp->iTknOfst;
3519 if( ofst==NO_OFFSET ) ofst = mnTknOfst - 1;
drhfdbf9282003-10-21 16:34:41 +00003520 if( j==0 ) fprintf(out," /* %5d */ ", i);
drh8b582012003-10-21 13:16:03 +00003521 fprintf(out, " %4d,", ofst);
3522 if( j==9 || i==n-1 ){
3523 fprintf(out, "\n"); lineno++;
3524 j = 0;
3525 }else{
3526 j++;
3527 }
3528 }
3529 fprintf(out, "};\n"); lineno++;
3530
3531 /* Output the yy_reduce_ofst[] table */
3532 fprintf(out, "#define YY_REDUCE_USE_DFLT (%d)\n", mnNtOfst-1); lineno++;
3533 fprintf(out, "static %s yy_reduce_ofst[] = {\n",
3534 minimum_size_type(mnNtOfst-1, mxNtOfst)); lineno++;
3535 n = lemp->nstate;
3536 for(i=j=0; i<n; i++){
3537 int ofst;
3538 stp = lemp->sorted[i];
3539 ofst = stp->iNtOfst;
3540 if( ofst==NO_OFFSET ) ofst = mnNtOfst - 1;
drhfdbf9282003-10-21 16:34:41 +00003541 if( j==0 ) fprintf(out," /* %5d */ ", i);
drh8b582012003-10-21 13:16:03 +00003542 fprintf(out, " %4d,", ofst);
3543 if( j==9 || i==n-1 ){
3544 fprintf(out, "\n"); lineno++;
3545 j = 0;
3546 }else{
3547 j++;
3548 }
3549 }
3550 fprintf(out, "};\n"); lineno++;
3551
3552 /* Output the default action table */
3553 fprintf(out, "static YYACTIONTYPE yy_default[] = {\n"); lineno++;
3554 n = lemp->nstate;
3555 for(i=j=0; i<n; i++){
3556 stp = lemp->sorted[i];
drhfdbf9282003-10-21 16:34:41 +00003557 if( j==0 ) fprintf(out," /* %5d */ ", i);
drh8b582012003-10-21 13:16:03 +00003558 fprintf(out, " %4d,", stp->iDflt);
3559 if( j==9 || i==n-1 ){
3560 fprintf(out, "\n"); lineno++;
3561 j = 0;
3562 }else{
3563 j++;
3564 }
3565 }
3566 fprintf(out, "};\n"); lineno++;
drh75897232000-05-29 14:26:00 +00003567 tplt_xfer(lemp->name,in,out,&lineno);
3568
drh0bd1f4e2002-06-06 18:54:39 +00003569 /* Generate the table of fallback tokens.
3570 */
3571 if( lemp->has_fallback ){
3572 for(i=0; i<lemp->nterminal; i++){
3573 struct symbol *p = lemp->symbols[i];
3574 if( p->fallback==0 ){
3575 fprintf(out, " 0, /* %10s => nothing */\n", p->name);
3576 }else{
3577 fprintf(out, " %3d, /* %10s => %s */\n", p->fallback->index,
3578 p->name, p->fallback->name);
3579 }
3580 lineno++;
3581 }
3582 }
3583 tplt_xfer(lemp->name, in, out, &lineno);
3584
3585 /* Generate a table containing the symbolic name of every symbol
3586 */
drh75897232000-05-29 14:26:00 +00003587 for(i=0; i<lemp->nsymbol; i++){
3588 sprintf(line,"\"%s\",",lemp->symbols[i]->name);
3589 fprintf(out," %-15s",line);
3590 if( (i&3)==3 ){ fprintf(out,"\n"); lineno++; }
3591 }
3592 if( (i&3)!=0 ){ fprintf(out,"\n"); lineno++; }
3593 tplt_xfer(lemp->name,in,out,&lineno);
3594
drh0bd1f4e2002-06-06 18:54:39 +00003595 /* Generate a table containing a text string that describes every
3596 ** rule in the rule set of the grammer. This information is used
3597 ** when tracing REDUCE actions.
3598 */
3599 for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){
3600 assert( rp->index==i );
3601 fprintf(out," /* %3d */ \"%s ::=", i, rp->lhs->name);
3602 for(j=0; j<rp->nrhs; j++) fprintf(out," %s",rp->rhs[j]->name);
3603 fprintf(out,"\",\n"); lineno++;
3604 }
3605 tplt_xfer(lemp->name,in,out,&lineno);
3606
drh75897232000-05-29 14:26:00 +00003607 /* Generate code which executes every time a symbol is popped from
3608 ** the stack while processing errors or while destroying the parser.
drh0bd1f4e2002-06-06 18:54:39 +00003609 ** (In other words, generate the %destructor actions)
3610 */
drh75897232000-05-29 14:26:00 +00003611 if( lemp->tokendest ){
3612 for(i=0; i<lemp->nsymbol; i++){
3613 struct symbol *sp = lemp->symbols[i];
3614 if( sp==0 || sp->type!=TERMINAL ) continue;
3615 fprintf(out," case %d:\n",sp->index); lineno++;
3616 }
3617 for(i=0; i<lemp->nsymbol && lemp->symbols[i]->type!=TERMINAL; i++);
3618 if( i<lemp->nsymbol ){
3619 emit_destructor_code(out,lemp->symbols[i],lemp,&lineno);
3620 fprintf(out," break;\n"); lineno++;
3621 }
3622 }
3623 for(i=0; i<lemp->nsymbol; i++){
3624 struct symbol *sp = lemp->symbols[i];
3625 if( sp==0 || sp->type==TERMINAL || sp->destructor==0 ) continue;
3626 fprintf(out," case %d:\n",sp->index); lineno++;
drh0bb132b2004-07-20 14:06:51 +00003627
3628 /* Combine duplicate destructors into a single case */
3629 for(j=i+1; j<lemp->nsymbol; j++){
3630 struct symbol *sp2 = lemp->symbols[j];
3631 if( sp2 && sp2->type!=TERMINAL && sp2->destructor
3632 && sp2->dtnum==sp->dtnum
3633 && strcmp(sp->destructor,sp2->destructor)==0 ){
3634 fprintf(out," case %d:\n",sp2->index); lineno++;
3635 sp2->destructor = 0;
3636 }
3637 }
3638
drh75897232000-05-29 14:26:00 +00003639 emit_destructor_code(out,lemp->symbols[i],lemp,&lineno);
3640 fprintf(out," break;\n"); lineno++;
3641 }
drh960e8c62001-04-03 16:53:21 +00003642 if( lemp->vardest ){
3643 struct symbol *dflt_sp = 0;
3644 for(i=0; i<lemp->nsymbol; i++){
3645 struct symbol *sp = lemp->symbols[i];
3646 if( sp==0 || sp->type==TERMINAL ||
3647 sp->index<=0 || sp->destructor!=0 ) continue;
3648 fprintf(out," case %d:\n",sp->index); lineno++;
3649 dflt_sp = sp;
3650 }
3651 if( dflt_sp!=0 ){
3652 emit_destructor_code(out,dflt_sp,lemp,&lineno);
3653 fprintf(out," break;\n"); lineno++;
3654 }
3655 }
drh75897232000-05-29 14:26:00 +00003656 tplt_xfer(lemp->name,in,out,&lineno);
3657
3658 /* Generate code which executes whenever the parser stack overflows */
3659 tplt_print(out,lemp,lemp->overflow,lemp->overflowln,&lineno);
3660 tplt_xfer(lemp->name,in,out,&lineno);
3661
3662 /* Generate the table of rule information
3663 **
3664 ** Note: This code depends on the fact that rules are number
3665 ** sequentually beginning with 0.
3666 */
3667 for(rp=lemp->rule; rp; rp=rp->next){
3668 fprintf(out," { %d, %d },\n",rp->lhs->index,rp->nrhs); lineno++;
3669 }
3670 tplt_xfer(lemp->name,in,out,&lineno);
3671
3672 /* Generate code which execution during each REDUCE action */
3673 for(rp=lemp->rule; rp; rp=rp->next){
drh0bb132b2004-07-20 14:06:51 +00003674 if( rp->code ) translate_code(lemp, rp);
3675 }
3676 for(rp=lemp->rule; rp; rp=rp->next){
3677 struct rule *rp2;
3678 if( rp->code==0 ) continue;
drh75897232000-05-29 14:26:00 +00003679 fprintf(out," case %d:\n",rp->index); lineno++;
drh0bb132b2004-07-20 14:06:51 +00003680 for(rp2=rp->next; rp2; rp2=rp2->next){
3681 if( rp2->code==rp->code ){
3682 fprintf(out," case %d:\n",rp2->index); lineno++;
3683 rp2->code = 0;
3684 }
3685 }
drh75897232000-05-29 14:26:00 +00003686 emit_code(out,rp,lemp,&lineno);
3687 fprintf(out," break;\n"); lineno++;
3688 }
3689 tplt_xfer(lemp->name,in,out,&lineno);
3690
3691 /* Generate code which executes if a parse fails */
3692 tplt_print(out,lemp,lemp->failure,lemp->failureln,&lineno);
3693 tplt_xfer(lemp->name,in,out,&lineno);
3694
3695 /* Generate code which executes when a syntax error occurs */
3696 tplt_print(out,lemp,lemp->error,lemp->errorln,&lineno);
3697 tplt_xfer(lemp->name,in,out,&lineno);
3698
3699 /* Generate code which executes when the parser accepts its input */
3700 tplt_print(out,lemp,lemp->accept,lemp->acceptln,&lineno);
3701 tplt_xfer(lemp->name,in,out,&lineno);
3702
3703 /* Append any addition code the user desires */
3704 tplt_print(out,lemp,lemp->extracode,lemp->extracodeln,&lineno);
3705
3706 fclose(in);
3707 fclose(out);
3708 return;
3709}
3710
3711/* Generate a header file for the parser */
3712void ReportHeader(lemp)
3713struct lemon *lemp;
3714{
3715 FILE *out, *in;
3716 char *prefix;
3717 char line[LINESIZE];
3718 char pattern[LINESIZE];
3719 int i;
3720
3721 if( lemp->tokenprefix ) prefix = lemp->tokenprefix;
3722 else prefix = "";
3723 in = file_open(lemp,".h","r");
3724 if( in ){
3725 for(i=1; i<lemp->nterminal && fgets(line,LINESIZE,in); i++){
3726 sprintf(pattern,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
3727 if( strcmp(line,pattern) ) break;
3728 }
3729 fclose(in);
3730 if( i==lemp->nterminal ){
3731 /* No change in the file. Don't rewrite it. */
3732 return;
3733 }
3734 }
3735 out = file_open(lemp,".h","w");
3736 if( out ){
3737 for(i=1; i<lemp->nterminal; i++){
3738 fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
3739 }
3740 fclose(out);
3741 }
3742 return;
3743}
3744
3745/* Reduce the size of the action tables, if possible, by making use
3746** of defaults.
3747**
drhb59499c2002-02-23 18:45:13 +00003748** In this version, we take the most frequent REDUCE action and make
3749** it the default. Only default a reduce if there are more than one.
drh75897232000-05-29 14:26:00 +00003750*/
3751void CompressTables(lemp)
3752struct lemon *lemp;
3753{
3754 struct state *stp;
drhb59499c2002-02-23 18:45:13 +00003755 struct action *ap, *ap2;
3756 struct rule *rp, *rp2, *rbest;
3757 int nbest, n;
drh75897232000-05-29 14:26:00 +00003758 int i;
drh75897232000-05-29 14:26:00 +00003759
3760 for(i=0; i<lemp->nstate; i++){
3761 stp = lemp->sorted[i];
drhb59499c2002-02-23 18:45:13 +00003762 nbest = 0;
3763 rbest = 0;
drh75897232000-05-29 14:26:00 +00003764
drhb59499c2002-02-23 18:45:13 +00003765 for(ap=stp->ap; ap; ap=ap->next){
3766 if( ap->type!=REDUCE ) continue;
3767 rp = ap->x.rp;
3768 if( rp==rbest ) continue;
3769 n = 1;
3770 for(ap2=ap->next; ap2; ap2=ap2->next){
3771 if( ap2->type!=REDUCE ) continue;
3772 rp2 = ap2->x.rp;
3773 if( rp2==rbest ) continue;
3774 if( rp2==rp ) n++;
3775 }
3776 if( n>nbest ){
3777 nbest = n;
3778 rbest = rp;
drh75897232000-05-29 14:26:00 +00003779 }
3780 }
drhb59499c2002-02-23 18:45:13 +00003781
3782 /* Do not make a default if the number of rules to default
3783 ** is not at least 2 */
3784 if( nbest<2 ) continue;
drh75897232000-05-29 14:26:00 +00003785
drhb59499c2002-02-23 18:45:13 +00003786
3787 /* Combine matching REDUCE actions into a single default */
3788 for(ap=stp->ap; ap; ap=ap->next){
3789 if( ap->type==REDUCE && ap->x.rp==rbest ) break;
3790 }
drh75897232000-05-29 14:26:00 +00003791 assert( ap );
3792 ap->sp = Symbol_new("{default}");
3793 for(ap=ap->next; ap; ap=ap->next){
drhb59499c2002-02-23 18:45:13 +00003794 if( ap->type==REDUCE && ap->x.rp==rbest ) ap->type = NOT_USED;
drh75897232000-05-29 14:26:00 +00003795 }
3796 stp->ap = Action_sort(stp->ap);
3797 }
3798}
drhb59499c2002-02-23 18:45:13 +00003799
drh75897232000-05-29 14:26:00 +00003800/***************** From the file "set.c" ************************************/
3801/*
3802** Set manipulation routines for the LEMON parser generator.
3803*/
3804
/* Number of bytes in every set: one more than the value given to SetSize() */
static int size = 0;

/* Set the set size.  Sets created afterwards hold n+1 entries. */
void SetSize(n)
int n;
{
  size = 1 + n;
}
3813
3814/* Allocate a new set */
3815char *SetNew(){
3816 char *s;
3817 int i;
3818 s = (char*)malloc( size );
3819 if( s==0 ){
3820 extern void memory_error();
3821 memory_error();
3822 }
3823 for(i=0; i<size; i++) s[i] = 0;
3824 return s;
3825}
3826
/* Deallocate a set: hand its storage back with free() */
void SetFree(s)
char *s;
{
  free(s);
}
3833
/* Add a new element to the set.  Return TRUE if the element was added
** and FALSE if it was already there. */
int SetAdd(s,e)
char *s;
int e;
{
  int isNew = (s[e]==0);
  s[e] = 1;
  return isNew;
}
3845
3846/* Add every element of s2 to s1. Return TRUE if s1 changes. */
3847int SetUnion(s1,s2)
3848char *s1;
3849char *s2;
3850{
3851 int i, progress;
3852 progress = 0;
3853 for(i=0; i<size; i++){
3854 if( s2[i]==0 ) continue;
3855 if( s1[i]==0 ){
3856 progress = 1;
3857 s1[i] = 1;
3858 }
3859 }
3860 return progress;
3861}
3862/********************** From the file "table.c" ****************************/
3863/*
3864** All code in this file has been automatically generated
3865** from a specification in the file
3866** "table.q"
3867** by the associative array code building program "aagen".
3868** Do not edit this file! Instead, edit the specification
3869** file, then rerun aagen.
3870*/
3871/*
3872** Code for processing tables in the LEMON parser generator.
3873*/
3874
3875PRIVATE int strhash(x)
3876char *x;
3877{
3878 int h = 0;
3879 while( *x) h = h*13 + *(x++);
3880 return h;
3881}
3882
/* Works like strdup, sort of.  Save a string in malloced memory, but
** keep strings in a table so that the same string is not in more
** than one place.
**
** If a string equal to "y" is already in the table, the stored copy is
** returned.  Otherwise a fresh copy is allocated, inserted into the
** table and returned.  MemoryCheck() is applied to the result, so an
** allocation failure is reported there.
**
** A NULL "y" yields NULL; it is not passed on to strlen() or the table
** lookup, both of which would dereference it.
*/
char *Strsafe(y)
char *y;
{
  char *z;

  if( y==0 ) return 0;
  z = Strsafe_find(y);
  if( z==0 && (z=malloc( strlen(y)+1 ))!=0 ){
    strcpy(z,y);
    Strsafe_insert(z);
  }
  MemoryCheck(z);
  return z;
}
3900
/* Header of an associative array of type "x1" (the table of unique
** strings).  One of these exists per array.
*/
struct s_x1 {
  int size;               /* Number of slots available.  Always a power */
                          /*   of 2 that is at least 1 */
  int count;              /* Number of slots currently filled */
  struct s_x1node *tbl;   /* The entries, stored in insertion order */
  struct s_x1node **ht;   /* Hash buckets: heads of the collision chains */
};

/* One entry of an "x1" array.  There is one of these for every string
** held in the array.
*/
typedef struct s_x1node {
  char *data;                  /* The string itself */
  struct s_x1node *next;       /* Next entry with the same hash */
  struct s_x1node **from;      /* Address of the pointer that refers to */
                               /*   this entry (bucket head or a "next") */
} x1node;

/* The single "x1" array used by this program */
static struct s_x1 *x1a;
3924
3925/* Allocate a new associative array */
3926void Strsafe_init(){
3927 if( x1a ) return;
3928 x1a = (struct s_x1*)malloc( sizeof(struct s_x1) );
3929 if( x1a ){
3930 x1a->size = 1024;
3931 x1a->count = 0;
3932 x1a->tbl = (x1node*)malloc(
3933 (sizeof(x1node) + sizeof(x1node*))*1024 );
3934 if( x1a->tbl==0 ){
3935 free(x1a);
3936 x1a = 0;
3937 }else{
3938 int i;
3939 x1a->ht = (x1node**)&(x1a->tbl[1024]);
3940 for(i=0; i<1024; i++) x1a->ht[i] = 0;
3941 }
3942 }
3943}
3944/* Insert a new record into the array. Return TRUE if successful.
3945** Prior data with the same key is NOT overwritten */
3946int Strsafe_insert(data)
3947char *data;
3948{
3949 x1node *np;
3950 int h;
3951 int ph;
3952
3953 if( x1a==0 ) return 0;
3954 ph = strhash(data);
3955 h = ph & (x1a->size-1);
3956 np = x1a->ht[h];
3957 while( np ){
3958 if( strcmp(np->data,data)==0 ){
3959 /* An existing entry with the same key is found. */
3960 /* Fail because overwrite is not allows. */
3961 return 0;
3962 }
3963 np = np->next;
3964 }
3965 if( x1a->count>=x1a->size ){
3966 /* Need to make the hash table bigger */
3967 int i,size;
3968 struct s_x1 array;
3969 array.size = size = x1a->size*2;
3970 array.count = x1a->count;
3971 array.tbl = (x1node*)malloc(
3972 (sizeof(x1node) + sizeof(x1node*))*size );
3973 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */
3974 array.ht = (x1node**)&(array.tbl[size]);
3975 for(i=0; i<size; i++) array.ht[i] = 0;
3976 for(i=0; i<x1a->count; i++){
3977 x1node *oldnp, *newnp;
3978 oldnp = &(x1a->tbl[i]);
3979 h = strhash(oldnp->data) & (size-1);
3980 newnp = &(array.tbl[i]);
3981 if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
3982 newnp->next = array.ht[h];
3983 newnp->data = oldnp->data;
3984 newnp->from = &(array.ht[h]);
3985 array.ht[h] = newnp;
3986 }
3987 free(x1a->tbl);
3988 *x1a = array;
3989 }
3990 /* Insert the new data */
3991 h = ph & (x1a->size-1);
3992 np = &(x1a->tbl[x1a->count++]);
3993 np->data = data;
3994 if( x1a->ht[h] ) x1a->ht[h]->from = &(np->next);
3995 np->next = x1a->ht[h];
3996 x1a->ht[h] = np;
3997 np->from = &(x1a->ht[h]);
3998 return 1;
3999}
4000
4001/* Return a pointer to data assigned to the given key. Return NULL
4002** if no such key. */
4003char *Strsafe_find(key)
4004char *key;
4005{
4006 int h;
4007 x1node *np;
4008
4009 if( x1a==0 ) return 0;
4010 h = strhash(key) & (x1a->size-1);
4011 np = x1a->ht[h];
4012 while( np ){
4013 if( strcmp(np->data,key)==0 ) break;
4014 np = np->next;
4015 }
4016 return np ? np->data : 0;
4017}
4018
4019/* Return a pointer to the (terminal or nonterminal) symbol "x".
4020** Create a new symbol if this is the first time "x" has been seen.
4021*/
4022struct symbol *Symbol_new(x)
4023char *x;
4024{
4025 struct symbol *sp;
4026
4027 sp = Symbol_find(x);
4028 if( sp==0 ){
4029 sp = (struct symbol *)malloc( sizeof(struct symbol) );
4030 MemoryCheck(sp);
4031 sp->name = Strsafe(x);
4032 sp->type = isupper(*x) ? TERMINAL : NONTERMINAL;
4033 sp->rule = 0;
drh0bd1f4e2002-06-06 18:54:39 +00004034 sp->fallback = 0;
drh75897232000-05-29 14:26:00 +00004035 sp->prec = -1;
4036 sp->assoc = UNK;
4037 sp->firstset = 0;
drhb27b83a2002-08-14 23:18:57 +00004038 sp->lambda = B_FALSE;
drh75897232000-05-29 14:26:00 +00004039 sp->destructor = 0;
4040 sp->datatype = 0;
4041 Symbol_insert(sp,sp->name);
4042 }
4043 return sp;
4044}
4045
drh60d31652004-02-22 00:08:04 +00004046/* Compare two symbols for working purposes
4047**
4048** Symbols that begin with upper case letters (terminals or tokens)
4049** must sort before symbols that begin with lower case letters
4050** (non-terminals). Other than that, the order does not matter.
4051**
4052** We find experimentally that leaving the symbols in their original
4053** order (the order they appeared in the grammar file) gives the
4054** smallest parser tables in SQLite.
4055*/
4056int Symbolcmpp(struct symbol **a, struct symbol **b){
4057 int i1 = (**a).index + 10000000*((**a).name[0]>'Z');
4058 int i2 = (**b).index + 10000000*((**b).name[0]>'Z');
4059 return i1-i2;
drh75897232000-05-29 14:26:00 +00004060}
4061
/* Header of an associative array of type "x2" (the symbol table).
** One of these exists per array.
*/
struct s_x2 {
  int size;               /* Number of slots available.  Always a power */
                          /*   of 2 that is at least 1 */
  int count;              /* Number of slots currently filled */
  struct s_x2node *tbl;   /* The entries, stored in insertion order */
  struct s_x2node **ht;   /* Hash buckets: heads of the collision chains */
};

/* One entry of an "x2" array.  There is one of these for every symbol
** held in the array.
*/
typedef struct s_x2node {
  struct symbol *data;         /* The symbol */
  char *key;                   /* The name the symbol is filed under */
  struct s_x2node *next;       /* Next entry with the same hash */
  struct s_x2node **from;      /* Address of the pointer that refers to */
                               /*   this entry (bucket head or a "next") */
} x2node;

/* The single "x2" array used by this program */
static struct s_x2 *x2a;
4086
4087/* Allocate a new associative array */
4088void Symbol_init(){
4089 if( x2a ) return;
4090 x2a = (struct s_x2*)malloc( sizeof(struct s_x2) );
4091 if( x2a ){
4092 x2a->size = 128;
4093 x2a->count = 0;
4094 x2a->tbl = (x2node*)malloc(
4095 (sizeof(x2node) + sizeof(x2node*))*128 );
4096 if( x2a->tbl==0 ){
4097 free(x2a);
4098 x2a = 0;
4099 }else{
4100 int i;
4101 x2a->ht = (x2node**)&(x2a->tbl[128]);
4102 for(i=0; i<128; i++) x2a->ht[i] = 0;
4103 }
4104 }
4105}
4106/* Insert a new record into the array. Return TRUE if successful.
4107** Prior data with the same key is NOT overwritten */
4108int Symbol_insert(data,key)
4109struct symbol *data;
4110char *key;
4111{
4112 x2node *np;
4113 int h;
4114 int ph;
4115
4116 if( x2a==0 ) return 0;
4117 ph = strhash(key);
4118 h = ph & (x2a->size-1);
4119 np = x2a->ht[h];
4120 while( np ){
4121 if( strcmp(np->key,key)==0 ){
4122 /* An existing entry with the same key is found. */
4123 /* Fail because overwrite is not allows. */
4124 return 0;
4125 }
4126 np = np->next;
4127 }
4128 if( x2a->count>=x2a->size ){
4129 /* Need to make the hash table bigger */
4130 int i,size;
4131 struct s_x2 array;
4132 array.size = size = x2a->size*2;
4133 array.count = x2a->count;
4134 array.tbl = (x2node*)malloc(
4135 (sizeof(x2node) + sizeof(x2node*))*size );
4136 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */
4137 array.ht = (x2node**)&(array.tbl[size]);
4138 for(i=0; i<size; i++) array.ht[i] = 0;
4139 for(i=0; i<x2a->count; i++){
4140 x2node *oldnp, *newnp;
4141 oldnp = &(x2a->tbl[i]);
4142 h = strhash(oldnp->key) & (size-1);
4143 newnp = &(array.tbl[i]);
4144 if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
4145 newnp->next = array.ht[h];
4146 newnp->key = oldnp->key;
4147 newnp->data = oldnp->data;
4148 newnp->from = &(array.ht[h]);
4149 array.ht[h] = newnp;
4150 }
4151 free(x2a->tbl);
4152 *x2a = array;
4153 }
4154 /* Insert the new data */
4155 h = ph & (x2a->size-1);
4156 np = &(x2a->tbl[x2a->count++]);
4157 np->key = key;
4158 np->data = data;
4159 if( x2a->ht[h] ) x2a->ht[h]->from = &(np->next);
4160 np->next = x2a->ht[h];
4161 x2a->ht[h] = np;
4162 np->from = &(x2a->ht[h]);
4163 return 1;
4164}
4165
4166/* Return a pointer to data assigned to the given key. Return NULL
4167** if no such key. */
4168struct symbol *Symbol_find(key)
4169char *key;
4170{
4171 int h;
4172 x2node *np;
4173
4174 if( x2a==0 ) return 0;
4175 h = strhash(key) & (x2a->size-1);
4176 np = x2a->ht[h];
4177 while( np ){
4178 if( strcmp(np->key,key)==0 ) break;
4179 np = np->next;
4180 }
4181 return np ? np->data : 0;
4182}
4183
4184/* Return the n-th data. Return NULL if n is out of range. */
4185struct symbol *Symbol_Nth(n)
4186int n;
4187{
4188 struct symbol *data;
4189 if( x2a && n>0 && n<=x2a->count ){
4190 data = x2a->tbl[n-1].data;
4191 }else{
4192 data = 0;
4193 }
4194 return data;
4195}
4196
4197/* Return the size of the array */
4198int Symbol_count()
4199{
4200 return x2a ? x2a->count : 0;
4201}
4202
4203/* Return an array of pointers to all data in the table.
4204** The array is obtained from malloc. Return NULL if memory allocation
4205** problems, or if the array is empty. */
4206struct symbol **Symbol_arrayof()
4207{
4208 struct symbol **array;
4209 int i,size;
4210 if( x2a==0 ) return 0;
4211 size = x2a->count;
4212 array = (struct symbol **)malloc( sizeof(struct symbol *)*size );
4213 if( array ){
4214 for(i=0; i<size; i++) array[i] = x2a->tbl[i].data;
4215 }
4216 return array;
4217}
4218
4219/* Compare two configurations */
4220int Configcmp(a,b)
4221struct config *a;
4222struct config *b;
4223{
4224 int x;
4225 x = a->rp->index - b->rp->index;
4226 if( x==0 ) x = a->dot - b->dot;
4227 return x;
4228}
4229
4230/* Compare two states */
4231PRIVATE int statecmp(a,b)
4232struct config *a;
4233struct config *b;
4234{
4235 int rc;
4236 for(rc=0; rc==0 && a && b; a=a->bp, b=b->bp){
4237 rc = a->rp->index - b->rp->index;
4238 if( rc==0 ) rc = a->dot - b->dot;
4239 }
4240 if( rc==0 ){
4241 if( a ) rc = 1;
4242 if( b ) rc = -1;
4243 }
4244 return rc;
4245}
4246
4247/* Hash a state */
4248PRIVATE int statehash(a)
4249struct config *a;
4250{
4251 int h=0;
4252 while( a ){
4253 h = h*571 + a->rp->index*37 + a->dot;
4254 a = a->bp;
4255 }
4256 return h;
4257}
4258
4259/* Allocate a new state structure */
4260struct state *State_new()
4261{
4262 struct state *new;
4263 new = (struct state *)malloc( sizeof(struct state) );
4264 MemoryCheck(new);
4265 return new;
4266}
4267
/* Header of an associative array of type "x3" (the state table).
** One of these exists per array.
*/
struct s_x3 {
  int size;               /* Number of slots available.  Always a power */
                          /*   of 2 that is at least 1 */
  int count;              /* Number of slots currently filled */
  struct s_x3node *tbl;   /* The entries, stored in insertion order */
  struct s_x3node **ht;   /* Hash buckets: heads of the collision chains */
};

/* One entry of an "x3" array.  There is one of these for every state
** held in the array.
*/
typedef struct s_x3node {
  struct state *data;          /* The state */
  struct config *key;          /* Configuration list identifying the state */
  struct s_x3node *next;       /* Next entry with the same hash */
  struct s_x3node **from;      /* Address of the pointer that refers to */
                               /*   this entry (bucket head or a "next") */
} x3node;

/* The single "x3" array used by this program */
static struct s_x3 *x3a;
4292
4293/* Allocate a new associative array */
4294void State_init(){
4295 if( x3a ) return;
4296 x3a = (struct s_x3*)malloc( sizeof(struct s_x3) );
4297 if( x3a ){
4298 x3a->size = 128;
4299 x3a->count = 0;
4300 x3a->tbl = (x3node*)malloc(
4301 (sizeof(x3node) + sizeof(x3node*))*128 );
4302 if( x3a->tbl==0 ){
4303 free(x3a);
4304 x3a = 0;
4305 }else{
4306 int i;
4307 x3a->ht = (x3node**)&(x3a->tbl[128]);
4308 for(i=0; i<128; i++) x3a->ht[i] = 0;
4309 }
4310 }
4311}
4312/* Insert a new record into the array. Return TRUE if successful.
4313** Prior data with the same key is NOT overwritten */
4314int State_insert(data,key)
4315struct state *data;
4316struct config *key;
4317{
4318 x3node *np;
4319 int h;
4320 int ph;
4321
4322 if( x3a==0 ) return 0;
4323 ph = statehash(key);
4324 h = ph & (x3a->size-1);
4325 np = x3a->ht[h];
4326 while( np ){
4327 if( statecmp(np->key,key)==0 ){
4328 /* An existing entry with the same key is found. */
4329 /* Fail because overwrite is not allows. */
4330 return 0;
4331 }
4332 np = np->next;
4333 }
4334 if( x3a->count>=x3a->size ){
4335 /* Need to make the hash table bigger */
4336 int i,size;
4337 struct s_x3 array;
4338 array.size = size = x3a->size*2;
4339 array.count = x3a->count;
4340 array.tbl = (x3node*)malloc(
4341 (sizeof(x3node) + sizeof(x3node*))*size );
4342 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */
4343 array.ht = (x3node**)&(array.tbl[size]);
4344 for(i=0; i<size; i++) array.ht[i] = 0;
4345 for(i=0; i<x3a->count; i++){
4346 x3node *oldnp, *newnp;
4347 oldnp = &(x3a->tbl[i]);
4348 h = statehash(oldnp->key) & (size-1);
4349 newnp = &(array.tbl[i]);
4350 if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
4351 newnp->next = array.ht[h];
4352 newnp->key = oldnp->key;
4353 newnp->data = oldnp->data;
4354 newnp->from = &(array.ht[h]);
4355 array.ht[h] = newnp;
4356 }
4357 free(x3a->tbl);
4358 *x3a = array;
4359 }
4360 /* Insert the new data */
4361 h = ph & (x3a->size-1);
4362 np = &(x3a->tbl[x3a->count++]);
4363 np->key = key;
4364 np->data = data;
4365 if( x3a->ht[h] ) x3a->ht[h]->from = &(np->next);
4366 np->next = x3a->ht[h];
4367 x3a->ht[h] = np;
4368 np->from = &(x3a->ht[h]);
4369 return 1;
4370}
4371
4372/* Return a pointer to data assigned to the given key. Return NULL
4373** if no such key. */
4374struct state *State_find(key)
4375struct config *key;
4376{
4377 int h;
4378 x3node *np;
4379
4380 if( x3a==0 ) return 0;
4381 h = statehash(key) & (x3a->size-1);
4382 np = x3a->ht[h];
4383 while( np ){
4384 if( statecmp(np->key,key)==0 ) break;
4385 np = np->next;
4386 }
4387 return np ? np->data : 0;
4388}
4389
4390/* Return an array of pointers to all data in the table.
4391** The array is obtained from malloc. Return NULL if memory allocation
4392** problems, or if the array is empty. */
4393struct state **State_arrayof()
4394{
4395 struct state **array;
4396 int i,size;
4397 if( x3a==0 ) return 0;
4398 size = x3a->count;
4399 array = (struct state **)malloc( sizeof(struct state *)*size );
4400 if( array ){
4401 for(i=0; i<size; i++) array[i] = x3a->tbl[i].data;
4402 }
4403 return array;
4404}
4405
4406/* Hash a configuration */
4407PRIVATE int confighash(a)
4408struct config *a;
4409{
4410 int h=0;
4411 h = h*571 + a->rp->index*37 + a->dot;
4412 return h;
4413}
4414
/* Header of an associative array of type "x4" (the configuration
** table).  One of these exists per array.
*/
struct s_x4 {
  int size;               /* Number of slots available.  Always a power */
                          /*   of 2 that is at least 1 */
  int count;              /* Number of slots currently filled */
  struct s_x4node *tbl;   /* The entries, stored in insertion order */
  struct s_x4node **ht;   /* Hash buckets: heads of the collision chains */
};

/* One entry of an "x4" array.  There is one of these for every
** configuration held in the array.
*/
typedef struct s_x4node {
  struct config *data;         /* The configuration (also acts as key) */
  struct s_x4node *next;       /* Next entry with the same hash */
  struct s_x4node **from;      /* Address of the pointer that refers to */
                               /*   this entry (bucket head or a "next") */
} x4node;

/* The single "x4" array used by this program */
static struct s_x4 *x4a;
4438
4439/* Allocate a new associative array */
4440void Configtable_init(){
4441 if( x4a ) return;
4442 x4a = (struct s_x4*)malloc( sizeof(struct s_x4) );
4443 if( x4a ){
4444 x4a->size = 64;
4445 x4a->count = 0;
4446 x4a->tbl = (x4node*)malloc(
4447 (sizeof(x4node) + sizeof(x4node*))*64 );
4448 if( x4a->tbl==0 ){
4449 free(x4a);
4450 x4a = 0;
4451 }else{
4452 int i;
4453 x4a->ht = (x4node**)&(x4a->tbl[64]);
4454 for(i=0; i<64; i++) x4a->ht[i] = 0;
4455 }
4456 }
4457}
4458/* Insert a new record into the array. Return TRUE if successful.
4459** Prior data with the same key is NOT overwritten */
4460int Configtable_insert(data)
4461struct config *data;
4462{
4463 x4node *np;
4464 int h;
4465 int ph;
4466
4467 if( x4a==0 ) return 0;
4468 ph = confighash(data);
4469 h = ph & (x4a->size-1);
4470 np = x4a->ht[h];
4471 while( np ){
4472 if( Configcmp(np->data,data)==0 ){
4473 /* An existing entry with the same key is found. */
4474 /* Fail because overwrite is not allows. */
4475 return 0;
4476 }
4477 np = np->next;
4478 }
4479 if( x4a->count>=x4a->size ){
4480 /* Need to make the hash table bigger */
4481 int i,size;
4482 struct s_x4 array;
4483 array.size = size = x4a->size*2;
4484 array.count = x4a->count;
4485 array.tbl = (x4node*)malloc(
4486 (sizeof(x4node) + sizeof(x4node*))*size );
4487 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */
4488 array.ht = (x4node**)&(array.tbl[size]);
4489 for(i=0; i<size; i++) array.ht[i] = 0;
4490 for(i=0; i<x4a->count; i++){
4491 x4node *oldnp, *newnp;
4492 oldnp = &(x4a->tbl[i]);
4493 h = confighash(oldnp->data) & (size-1);
4494 newnp = &(array.tbl[i]);
4495 if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
4496 newnp->next = array.ht[h];
4497 newnp->data = oldnp->data;
4498 newnp->from = &(array.ht[h]);
4499 array.ht[h] = newnp;
4500 }
4501 free(x4a->tbl);
4502 *x4a = array;
4503 }
4504 /* Insert the new data */
4505 h = ph & (x4a->size-1);
4506 np = &(x4a->tbl[x4a->count++]);
4507 np->data = data;
4508 if( x4a->ht[h] ) x4a->ht[h]->from = &(np->next);
4509 np->next = x4a->ht[h];
4510 x4a->ht[h] = np;
4511 np->from = &(x4a->ht[h]);
4512 return 1;
4513}
4514
4515/* Return a pointer to data assigned to the given key. Return NULL
4516** if no such key. */
4517struct config *Configtable_find(key)
4518struct config *key;
4519{
4520 int h;
4521 x4node *np;
4522
4523 if( x4a==0 ) return 0;
4524 h = confighash(key) & (x4a->size-1);
4525 np = x4a->ht[h];
4526 while( np ){
4527 if( Configcmp(np->data,key)==0 ) break;
4528 np = np->next;
4529 }
4530 return np ? np->data : 0;
4531}
4532
4533/* Remove all data from the table. Pass each data to the function "f"
4534** as it is removed. ("f" may be null to avoid this step.) */
4535void Configtable_clear(f)
4536int(*f)(/* struct config * */);
4537{
4538 int i;
4539 if( x4a==0 || x4a->count==0 ) return;
4540 if( f ) for(i=0; i<x4a->count; i++) (*f)(x4a->tbl[i].data);
4541 for(i=0; i<x4a->size; i++) x4a->ht[i] = 0;
4542 x4a->count = 0;
4543 return;
4544}