blob: f816de2309a7ac5b20a5889ceae3ceb73ca78050 [file] [log] [blame]
/*
** This file contains all sources (including headers) to the LEMON
** LALR(1) parser generator.  The sources have been combined into a
** single file to make it easy to include LEMON in the source tree
** and Makefile of another program.
**
** The author of this program disclaims copyright.
*/
9#include <stdio.h>
drhf9a2e7b2003-04-15 01:49:48 +000010#include <stdarg.h>
drh75897232000-05-29 14:26:00 +000011#include <string.h>
12#include <ctype.h>
drh8b582012003-10-21 13:16:03 +000013#include <stdlib.h>
drh75897232000-05-29 14:26:00 +000014
drh75897232000-05-29 14:26:00 +000015#ifndef __WIN32__
16# if defined(_WIN32) || defined(WIN32)
17# define __WIN32__
18# endif
19#endif
20
21/* #define PRIVATE static */
22#define PRIVATE
23
24#ifdef TEST
25#define MAXRHS 5 /* Set low to exercise exception code */
26#else
27#define MAXRHS 1000
28#endif
29
30char *msort();
31extern void *malloc();
32
33/******** From the file "action.h" *************************************/
34struct action *Action_new();
35struct action *Action_sort();
drh75897232000-05-29 14:26:00 +000036
37/********* From the file "assert.h" ************************************/
/* myassert(file,line) reports a failed assertion on stderr and exits. */
void myassert();
#ifndef NDEBUG
/* assert(X) is written as a single expression rather than a bare "if"
** so that it behaves like one statement everywhere: in particular it
** cannot capture an "else" that follows it in the calling code. */
# define assert(X) ((X) ? (void)0 : myassert(__FILE__,__LINE__))
#else
# define assert(X)
#endif
44
45/********** From the file "build.h" ************************************/
46void FindRulePrecedences();
47void FindFirstSets();
48void FindStates();
49void FindLinks();
50void FindFollowSets();
51void FindActions();
52
53/********* From the file "configlist.h" *********************************/
54void Configlist_init(/* void */);
55struct config *Configlist_add(/* struct rule *, int */);
56struct config *Configlist_addbasis(/* struct rule *, int */);
57void Configlist_closure(/* void */);
58void Configlist_sort(/* void */);
59void Configlist_sortbasis(/* void */);
60struct config *Configlist_return(/* void */);
61struct config *Configlist_basis(/* void */);
62void Configlist_eat(/* struct config * */);
63void Configlist_reset(/* void */);
64
65/********* From the file "error.h" ***************************************/
drhf9a2e7b2003-04-15 01:49:48 +000066void ErrorMsg(const char *, int,const char *, ...);
drh75897232000-05-29 14:26:00 +000067
68/****** From the file "option.h" ******************************************/
/* One entry of the command-line option table (the commented signature of
** OptInit() takes a pointer to an array of these). */
struct s_options {
  enum {
    OPT_FLAG = 1,
    OPT_INT,
    OPT_DBL,
    OPT_STR,
    OPT_FFLAG,
    OPT_FINT,
    OPT_FDBL,
    OPT_FSTR
  } type;           /* Which kind of option this entry describes */
  char *label;      /* presumably the option's name on the command line */
  char *arg;        /* presumably where the option's value/handler lives */
  char *message;    /* presumably the help text shown for the option */
};
drhb0c86772000-06-02 23:21:26 +000076int OptInit(/* char**,struct s_options*,FILE* */);
77int OptNArgs(/* void */);
78char *OptArg(/* int */);
79void OptErr(/* int */);
80void OptPrint(/* void */);
drh75897232000-05-29 14:26:00 +000081
82/******** From the file "parse.h" *****************************************/
83void Parse(/* struct lemon *lemp */);
84
85/********* From the file "plink.h" ***************************************/
86struct plink *Plink_new(/* void */);
87void Plink_add(/* struct plink **, struct config * */);
88void Plink_copy(/* struct plink **, struct plink * */);
89void Plink_delete(/* struct plink * */);
90
91/********** From the file "report.h" *************************************/
92void Reprint(/* struct lemon * */);
93void ReportOutput(/* struct lemon * */);
94void ReportTable(/* struct lemon * */);
95void ReportHeader(/* struct lemon * */);
96void CompressTables(/* struct lemon * */);
97
98/********** From the file "set.h" ****************************************/
99void SetSize(/* int N */); /* All sets will be of size N */
100char *SetNew(/* void */); /* A new set for element 0..N */
101void SetFree(/* char* */); /* Deallocate a set */
102
103int SetAdd(/* char*,int */); /* Add element to a set */
104int SetUnion(/* char *A,char *B */); /* A <- A U B, thru element N */
105
106#define SetFind(X,Y) (X[Y]) /* True if Y is in set X */
107
108/********** From the file "struct.h" *************************************/
109/*
110** Principal data structures for the LEMON parser generator.
111*/
112
/* Two-valued truth type: B_FALSE is 0, B_TRUE is 1. */
typedef enum {
  B_FALSE = 0,
  B_TRUE
} Boolean;
drh75897232000-05-29 14:26:00 +0000114
115/* Symbols (terminals and nonterminals) of the grammar are stored
116** in the following: */
117struct symbol {
118 char *name; /* Name of the symbol */
119 int index; /* Index number for this symbol */
120 enum {
121 TERMINAL,
122 NONTERMINAL
123 } type; /* Symbols are all either TERMINALS or NTs */
124 struct rule *rule; /* Linked list of rules of this (if an NT) */
drh0bd1f4e2002-06-06 18:54:39 +0000125 struct symbol *fallback; /* fallback token in case this token doesn't parse */
drh75897232000-05-29 14:26:00 +0000126 int prec; /* Precedence if defined (-1 otherwise) */
127 enum e_assoc {
128 LEFT,
129 RIGHT,
130 NONE,
131 UNK
132 } assoc; /* Associativity if predecence is defined */
133 char *firstset; /* First-set for all rules of this symbol */
134 Boolean lambda; /* True if NT and can generate an empty string */
135 char *destructor; /* Code which executes whenever this symbol is
136 ** popped from the stack during error processing */
137 int destructorln; /* Line number of destructor code */
138 char *datatype; /* The data type of information held by this
139 ** object. Only used if type==NONTERMINAL */
140 int dtnum; /* The data type number. In the parser, the value
141 ** stack is a union. The .yy%d element of this
142 ** union is the correct data type for this object */
143};
144
145/* Each production rule in the grammar is stored in the following
146** structure. */
147struct rule {
148 struct symbol *lhs; /* Left-hand side of the rule */
149 char *lhsalias; /* Alias for the LHS (NULL if none) */
150 int ruleline; /* Line number for the rule */
151 int nrhs; /* Number of RHS symbols */
152 struct symbol **rhs; /* The RHS symbols */
153 char **rhsalias; /* An alias for each RHS symbol (NULL if none) */
154 int line; /* Line number at which code begins */
155 char *code; /* The code executed when this rule is reduced */
156 struct symbol *precsym; /* Precedence symbol for this rule */
157 int index; /* An index number for this rule */
158 Boolean canReduce; /* True if this rule is ever reduced */
159 struct rule *nextlhs; /* Next rule with the same LHS */
160 struct rule *next; /* Next rule in the global list */
161};
162
/* A configuration pairs a production rule with a mark (the "dot") that
** records how much of the rule has been processed so far.  It also
** carries a follow-set: the terminal symbols that are allowed to come
** immediately after the end of the rule.  Each configuration is one
** instance of this structure. */
struct config {
  struct rule *rp;          /* Rule on which the configuration is based */
  int dot;                  /* The parse point within that rule */
  char *fws;                /* Follow-set of this configuration only */
  struct plink *fplp;       /* Forward follow-set propagation links */
  struct plink *bplp;       /* Backward follow-set propagation links */
  struct state *stp;        /* State that contains this configuration */
  enum {
    COMPLETE,
    INCOMPLETE
  } status;                 /* Used while computing follow-sets and shifts */
  struct config *next;      /* Next configuration in the state */
  struct config *bp;        /* Next basis configuration */
};
182
/* One shift or reduce operation of the parser. */
struct action {
  struct symbol *sp;        /* Look-ahead symbol that triggers the action */
  enum e_action {
    SHIFT,
    ACCEPT,
    REDUCE,
    ERROR,
    CONFLICT,               /* Was a reduce, but part of a conflict */
    SH_RESOLVED,            /* Was a shift; conflict resolved by precedence */
    RD_RESOLVED,            /* Was a reduce; conflict resolved by precedence */
    NOT_USED                /* Deleted by compression */
  } type;
  union {
    struct state *stp;      /* For a shift: the new state */
    struct rule *rp;        /* For a reduce: the rule */
  } x;
  struct action *next;      /* Next action belonging to the same state */
  struct action *collide;   /* Next action having the same hash */
};
203
/* One state of the generated parser's finite state machine. */
struct state {
  struct config *bp;        /* Basis configurations of the state */
  struct config *cfp;       /* Every configuration in the state */
  int index;                /* Sequential number of the state */
  struct action *ap;        /* Actions of the state */
  int nTknAct;              /* Number of actions on terminals */
  int nNtAct;               /* Number of actions on nonterminals */
  int iTknOfst;             /* yy_action[] offset for terminals */
  int iNtOfst;              /* yy_action[] offset for nonterminals */
  int iDflt;                /* Default action */
};
#define NO_OFFSET (-2147483647)
drh75897232000-05-29 14:26:00 +0000216
/* A follow-set propagation link.  It records that whenever the follow-set
** of one configuration changes, its contents must be propagated to the
** configuration named here. */
struct plink {
  struct config *cfp;       /* Configuration this link leads to */
  struct plink *next;       /* Next propagation link in the list */
};
224
/* The complete state of the parser generator lives in one instance of
** this structure.  (LEMON uses no global variables and makes little use
** of static variables, so the fields below can be thought of as being
** the program's global variables.) */
struct lemon {
  struct state **sorted;    /* States, in a table sorted by state number */
  struct rule *rule;        /* List of every rule */
  int nstate;               /* Count of states */
  int nrule;                /* Count of rules */
  int nsymbol;              /* Count of terminal plus nonterminal symbols */
  int nterminal;            /* Count of terminal symbols */
  struct symbol **symbols;  /* Sorted array of pointers to the symbols */
  int errorcnt;             /* Count of errors */
  struct symbol *errsym;    /* The error symbol */
  char *name;               /* Name of the generated parser */
  char *arg;                /* Declaration of the 3rd argument to the parser */
  char *tokentype;          /* Type of terminal symbols in the parser stack */
  char *vartype;            /* Default type of non-terminal symbols */
  char *start;              /* Name of the grammar's start symbol */
  char *stacksize;          /* Size of the parser stack */
  char *include;            /* Code placed at the start of the C file */
  int includeln;            /* Line number where the include code starts */
  char *error;              /* Code executed when an error is seen */
  int errorln;              /* Line number where the error code starts */
  char *overflow;           /* Code executed on a stack overflow */
  int overflowln;           /* Line number where the overflow code starts */
  char *failure;            /* Code executed on parser failure */
  int failureln;            /* Line number where the failure code starts */
  char *accept;             /* Code executed when the parser accepts */
  int acceptln;             /* Line number where the accept code starts */
  char *extracode;          /* Code appended to the generated file */
  int extracodeln;          /* Line number where the extra code starts */
  char *tokendest;          /* Code executed to destroy token data */
  int tokendestln;          /* Line number of the token destroyer code */
  char *vardest;            /* Code of the default non-terminal destructor */
  int vardestln;            /* Line number of the default non-terminal
                            ** destructor code */
  char *filename;           /* Name of the input file */
  char *outname;            /* Name of the current output file */
  char *tokenprefix;        /* Prefix added to token names in the .h file */
  int nconflict;            /* Count of parsing conflicts */
  int tablesize;            /* Size of the parse tables */
  int basisflag;            /* Print only basis configurations */
  int has_fallback;         /* True if any %fallback appears in the grammar */
  char *argv0;              /* Name of the program */
};
270
/* MemoryCheck(X) calls memory_error() when X is zero (a null pointer). */
#define MemoryCheck(X) if( !(X) ){ \
  extern void memory_error(); \
  memory_error(); \
}
275
276/**************** From the file "table.h" *********************************/
277/*
278** All code in this file has been automatically generated
279** from a specification in the file
280** "table.q"
281** by the associative array code building program "aagen".
282** Do not edit this file! Instead, edit the specification
283** file, then rerun aagen.
284*/
285/*
286** Code for processing tables in the LEMON parser generator.
287*/
288
/* Routines for handling strings */
290
291char *Strsafe();
292
293void Strsafe_init(/* void */);
294int Strsafe_insert(/* char * */);
295char *Strsafe_find(/* char * */);
296
297/* Routines for handling symbols of the grammar */
298
299struct symbol *Symbol_new();
300int Symbolcmpp(/* struct symbol **, struct symbol ** */);
301void Symbol_init(/* void */);
302int Symbol_insert(/* struct symbol *, char * */);
303struct symbol *Symbol_find(/* char * */);
304struct symbol *Symbol_Nth(/* int */);
305int Symbol_count(/* */);
306struct symbol **Symbol_arrayof(/* */);
307
308/* Routines to manage the state table */
309
310int Configcmp(/* struct config *, struct config * */);
311struct state *State_new();
312void State_init(/* void */);
313int State_insert(/* struct state *, struct config * */);
314struct state *State_find(/* struct config * */);
315struct state **State_arrayof(/* */);
316
317/* Routines used for efficiency in Configlist_add */
318
319void Configtable_init(/* void */);
320int Configtable_insert(/* struct config * */);
321struct config *Configtable_find(/* struct config * */);
322void Configtable_clear(/* int(*)(struct config *) */);
323/****************** From the file "action.c" *******************************/
324/*
325** Routines processing parser actions in the LEMON parser generator.
326*/
327
328/* Allocate a new parser action */
329struct action *Action_new(){
330 static struct action *freelist = 0;
331 struct action *new;
332
333 if( freelist==0 ){
334 int i;
335 int amt = 100;
336 freelist = (struct action *)malloc( sizeof(struct action)*amt );
337 if( freelist==0 ){
338 fprintf(stderr,"Unable to allocate memory for a new parser action.");
339 exit(1);
340 }
341 for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1];
342 freelist[amt-1].next = 0;
343 }
344 new = freelist;
345 freelist = freelist->next;
346 return new;
347}
348
349/* Compare two actions */
350static int actioncmp(ap1,ap2)
351struct action *ap1;
352struct action *ap2;
353{
354 int rc;
355 rc = ap1->sp->index - ap2->sp->index;
356 if( rc==0 ) rc = (int)ap1->type - (int)ap2->type;
357 if( rc==0 ){
drh61bc2722000-08-20 11:42:46 +0000358 assert( ap1->type==REDUCE || ap1->type==RD_RESOLVED || ap1->type==CONFLICT);
359 assert( ap2->type==REDUCE || ap2->type==RD_RESOLVED || ap2->type==CONFLICT);
drh75897232000-05-29 14:26:00 +0000360 rc = ap1->x.rp->index - ap2->x.rp->index;
361 }
362 return rc;
363}
364
365/* Sort parser actions */
366struct action *Action_sort(ap)
367struct action *ap;
368{
drh218dc692004-05-31 23:13:45 +0000369 ap = (struct action *)msort((char *)ap,(char **)&ap->next,actioncmp);
drh75897232000-05-29 14:26:00 +0000370 return ap;
371}
372
373void Action_add(app,type,sp,arg)
374struct action **app;
375enum e_action type;
376struct symbol *sp;
377char *arg;
378{
379 struct action *new;
380 new = Action_new();
381 new->next = *app;
382 *app = new;
383 new->type = type;
384 new->sp = sp;
385 if( type==SHIFT ){
386 new->x.stp = (struct state *)arg;
387 }else{
388 new->x.rp = (struct rule *)arg;
389 }
390}
drh8b582012003-10-21 13:16:03 +0000391/********************** New code to implement the "acttab" module ***********/
392/*
** This module implements routines used to construct the yy_action[] table.
394*/
395
/*
** An instance of this structure holds the yy_action table while it is
** being built, plus the transaction set currently being accumulated.
*/
typedef struct acttab acttab;
struct acttab {
  int nAction;              /* Slots of aAction[] in use */
  int nActionAlloc;         /* Slots of aAction[] allocated */
  struct {
    int lookahead;          /* Value of the lookahead token */
    int action;             /* Action taken on that lookahead */
  } *aAction,               /* The yy_action[] table under construction */
    *aLookahead;            /* A single new transaction set */
  int mnLookahead;          /* Smallest aLookahead[].lookahead */
  int mnAction;             /* Action that goes with mnLookahead */
  int mxLookahead;          /* Largest aLookahead[].lookahead */
  int nLookahead;           /* Slots of aLookahead[] in use */
  int nLookaheadAlloc;      /* Slots of aLookahead[] allocated */
};
415
/* Number of entries in the yy_action table held by X */
#define acttab_size(X)          ((X)->nAction)

/* Value of entry N of yy_action */
#define acttab_yyaction(X,N)    ((X)->aAction[(N)].action)

/* Value of entry N of yy_lookahead */
#define acttab_yylookahead(X,N) ((X)->aAction[(N)].lookahead)
424
425/* Free all memory associated with the given acttab */
426void acttab_free(acttab *p){
427 free( p->aAction );
428 free( p->aLookahead );
429 free( p );
430}
431
432/* Allocate a new acttab structure */
433acttab *acttab_alloc(void){
434 acttab *p = malloc( sizeof(*p) );
435 if( p==0 ){
436 fprintf(stderr,"Unable to allocate memory for a new acttab.");
437 exit(1);
438 }
439 memset(p, 0, sizeof(*p));
440 return p;
441}
442
443/* Add a new action to the current transaction set
444*/
445void acttab_action(acttab *p, int lookahead, int action){
446 if( p->nLookahead>=p->nLookaheadAlloc ){
447 p->nLookaheadAlloc += 25;
448 p->aLookahead = realloc( p->aLookahead,
449 sizeof(p->aLookahead[0])*p->nLookaheadAlloc );
450 if( p->aLookahead==0 ){
451 fprintf(stderr,"malloc failed\n");
452 exit(1);
453 }
454 }
455 if( p->nLookahead==0 ){
456 p->mxLookahead = lookahead;
457 p->mnLookahead = lookahead;
458 p->mnAction = action;
459 }else{
460 if( p->mxLookahead<lookahead ) p->mxLookahead = lookahead;
461 if( p->mnLookahead>lookahead ){
462 p->mnLookahead = lookahead;
463 p->mnAction = action;
464 }
465 }
466 p->aLookahead[p->nLookahead].lookahead = lookahead;
467 p->aLookahead[p->nLookahead].action = action;
468 p->nLookahead++;
469}
470
471/*
472** Add the transaction set built up with prior calls to acttab_action()
473** into the current action table. Then reset the transaction set back
474** to an empty set in preparation for a new round of acttab_action() calls.
475**
476** Return the offset into the action table of the new transaction.
477*/
478int acttab_insert(acttab *p){
479 int i, j, k, n;
480 assert( p->nLookahead>0 );
481
482 /* Make sure we have enough space to hold the expanded action table
483 ** in the worst case. The worst case occurs if the transaction set
484 ** must be appended to the current action table
485 */
drh784d86f2004-02-19 18:41:53 +0000486 n = p->mxLookahead + 1;
drh8b582012003-10-21 13:16:03 +0000487 if( p->nAction + n >= p->nActionAlloc ){
drhfdbf9282003-10-21 16:34:41 +0000488 int oldAlloc = p->nActionAlloc;
drh8b582012003-10-21 13:16:03 +0000489 p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20;
490 p->aAction = realloc( p->aAction,
491 sizeof(p->aAction[0])*p->nActionAlloc);
492 if( p->aAction==0 ){
493 fprintf(stderr,"malloc failed\n");
494 exit(1);
495 }
drhfdbf9282003-10-21 16:34:41 +0000496 for(i=oldAlloc; i<p->nActionAlloc; i++){
drh8b582012003-10-21 13:16:03 +0000497 p->aAction[i].lookahead = -1;
498 p->aAction[i].action = -1;
499 }
500 }
501
502 /* Scan the existing action table looking for an offset where we can
503 ** insert the current transaction set. Fall out of the loop when that
504 ** offset is found. In the worst case, we fall out of the loop when
505 ** i reaches p->nAction, which means we append the new transaction set.
506 **
507 ** i is the index in p->aAction[] where p->mnLookahead is inserted.
508 */
drh784d86f2004-02-19 18:41:53 +0000509 for(i=0; i<p->nAction+p->mnLookahead; i++){
drh8b582012003-10-21 13:16:03 +0000510 if( p->aAction[i].lookahead<0 ){
511 for(j=0; j<p->nLookahead; j++){
512 k = p->aLookahead[j].lookahead - p->mnLookahead + i;
513 if( k<0 ) break;
514 if( p->aAction[k].lookahead>=0 ) break;
515 }
drhfdbf9282003-10-21 16:34:41 +0000516 if( j<p->nLookahead ) continue;
517 for(j=0; j<p->nAction; j++){
518 if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break;
519 }
520 if( j==p->nAction ){
521 break; /* Fits in empty slots */
522 }
drh8b582012003-10-21 13:16:03 +0000523 }else if( p->aAction[i].lookahead==p->mnLookahead ){
524 if( p->aAction[i].action!=p->mnAction ) continue;
525 for(j=0; j<p->nLookahead; j++){
526 k = p->aLookahead[j].lookahead - p->mnLookahead + i;
527 if( k<0 || k>=p->nAction ) break;
528 if( p->aLookahead[j].lookahead!=p->aAction[k].lookahead ) break;
529 if( p->aLookahead[j].action!=p->aAction[k].action ) break;
530 }
531 if( j<p->nLookahead ) continue;
532 n = 0;
533 for(j=0; j<p->nAction; j++){
drhfdbf9282003-10-21 16:34:41 +0000534 if( p->aAction[j].lookahead<0 ) continue;
535 if( p->aAction[j].lookahead==j+p->mnLookahead-i ) n++;
drh8b582012003-10-21 13:16:03 +0000536 }
drhfdbf9282003-10-21 16:34:41 +0000537 if( n==p->nLookahead ){
538 break; /* Same as a prior transaction set */
539 }
drh8b582012003-10-21 13:16:03 +0000540 }
541 }
542 /* Insert transaction set at index i. */
543 for(j=0; j<p->nLookahead; j++){
544 k = p->aLookahead[j].lookahead - p->mnLookahead + i;
545 p->aAction[k] = p->aLookahead[j];
546 if( k>=p->nAction ) p->nAction = k+1;
547 }
548 p->nLookahead = 0;
549
550 /* Return the offset that is added to the lookahead in order to get the
551 ** index into yy_action of the action */
552 return i - p->mnLookahead;
553}
554
drh75897232000-05-29 14:26:00 +0000555/********************** From the file "assert.c" ****************************/
556/*
557** A more efficient way of handling assertions.
558*/
/* Report a failed assertion and terminate.
**
**   file  Name of the source file containing the assertion
**   line  Line number of the assertion
**
** The message goes to stderr; the program then exits with status 1.
*/
void myassert(file,line)
char *file;
int line;
{
  FILE *err = stderr;
  fprintf(err,"Assertion failed on line %d of file \"%s\"\n",line,file);
  exit(1);
}
566/********************** From the file "build.c" *****************************/
567/*
** Routines to construct the finite state machine for the LEMON
569** parser generator.
570*/
571
572/* Find a precedence symbol of every rule in the grammar.
573**
574** Those rules which have a precedence symbol coded in the input
575** grammar using the "[symbol]" construct will already have the
576** rp->precsym field filled. Other rules take as their precedence
577** symbol the first RHS symbol with a defined precedence. If there
578** are not RHS symbols with a defined precedence, the precedence
579** symbol field is left blank.
580*/
581void FindRulePrecedences(xp)
582struct lemon *xp;
583{
584 struct rule *rp;
585 for(rp=xp->rule; rp; rp=rp->next){
586 if( rp->precsym==0 ){
587 int i;
588 for(i=0; i<rp->nrhs; i++){
589 if( rp->rhs[i]->prec>=0 ){
590 rp->precsym = rp->rhs[i];
591 break;
592 }
593 }
594 }
595 }
596 return;
597}
598
599/* Find all nonterminals which will generate the empty string.
600** Then go back and compute the first sets of every nonterminal.
601** The first set is the set of all terminal symbols which can begin
602** a string generated by that nonterminal.
603*/
604void FindFirstSets(lemp)
605struct lemon *lemp;
606{
607 int i;
608 struct rule *rp;
609 int progress;
610
611 for(i=0; i<lemp->nsymbol; i++){
drhb27b83a2002-08-14 23:18:57 +0000612 lemp->symbols[i]->lambda = B_FALSE;
drh75897232000-05-29 14:26:00 +0000613 }
614 for(i=lemp->nterminal; i<lemp->nsymbol; i++){
615 lemp->symbols[i]->firstset = SetNew();
616 }
617
618 /* First compute all lambdas */
619 do{
620 progress = 0;
621 for(rp=lemp->rule; rp; rp=rp->next){
622 if( rp->lhs->lambda ) continue;
623 for(i=0; i<rp->nrhs; i++){
drhb27b83a2002-08-14 23:18:57 +0000624 if( rp->rhs[i]->lambda==B_FALSE ) break;
drh75897232000-05-29 14:26:00 +0000625 }
626 if( i==rp->nrhs ){
drhb27b83a2002-08-14 23:18:57 +0000627 rp->lhs->lambda = B_TRUE;
drh75897232000-05-29 14:26:00 +0000628 progress = 1;
629 }
630 }
631 }while( progress );
632
633 /* Now compute all first sets */
634 do{
635 struct symbol *s1, *s2;
636 progress = 0;
637 for(rp=lemp->rule; rp; rp=rp->next){
638 s1 = rp->lhs;
639 for(i=0; i<rp->nrhs; i++){
640 s2 = rp->rhs[i];
641 if( s2->type==TERMINAL ){
642 progress += SetAdd(s1->firstset,s2->index);
643 break;
644 }else if( s1==s2 ){
drhb27b83a2002-08-14 23:18:57 +0000645 if( s1->lambda==B_FALSE ) break;
drh75897232000-05-29 14:26:00 +0000646 }else{
647 progress += SetUnion(s1->firstset,s2->firstset);
drhb27b83a2002-08-14 23:18:57 +0000648 if( s2->lambda==B_FALSE ) break;
drh75897232000-05-29 14:26:00 +0000649 }
650 }
651 }
652 }while( progress );
653 return;
654}
655
656/* Compute all LR(0) states for the grammar. Links
657** are added to between some states so that the LR(1) follow sets
658** can be computed later.
659*/
660PRIVATE struct state *getstate(/* struct lemon * */); /* forward reference */
661void FindStates(lemp)
662struct lemon *lemp;
663{
664 struct symbol *sp;
665 struct rule *rp;
666
667 Configlist_init();
668
669 /* Find the start symbol */
670 if( lemp->start ){
671 sp = Symbol_find(lemp->start);
672 if( sp==0 ){
673 ErrorMsg(lemp->filename,0,
674"The specified start symbol \"%s\" is not \
675in a nonterminal of the grammar. \"%s\" will be used as the start \
676symbol instead.",lemp->start,lemp->rule->lhs->name);
677 lemp->errorcnt++;
678 sp = lemp->rule->lhs;
679 }
680 }else{
681 sp = lemp->rule->lhs;
682 }
683
684 /* Make sure the start symbol doesn't occur on the right-hand side of
685 ** any rule. Report an error if it does. (YACC would generate a new
686 ** start symbol in this case.) */
687 for(rp=lemp->rule; rp; rp=rp->next){
688 int i;
689 for(i=0; i<rp->nrhs; i++){
690 if( rp->rhs[i]==sp ){
691 ErrorMsg(lemp->filename,0,
692"The start symbol \"%s\" occurs on the \
693right-hand side of a rule. This will result in a parser which \
694does not work properly.",sp->name);
695 lemp->errorcnt++;
696 }
697 }
698 }
699
700 /* The basis configuration set for the first state
701 ** is all rules which have the start symbol as their
702 ** left-hand side */
703 for(rp=sp->rule; rp; rp=rp->nextlhs){
704 struct config *newcfp;
705 newcfp = Configlist_addbasis(rp,0);
706 SetAdd(newcfp->fws,0);
707 }
708
709 /* Compute the first state. All other states will be
710 ** computed automatically during the computation of the first one.
711 ** The returned pointer to the first state is not used. */
712 (void)getstate(lemp);
713 return;
714}
715
716/* Return a pointer to a state which is described by the configuration
717** list which has been built from calls to Configlist_add.
718*/
719PRIVATE void buildshifts(/* struct lemon *, struct state * */); /* Forwd ref */
720PRIVATE struct state *getstate(lemp)
721struct lemon *lemp;
722{
723 struct config *cfp, *bp;
724 struct state *stp;
725
726 /* Extract the sorted basis of the new state. The basis was constructed
727 ** by prior calls to "Configlist_addbasis()". */
728 Configlist_sortbasis();
729 bp = Configlist_basis();
730
731 /* Get a state with the same basis */
732 stp = State_find(bp);
733 if( stp ){
734 /* A state with the same basis already exists! Copy all the follow-set
735 ** propagation links from the state under construction into the
736 ** preexisting state, then return a pointer to the preexisting state */
737 struct config *x, *y;
738 for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){
739 Plink_copy(&y->bplp,x->bplp);
740 Plink_delete(x->fplp);
741 x->fplp = x->bplp = 0;
742 }
743 cfp = Configlist_return();
744 Configlist_eat(cfp);
745 }else{
746 /* This really is a new state. Construct all the details */
747 Configlist_closure(lemp); /* Compute the configuration closure */
748 Configlist_sort(); /* Sort the configuration closure */
749 cfp = Configlist_return(); /* Get a pointer to the config list */
750 stp = State_new(); /* A new state structure */
751 MemoryCheck(stp);
752 stp->bp = bp; /* Remember the configuration basis */
753 stp->cfp = cfp; /* Remember the configuration closure */
754 stp->index = lemp->nstate++; /* Every state gets a sequence number */
755 stp->ap = 0; /* No actions, yet. */
756 State_insert(stp,stp->bp); /* Add to the state table */
757 buildshifts(lemp,stp); /* Recursively compute successor states */
758 }
759 return stp;
760}
761
762/* Construct all successor states to the given state. A "successor"
763** state is any state which can be reached by a shift action.
764*/
765PRIVATE void buildshifts(lemp,stp)
766struct lemon *lemp;
767struct state *stp; /* The state from which successors are computed */
768{
769 struct config *cfp; /* For looping thru the config closure of "stp" */
770 struct config *bcfp; /* For the inner loop on config closure of "stp" */
771 struct config *new; /* */
772 struct symbol *sp; /* Symbol following the dot in configuration "cfp" */
773 struct symbol *bsp; /* Symbol following the dot in configuration "bcfp" */
774 struct state *newstp; /* A pointer to a successor state */
775
776 /* Each configuration becomes complete after it contibutes to a successor
777 ** state. Initially, all configurations are incomplete */
778 for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE;
779
780 /* Loop through all configurations of the state "stp" */
781 for(cfp=stp->cfp; cfp; cfp=cfp->next){
782 if( cfp->status==COMPLETE ) continue; /* Already used by inner loop */
783 if( cfp->dot>=cfp->rp->nrhs ) continue; /* Can't shift this config */
784 Configlist_reset(); /* Reset the new config set */
785 sp = cfp->rp->rhs[cfp->dot]; /* Symbol after the dot */
786
787 /* For every configuration in the state "stp" which has the symbol "sp"
788 ** following its dot, add the same configuration to the basis set under
789 ** construction but with the dot shifted one symbol to the right. */
790 for(bcfp=cfp; bcfp; bcfp=bcfp->next){
791 if( bcfp->status==COMPLETE ) continue; /* Already used */
792 if( bcfp->dot>=bcfp->rp->nrhs ) continue; /* Can't shift this one */
793 bsp = bcfp->rp->rhs[bcfp->dot]; /* Get symbol after dot */
794 if( bsp!=sp ) continue; /* Must be same as for "cfp" */
795 bcfp->status = COMPLETE; /* Mark this config as used */
796 new = Configlist_addbasis(bcfp->rp,bcfp->dot+1);
797 Plink_add(&new->bplp,bcfp);
798 }
799
800 /* Get a pointer to the state described by the basis configuration set
801 ** constructed in the preceding loop */
802 newstp = getstate(lemp);
803
804 /* The state "newstp" is reached from the state "stp" by a shift action
805 ** on the symbol "sp" */
drh218dc692004-05-31 23:13:45 +0000806 Action_add(&stp->ap,SHIFT,sp,(char *)newstp);
drh75897232000-05-29 14:26:00 +0000807 }
808}
809
810/*
811** Construct the propagation links
812*/
813void FindLinks(lemp)
814struct lemon *lemp;
815{
816 int i;
817 struct config *cfp, *other;
818 struct state *stp;
819 struct plink *plp;
820
821 /* Housekeeping detail:
822 ** Add to every propagate link a pointer back to the state to
823 ** which the link is attached. */
824 for(i=0; i<lemp->nstate; i++){
825 stp = lemp->sorted[i];
826 for(cfp=stp->cfp; cfp; cfp=cfp->next){
827 cfp->stp = stp;
828 }
829 }
830
831 /* Convert all backlinks into forward links. Only the forward
832 ** links are used in the follow-set computation. */
833 for(i=0; i<lemp->nstate; i++){
834 stp = lemp->sorted[i];
835 for(cfp=stp->cfp; cfp; cfp=cfp->next){
836 for(plp=cfp->bplp; plp; plp=plp->next){
837 other = plp->cfp;
838 Plink_add(&other->fplp,cfp);
839 }
840 }
841 }
842}
843
844/* Compute all followsets.
845**
846** A followset is the set of all symbols which can come immediately
847** after a configuration.
848*/
849void FindFollowSets(lemp)
850struct lemon *lemp;
851{
852 int i;
853 struct config *cfp;
854 struct plink *plp;
855 int progress;
856 int change;
857
858 for(i=0; i<lemp->nstate; i++){
859 for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){
860 cfp->status = INCOMPLETE;
861 }
862 }
863
864 do{
865 progress = 0;
866 for(i=0; i<lemp->nstate; i++){
867 for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){
868 if( cfp->status==COMPLETE ) continue;
869 for(plp=cfp->fplp; plp; plp=plp->next){
870 change = SetUnion(plp->cfp->fws,cfp->fws);
871 if( change ){
872 plp->cfp->status = INCOMPLETE;
873 progress = 1;
874 }
875 }
876 cfp->status = COMPLETE;
877 }
878 }
879 }while( progress );
880}
881
882static int resolve_conflict();
883
884/* Compute the reduce actions, and resolve conflicts.
885*/
886void FindActions(lemp)
887struct lemon *lemp;
888{
889 int i,j;
890 struct config *cfp;
891 struct state *stp;
892 struct symbol *sp;
893 struct rule *rp;
894
895 /* Add all of the reduce actions
896 ** A reduce action is added for each element of the followset of
897 ** a configuration which has its dot at the extreme right.
898 */
899 for(i=0; i<lemp->nstate; i++){ /* Loop over all states */
900 stp = lemp->sorted[i];
901 for(cfp=stp->cfp; cfp; cfp=cfp->next){ /* Loop over all configurations */
902 if( cfp->rp->nrhs==cfp->dot ){ /* Is dot at extreme right? */
903 for(j=0; j<lemp->nterminal; j++){
904 if( SetFind(cfp->fws,j) ){
905 /* Add a reduce action to the state "stp" which will reduce by the
906 ** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */
drh218dc692004-05-31 23:13:45 +0000907 Action_add(&stp->ap,REDUCE,lemp->symbols[j],(char *)cfp->rp);
drh75897232000-05-29 14:26:00 +0000908 }
909 }
910 }
911 }
912 }
913
914 /* Add the accepting token */
915 if( lemp->start ){
916 sp = Symbol_find(lemp->start);
917 if( sp==0 ) sp = lemp->rule->lhs;
918 }else{
919 sp = lemp->rule->lhs;
920 }
921 /* Add to the first state (which is always the starting state of the
922 ** finite state machine) an action to ACCEPT if the lookahead is the
923 ** start nonterminal. */
924 Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0);
925
926 /* Resolve conflicts */
927 for(i=0; i<lemp->nstate; i++){
928 struct action *ap, *nap;
929 struct state *stp;
930 stp = lemp->sorted[i];
931 assert( stp->ap );
932 stp->ap = Action_sort(stp->ap);
drhb59499c2002-02-23 18:45:13 +0000933 for(ap=stp->ap; ap && ap->next; ap=ap->next){
drh75897232000-05-29 14:26:00 +0000934 for(nap=ap->next; nap && nap->sp==ap->sp; nap=nap->next){
935 /* The two actions "ap" and "nap" have the same lookahead.
936 ** Figure out which one should be used */
937 lemp->nconflict += resolve_conflict(ap,nap,lemp->errsym);
938 }
939 }
940 }
941
942 /* Report an error for each rule that can never be reduced. */
drhb27b83a2002-08-14 23:18:57 +0000943 for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = B_FALSE;
drh75897232000-05-29 14:26:00 +0000944 for(i=0; i<lemp->nstate; i++){
945 struct action *ap;
946 for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){
drhb27b83a2002-08-14 23:18:57 +0000947 if( ap->type==REDUCE ) ap->x.rp->canReduce = B_TRUE;
drh75897232000-05-29 14:26:00 +0000948 }
949 }
950 for(rp=lemp->rule; rp; rp=rp->next){
951 if( rp->canReduce ) continue;
952 ErrorMsg(lemp->filename,rp->ruleline,"This rule can not be reduced.\n");
953 lemp->errorcnt++;
954 }
955}
956
957/* Resolve a conflict between the two given actions. If the
958** conflict can't be resolve, return non-zero.
959**
960** NO LONGER TRUE:
961** To resolve a conflict, first look to see if either action
962** is on an error rule. In that case, take the action which
963** is not associated with the error rule. If neither or both
964** actions are associated with an error rule, then try to
965** use precedence to resolve the conflict.
966**
967** If either action is a SHIFT, then it must be apx. This
968** function won't work if apx->type==REDUCE and apy->type==SHIFT.
969*/
970static int resolve_conflict(apx,apy,errsym)
971struct action *apx;
972struct action *apy;
973struct symbol *errsym; /* The error symbol (if defined. NULL otherwise) */
974{
975 struct symbol *spx, *spy;
976 int errcnt = 0;
977 assert( apx->sp==apy->sp ); /* Otherwise there would be no conflict */
978 if( apx->type==SHIFT && apy->type==REDUCE ){
979 spx = apx->sp;
980 spy = apy->x.rp->precsym;
981 if( spy==0 || spx->prec<0 || spy->prec<0 ){
982 /* Not enough precedence information. */
983 apy->type = CONFLICT;
984 errcnt++;
985 }else if( spx->prec>spy->prec ){ /* Lower precedence wins */
986 apy->type = RD_RESOLVED;
987 }else if( spx->prec<spy->prec ){
988 apx->type = SH_RESOLVED;
989 }else if( spx->prec==spy->prec && spx->assoc==RIGHT ){ /* Use operator */
990 apy->type = RD_RESOLVED; /* associativity */
991 }else if( spx->prec==spy->prec && spx->assoc==LEFT ){ /* to break tie */
992 apx->type = SH_RESOLVED;
993 }else{
994 assert( spx->prec==spy->prec && spx->assoc==NONE );
995 apy->type = CONFLICT;
996 errcnt++;
997 }
998 }else if( apx->type==REDUCE && apy->type==REDUCE ){
999 spx = apx->x.rp->precsym;
1000 spy = apy->x.rp->precsym;
1001 if( spx==0 || spy==0 || spx->prec<0 ||
1002 spy->prec<0 || spx->prec==spy->prec ){
1003 apy->type = CONFLICT;
1004 errcnt++;
1005 }else if( spx->prec>spy->prec ){
1006 apy->type = RD_RESOLVED;
1007 }else if( spx->prec<spy->prec ){
1008 apx->type = RD_RESOLVED;
1009 }
1010 }else{
drhb59499c2002-02-23 18:45:13 +00001011 assert(
1012 apx->type==SH_RESOLVED ||
1013 apx->type==RD_RESOLVED ||
1014 apx->type==CONFLICT ||
1015 apy->type==SH_RESOLVED ||
1016 apy->type==RD_RESOLVED ||
1017 apy->type==CONFLICT
1018 );
1019 /* The REDUCE/SHIFT case cannot happen because SHIFTs come before
1020 ** REDUCEs on the list. If we reach this point it must be because
1021 ** the parser conflict had already been resolved. */
drh75897232000-05-29 14:26:00 +00001022 }
1023 return errcnt;
1024}
1025/********************* From the file "configlist.c" *************************/
/*
** Routines for processing a configuration list and building a state
** in the LEMON parser generator.
*/
1030
/* Recycled configurations, chained through their "next" field */
static struct config *freelist = 0;
/* Head of the configuration list under construction */
static struct config *current = 0;
/* Address of the pointer where the next configuration gets appended */
static struct config **currentend = 0;
/* Head of the basis configuration list (chained through "bp") */
static struct config *basis = 0;
/* Address of the pointer where the next basis config gets appended */
static struct config **basisend = 0;
1036
1037/* Return a pointer to a new configuration */
1038PRIVATE struct config *newconfig(){
1039 struct config *new;
1040 if( freelist==0 ){
1041 int i;
1042 int amt = 3;
1043 freelist = (struct config *)malloc( sizeof(struct config)*amt );
1044 if( freelist==0 ){
1045 fprintf(stderr,"Unable to allocate memory for a new configuration.");
1046 exit(1);
1047 }
1048 for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1];
1049 freelist[amt-1].next = 0;
1050 }
1051 new = freelist;
1052 freelist = freelist->next;
1053 return new;
1054}
1055
1056/* The configuration "old" is no longer used */
1057PRIVATE void deleteconfig(old)
1058struct config *old;
1059{
1060 old->next = freelist;
1061 freelist = old;
1062}
1063
1064/* Initialized the configuration list builder */
1065void Configlist_init(){
1066 current = 0;
1067 currentend = &current;
1068 basis = 0;
1069 basisend = &basis;
1070 Configtable_init();
1071 return;
1072}
1073
1074/* Initialized the configuration list builder */
1075void Configlist_reset(){
1076 current = 0;
1077 currentend = &current;
1078 basis = 0;
1079 basisend = &basis;
1080 Configtable_clear(0);
1081 return;
1082}
1083
1084/* Add another configuration to the configuration list */
1085struct config *Configlist_add(rp,dot)
1086struct rule *rp; /* The rule */
1087int dot; /* Index into the RHS of the rule where the dot goes */
1088{
1089 struct config *cfp, model;
1090
1091 assert( currentend!=0 );
1092 model.rp = rp;
1093 model.dot = dot;
1094 cfp = Configtable_find(&model);
1095 if( cfp==0 ){
1096 cfp = newconfig();
1097 cfp->rp = rp;
1098 cfp->dot = dot;
1099 cfp->fws = SetNew();
1100 cfp->stp = 0;
1101 cfp->fplp = cfp->bplp = 0;
1102 cfp->next = 0;
1103 cfp->bp = 0;
1104 *currentend = cfp;
1105 currentend = &cfp->next;
1106 Configtable_insert(cfp);
1107 }
1108 return cfp;
1109}
1110
1111/* Add a basis configuration to the configuration list */
1112struct config *Configlist_addbasis(rp,dot)
1113struct rule *rp;
1114int dot;
1115{
1116 struct config *cfp, model;
1117
1118 assert( basisend!=0 );
1119 assert( currentend!=0 );
1120 model.rp = rp;
1121 model.dot = dot;
1122 cfp = Configtable_find(&model);
1123 if( cfp==0 ){
1124 cfp = newconfig();
1125 cfp->rp = rp;
1126 cfp->dot = dot;
1127 cfp->fws = SetNew();
1128 cfp->stp = 0;
1129 cfp->fplp = cfp->bplp = 0;
1130 cfp->next = 0;
1131 cfp->bp = 0;
1132 *currentend = cfp;
1133 currentend = &cfp->next;
1134 *basisend = cfp;
1135 basisend = &cfp->bp;
1136 Configtable_insert(cfp);
1137 }
1138 return cfp;
1139}
1140
1141/* Compute the closure of the configuration list */
1142void Configlist_closure(lemp)
1143struct lemon *lemp;
1144{
1145 struct config *cfp, *newcfp;
1146 struct rule *rp, *newrp;
1147 struct symbol *sp, *xsp;
1148 int i, dot;
1149
1150 assert( currentend!=0 );
1151 for(cfp=current; cfp; cfp=cfp->next){
1152 rp = cfp->rp;
1153 dot = cfp->dot;
1154 if( dot>=rp->nrhs ) continue;
1155 sp = rp->rhs[dot];
1156 if( sp->type==NONTERMINAL ){
1157 if( sp->rule==0 && sp!=lemp->errsym ){
1158 ErrorMsg(lemp->filename,rp->line,"Nonterminal \"%s\" has no rules.",
1159 sp->name);
1160 lemp->errorcnt++;
1161 }
1162 for(newrp=sp->rule; newrp; newrp=newrp->nextlhs){
1163 newcfp = Configlist_add(newrp,0);
1164 for(i=dot+1; i<rp->nrhs; i++){
1165 xsp = rp->rhs[i];
1166 if( xsp->type==TERMINAL ){
1167 SetAdd(newcfp->fws,xsp->index);
1168 break;
1169 }else{
1170 SetUnion(newcfp->fws,xsp->firstset);
drhb27b83a2002-08-14 23:18:57 +00001171 if( xsp->lambda==B_FALSE ) break;
drh75897232000-05-29 14:26:00 +00001172 }
1173 }
1174 if( i==rp->nrhs ) Plink_add(&cfp->fplp,newcfp);
1175 }
1176 }
1177 }
1178 return;
1179}
1180
1181/* Sort the configuration list */
1182void Configlist_sort(){
drh218dc692004-05-31 23:13:45 +00001183 current = (struct config *)msort((char *)current,(char **)&(current->next),Configcmp);
drh75897232000-05-29 14:26:00 +00001184 currentend = 0;
1185 return;
1186}
1187
1188/* Sort the basis configuration list */
1189void Configlist_sortbasis(){
drh218dc692004-05-31 23:13:45 +00001190 basis = (struct config *)msort((char *)current,(char **)&(current->bp),Configcmp);
drh75897232000-05-29 14:26:00 +00001191 basisend = 0;
1192 return;
1193}
1194
1195/* Return a pointer to the head of the configuration list and
1196** reset the list */
1197struct config *Configlist_return(){
1198 struct config *old;
1199 old = current;
1200 current = 0;
1201 currentend = 0;
1202 return old;
1203}
1204
1205/* Return a pointer to the head of the configuration list and
1206** reset the list */
1207struct config *Configlist_basis(){
1208 struct config *old;
1209 old = basis;
1210 basis = 0;
1211 basisend = 0;
1212 return old;
1213}
1214
1215/* Free all elements of the given configuration list */
1216void Configlist_eat(cfp)
1217struct config *cfp;
1218{
1219 struct config *nextcfp;
1220 for(; cfp; cfp=nextcfp){
1221 nextcfp = cfp->next;
1222 assert( cfp->fplp==0 );
1223 assert( cfp->bplp==0 );
1224 if( cfp->fws ) SetFree(cfp->fws);
1225 deleteconfig(cfp);
1226 }
1227 return;
1228}
1229/***************** From the file "error.c" *********************************/
1230/*
1231** Code for printing error message.
1232*/
1233
/* Find a good place to break "msg" so that its length is at least "min"
** but no more than "max".  Make the point as close to max as possible.
**
** Characters msg[min..max] are scanned.  A newline or the end of the
** string ends the scan and becomes the break point.  Otherwise the
** last space seen, or the position just after the last '-' seen, is
** used.  If none of these is found the return value is "min".
**
** Side effect: tabs and the terminating newline inside the scanned
** range are overwritten with spaces.
*/
static int findbreak(msg,min,max)
char *msg;
int min;
int max;
{
  int i,spot;
  char c;
  for(i=spot=min; i<=max; i++){
    c = msg[i];
    if( c=='\t' ) msg[i] = ' ';
    if( c=='\n' ){ msg[i] = ' '; spot = i; break; }
    if( c==0 ){ spot = i; break; }
    if( c=='-' && i<max-1 ) spot = i+1;
    if( c==' ' ) spot = i;
  }
  return spot;
}

/*
** Print an error message on standard output, each line prefixed with
** "filename:lineno: " (or just "filename: " when lineno<=0).
**
** The error message is split across multiple lines if necessary.  The
** splits occur at a space, if there is a space available near the end
** of the line.  A run of text with no break opportunity that is wider
** than the line is split at the line width.
**
** The formatted message is limited to ERRMSGSIZE-1 characters; anything
** longer is truncated.
*/
#define ERRMSGSIZE  10000 /* Size of the message buffer */
#define LINEWIDTH      79 /* Max width of any output line */
#define PREFIXLIMIT    30 /* Max width of the prefix on each line */
void ErrorMsg(const char *filename, int lineno, const char *format, ...){
  char errmsg[ERRMSGSIZE];
  char prefix[PREFIXLIMIT+10];
  int errmsgsize;
  int prefixsize;
  int availablewidth;
  va_list ap;
  int end, restart, base;

  /* Prepare a prefix to be prepended to every output line.  The file
  ** name is clipped to PREFIXLIMIT-10 characters, so the prefix always
  ** fits in the buffer. */
  if( lineno>0 ){
    sprintf(prefix,"%.*s:%d: ",PREFIXLIMIT-10,filename,lineno);
  }else{
    sprintf(prefix,"%.*s: ",PREFIXLIMIT-10,filename);
  }
  prefixsize = strlen(prefix);
  availablewidth = LINEWIDTH - prefixsize;

  /* Generate the error message.  vsnprintf() bounds the write to the
  ** size of the buffer. */
  va_start(ap, format);
  if( vsnprintf(errmsg,sizeof(errmsg),format,ap)<0 ) errmsg[0] = 0;
  va_end(ap);
  errmsgsize = strlen(errmsg);
  /* Remove trailing '\n's from the error message. */
  while( errmsgsize>0 && errmsg[errmsgsize-1]=='\n' ){
     errmsg[--errmsgsize] = 0;
  }

  /* Print the error message */
  base = 0;
  while( errmsg[base]!=0 ){
    end = restart = findbreak(&errmsg[base],0,availablewidth);
    if( end==0 && errmsg[base]!=' ' ){
      /* No break opportunity was found in the window, which also means
      ** that at least availablewidth+1 characters remain.  Split at the
      ** line width so that every pass makes progress. */
      end = restart = availablewidth;
    }
    restart += base;
    while( errmsg[restart]==' ' ) restart++;
    fprintf(stdout,"%s%.*s\n",prefix,end,&errmsg[base]);
    base = restart;
  }
}
1301/**************** From the file "main.c" ************************************/
1302/*
1303** Main program file for the LEMON parser generator.
1304*/
1305
/* Report an out-of-memory condition on standard error and terminate
** the program with exit status 1.  This function is used mostly by the
** "MemoryCheck" macro in struct.h
*/
void memory_error(){
  fputs("Out of memory.  Aborting...\n",stderr);
  exit(1);
}
1313
static int nDefine = 0;      /* Number of -D options on the command line */
static char **azDefine = 0;  /* Name of the -D macros */

/* This routine is called with the argument to each -D command-line option.
** A private copy of the argument, cut off at the first '=' (if any), is
** appended to the azDefine array.  The caller's string is not modified.
** The program exits if memory cannot be obtained.
*/
static void handle_D_option(char *z){
  char *copy;
  char *eq;

  /* Grow the array by one slot */
  nDefine++;
  azDefine = realloc(azDefine, sizeof(azDefine[0])*nDefine);
  if( azDefine==0 ){
    fprintf(stderr,"out of memory\n");
    exit(1);
  }

  copy = malloc( strlen(z)+1 );
  azDefine[nDefine-1] = copy;
  if( copy==0 ){
    fprintf(stderr,"out of memory\n");
    exit(1);
  }
  strcpy(copy, z);

  /* Keep only the macro name: drop "=value" when present */
  eq = strchr(copy,'=');
  if( eq ) *eq = 0;
}
1338
drh75897232000-05-29 14:26:00 +00001339
1340/* The main program. Parse the command line and do it... */
1341int main(argc,argv)
1342int argc;
1343char **argv;
1344{
1345 static int version = 0;
1346 static int rpflag = 0;
1347 static int basisflag = 0;
1348 static int compress = 0;
1349 static int quiet = 0;
1350 static int statistics = 0;
1351 static int mhflag = 0;
1352 static struct s_options options[] = {
1353 {OPT_FLAG, "b", (char*)&basisflag, "Print only the basis in report."},
1354 {OPT_FLAG, "c", (char*)&compress, "Don't compress the action table."},
drh6d08b4d2004-07-20 12:45:22 +00001355 {OPT_FSTR, "D", (char*)handle_D_option, "Define an %ifdef macro."},
drh75897232000-05-29 14:26:00 +00001356 {OPT_FLAG, "g", (char*)&rpflag, "Print grammar without actions."},
1357 {OPT_FLAG, "m", (char*)&mhflag, "Output a makeheaders compatible file"},
1358 {OPT_FLAG, "q", (char*)&quiet, "(Quiet) Don't print the report file."},
drh6d08b4d2004-07-20 12:45:22 +00001359 {OPT_FLAG, "s", (char*)&statistics,
1360 "Print parser stats to standard output."},
drh75897232000-05-29 14:26:00 +00001361 {OPT_FLAG, "x", (char*)&version, "Print the version number."},
1362 {OPT_FLAG,0,0,0}
1363 };
1364 int i;
1365 struct lemon lem;
1366
drhb0c86772000-06-02 23:21:26 +00001367 OptInit(argv,options,stderr);
drh75897232000-05-29 14:26:00 +00001368 if( version ){
drhb19a2bc2001-09-16 00:13:26 +00001369 printf("Lemon version 1.0\n");
drh75897232000-05-29 14:26:00 +00001370 exit(0);
1371 }
drhb0c86772000-06-02 23:21:26 +00001372 if( OptNArgs()!=1 ){
drh75897232000-05-29 14:26:00 +00001373 fprintf(stderr,"Exactly one filename argument is required.\n");
1374 exit(1);
1375 }
1376 lem.errorcnt = 0;
1377
1378 /* Initialize the machine */
1379 Strsafe_init();
1380 Symbol_init();
1381 State_init();
1382 lem.argv0 = argv[0];
drhb0c86772000-06-02 23:21:26 +00001383 lem.filename = OptArg(0);
drh75897232000-05-29 14:26:00 +00001384 lem.basisflag = basisflag;
drh0bd1f4e2002-06-06 18:54:39 +00001385 lem.has_fallback = 0;
drh75897232000-05-29 14:26:00 +00001386 lem.nconflict = 0;
1387 lem.name = lem.include = lem.arg = lem.tokentype = lem.start = 0;
drh960e8c62001-04-03 16:53:21 +00001388 lem.vartype = 0;
drh75897232000-05-29 14:26:00 +00001389 lem.stacksize = 0;
1390 lem.error = lem.overflow = lem.failure = lem.accept = lem.tokendest =
1391 lem.tokenprefix = lem.outname = lem.extracode = 0;
drh960e8c62001-04-03 16:53:21 +00001392 lem.vardest = 0;
drh75897232000-05-29 14:26:00 +00001393 lem.tablesize = 0;
1394 Symbol_new("$");
1395 lem.errsym = Symbol_new("error");
1396
1397 /* Parse the input file */
1398 Parse(&lem);
1399 if( lem.errorcnt ) exit(lem.errorcnt);
1400 if( lem.rule==0 ){
1401 fprintf(stderr,"Empty grammar.\n");
1402 exit(1);
1403 }
1404
1405 /* Count and index the symbols of the grammar */
1406 lem.nsymbol = Symbol_count();
1407 Symbol_new("{default}");
1408 lem.symbols = Symbol_arrayof();
drh60d31652004-02-22 00:08:04 +00001409 for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i;
drh75897232000-05-29 14:26:00 +00001410 qsort(lem.symbols,lem.nsymbol+1,sizeof(struct symbol*),
1411 (int(*)())Symbolcmpp);
1412 for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i;
1413 for(i=1; isupper(lem.symbols[i]->name[0]); i++);
1414 lem.nterminal = i;
1415
1416 /* Generate a reprint of the grammar, if requested on the command line */
1417 if( rpflag ){
1418 Reprint(&lem);
1419 }else{
1420 /* Initialize the size for all follow and first sets */
1421 SetSize(lem.nterminal);
1422
1423 /* Find the precedence for every production rule (that has one) */
1424 FindRulePrecedences(&lem);
1425
1426 /* Compute the lambda-nonterminals and the first-sets for every
1427 ** nonterminal */
1428 FindFirstSets(&lem);
1429
1430 /* Compute all LR(0) states. Also record follow-set propagation
1431 ** links so that the follow-set can be computed later */
1432 lem.nstate = 0;
1433 FindStates(&lem);
1434 lem.sorted = State_arrayof();
1435
1436 /* Tie up loose ends on the propagation links */
1437 FindLinks(&lem);
1438
1439 /* Compute the follow set of every reducible configuration */
1440 FindFollowSets(&lem);
1441
1442 /* Compute the action tables */
1443 FindActions(&lem);
1444
1445 /* Compress the action tables */
1446 if( compress==0 ) CompressTables(&lem);
1447
1448 /* Generate a report of the parser generated. (the "y.output" file) */
1449 if( !quiet ) ReportOutput(&lem);
1450
1451 /* Generate the source code for the parser */
1452 ReportTable(&lem, mhflag);
1453
1454 /* Produce a header file for use by the scanner. (This step is
1455 ** omitted if the "-m" option is used because makeheaders will
1456 ** generate the file for us.) */
1457 if( !mhflag ) ReportHeader(&lem);
1458 }
1459 if( statistics ){
1460 printf("Parser statistics: %d terminals, %d nonterminals, %d rules\n",
1461 lem.nterminal, lem.nsymbol - lem.nterminal, lem.nrule);
1462 printf(" %d states, %d parser table entries, %d conflicts\n",
1463 lem.nstate, lem.tablesize, lem.nconflict);
1464 }
1465 if( lem.nconflict ){
1466 fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict);
1467 }
1468 exit(lem.errorcnt + lem.nconflict);
drh218dc692004-05-31 23:13:45 +00001469 return (lem.errorcnt + lem.nconflict);
drh75897232000-05-29 14:26:00 +00001470}
1471/******************** From the file "msort.c" *******************************/
1472/*
1473** A generic merge-sort program.
1474**
1475** USAGE:
1476** Let "ptr" be a pointer to some structure which is at the head of
1477** a null-terminated list. Then to sort the list call:
1478**
1479** ptr = msort(ptr,&(ptr->next),cmpfnc);
1480**
1481** In the above, "cmpfnc" is a pointer to a function which compares
1482** two instances of the structure and returns an integer, as in
1483** strcmp. The second argument is a pointer to the pointer to the
1484** second element of the linked list. This address is used to compute
1485** the offset to the "next" field within the structure. The offset to
1486** the "next" field must be constant for all structures in the list.
1487**
1488** The function returns a new pointer which is the head of the list
1489** after sorting.
1490**
1491** ALGORITHM:
1492** Merge-sort.
1493*/
1494
/*
** Return a pointer to the next structure in the linked list.
**
** "offset" (a variable in the scope where the macro is used) is the
** byte offset of the link field inside each structure.  The address is
** computed with char* arithmetic so that no pointer is ever squeezed
** through an integer type that might be narrower than a pointer.
*/
#define NEXT(A) (*(char**)(((char*)(A))+offset))

/*
** Inputs:
**   a:       A sorted, null-terminated linked list.  (May be null).
**   b:       A sorted, null-terminated linked list.  (May be null).
**   cmp:     A pointer to the comparison function.
**   offset:  Offset in the structure to the "next" field.
**
** Return Value:
**   A pointer to the head of a sorted list containing the elements
**   of both a and b.
**
** Side effects:
**   The "next" pointers for elements in the lists a and b are
**   changed.
**
** The function has a full prototype so that the type of "offset" is
** checked at the call sites in msort().
*/
static char *merge(char *a, char *b, int (*cmp)(), long offset)
{
  char *ptr, *head;

  if( a==0 ) return b;
  if( b==0 ) return a;

  /* Pick the head of the merged list */
  if( (*cmp)(a,b)<0 ){
    ptr = a;
    a = NEXT(a);
  }else{
    ptr = b;
    b = NEXT(b);
  }
  head = ptr;

  /* Repeatedly append the smaller of the two front elements */
  while( a && b ){
    if( (*cmp)(a,b)<0 ){
      NEXT(ptr) = a;
      ptr = a;
      a = NEXT(a);
    }else{
      NEXT(ptr) = b;
      ptr = b;
      b = NEXT(b);
    }
  }

  /* Attach whatever is left of the list that did not run out */
  NEXT(ptr) = a ? a : b;
  return head;
}

/*
** Inputs:
**   list:      Pointer to a singly-linked list of structures.
**   next:      Pointer to pointer to the second element of the list.
**   cmp:       A comparison function.
**
** Return Value:
**   A pointer to the head of a sorted list containing the elements
**   originally in list.  An empty (null) list yields null.
**
** Side effects:
**   The "next" pointers for elements in list are changed.
**
** Elements are taken off the input one at a time and merged into
** set[], where a non-empty set[i] holds a sorted run built from 2^i
** elements (the last slot absorbs any overflow).  The runs are merged
** together at the end.
*/
#define LISTSIZE 30
char *msort(list,next,cmp)
char *list;
char **next;
int (*cmp)();
{
  long offset;
  char *ep;
  char *set[LISTSIZE];
  int i;

  /* Nothing to sort, and no element from which to measure the offset */
  if( list==0 ) return 0;

  offset = (long)((char*)next - (char*)list);
  for(i=0; i<LISTSIZE; i++) set[i] = 0;
  while( list ){
    ep = list;
    list = NEXT(list);
    NEXT(ep) = 0;
    for(i=0; i<LISTSIZE-1 && set[i]!=0; i++){
      ep = merge(ep,set[i],cmp,offset);
      set[i] = 0;
    }
    set[i] = ep;
  }
  ep = 0;
  for(i=0; i<LISTSIZE; i++) if( set[i] ) ep = merge(ep,set[i],cmp,offset);
  return ep;
}
1592/************************ From the file "option.c" **************************/
static char **argv;            /* Command line, as given to OptInit() */
static struct s_options *op;   /* Option table, as given to OptInit() */
static FILE *errstream;        /* Error stream, as given to OptInit() */

/* True if the argument X looks like an option: it begins with '-' or
** '+', or it contains an '=' */
#define ISOPT(X) ((X)[0]=='-'||(X)[0]=='+'||strchr((X),'=')!=0)

/*
** Print the command line with a caret pointing to the k-th character
** of the n-th field.
**
** The whole command line is written to "err" on one line, followed by
** a marker line.  "spcnt" is the column of the character to be marked:
** the width of every argument before the n-th, one separating space
** for each of them, plus k.
**
** Nothing is printed if there is no command line.
*/
static void errline(n,k,err)
int n;
int k;
FILE *err;
{
  int spcnt, i;
  if( argv==0 || argv[0]==0 ) return;
  fprintf(err,"%s",argv[0]);
  spcnt = strlen(argv[0]) + 1;
  for(i=1; i<n && argv[i]; i++){
    fprintf(err," %s",argv[i]);
    /* Width of this argument plus the space that follows it */
    spcnt += strlen(argv[i]) + 1;
  }
  spcnt += k;
  for(; argv[i]; i++) fprintf(err," %s",argv[i]);
  if( spcnt<20 ){
    fprintf(err,"\n%*s^-- here\n",spcnt,"");
  }else{
    fprintf(err,"\n%*shere --^\n",spcnt-7,"");
  }
}
1624
1625/*
1626** Return the index of the N-th non-switch argument. Return -1
1627** if N is out of range.
1628*/
1629static int argindex(n)
1630int n;
1631{
1632 int i;
1633 int dashdash = 0;
1634 if( argv!=0 && *argv!=0 ){
1635 for(i=1; argv[i]; i++){
1636 if( dashdash || !ISOPT(argv[i]) ){
1637 if( n==0 ) return i;
1638 n--;
1639 }
1640 if( strcmp(argv[i],"--")==0 ) dashdash = 1;
1641 }
1642 }
1643 return -1;
1644}
1645
1646static char emsg[] = "Command line syntax error: ";
1647
1648/*
1649** Process a flag command line argument.
1650*/
1651static int handleflags(i,err)
1652int i;
1653FILE *err;
1654{
1655 int v;
1656 int errcnt = 0;
1657 int j;
1658 for(j=0; op[j].label; j++){
drh6d08b4d2004-07-20 12:45:22 +00001659 if( strncmp(&argv[i][1],op[j].label,strlen(op[j].label))==0 ) break;
drh75897232000-05-29 14:26:00 +00001660 }
1661 v = argv[i][0]=='-' ? 1 : 0;
1662 if( op[j].label==0 ){
1663 if( err ){
1664 fprintf(err,"%sundefined option.\n",emsg);
1665 errline(i,1,err);
1666 }
1667 errcnt++;
1668 }else if( op[j].type==OPT_FLAG ){
1669 *((int*)op[j].arg) = v;
1670 }else if( op[j].type==OPT_FFLAG ){
1671 (*(void(*)())(op[j].arg))(v);
drh6d08b4d2004-07-20 12:45:22 +00001672 }else if( op[j].type==OPT_FSTR ){
1673 (*(void(*)())(op[j].arg))(&argv[i][2]);
drh75897232000-05-29 14:26:00 +00001674 }else{
1675 if( err ){
1676 fprintf(err,"%smissing argument on switch.\n",emsg);
1677 errline(i,1,err);
1678 }
1679 errcnt++;
1680 }
1681 return errcnt;
1682}
1683
1684/*
1685** Process a command line switch which has an argument.
1686*/
1687static int handleswitch(i,err)
1688int i;
1689FILE *err;
1690{
1691 int lv = 0;
1692 double dv = 0.0;
1693 char *sv = 0, *end;
1694 char *cp;
1695 int j;
1696 int errcnt = 0;
1697 cp = strchr(argv[i],'=');
1698 *cp = 0;
1699 for(j=0; op[j].label; j++){
1700 if( strcmp(argv[i],op[j].label)==0 ) break;
1701 }
1702 *cp = '=';
1703 if( op[j].label==0 ){
1704 if( err ){
1705 fprintf(err,"%sundefined option.\n",emsg);
1706 errline(i,0,err);
1707 }
1708 errcnt++;
1709 }else{
1710 cp++;
1711 switch( op[j].type ){
1712 case OPT_FLAG:
1713 case OPT_FFLAG:
1714 if( err ){
1715 fprintf(err,"%soption requires an argument.\n",emsg);
1716 errline(i,0,err);
1717 }
1718 errcnt++;
1719 break;
1720 case OPT_DBL:
1721 case OPT_FDBL:
1722 dv = strtod(cp,&end);
1723 if( *end ){
1724 if( err ){
1725 fprintf(err,"%sillegal character in floating-point argument.\n",emsg);
drhba99af52001-10-25 20:37:16 +00001726 errline(i,((unsigned long)end)-(unsigned long)argv[i],err);
drh75897232000-05-29 14:26:00 +00001727 }
1728 errcnt++;
1729 }
1730 break;
1731 case OPT_INT:
1732 case OPT_FINT:
1733 lv = strtol(cp,&end,0);
1734 if( *end ){
1735 if( err ){
1736 fprintf(err,"%sillegal character in integer argument.\n",emsg);
drhba99af52001-10-25 20:37:16 +00001737 errline(i,((unsigned long)end)-(unsigned long)argv[i],err);
drh75897232000-05-29 14:26:00 +00001738 }
1739 errcnt++;
1740 }
1741 break;
1742 case OPT_STR:
1743 case OPT_FSTR:
1744 sv = cp;
1745 break;
1746 }
1747 switch( op[j].type ){
1748 case OPT_FLAG:
1749 case OPT_FFLAG:
1750 break;
1751 case OPT_DBL:
1752 *(double*)(op[j].arg) = dv;
1753 break;
1754 case OPT_FDBL:
1755 (*(void(*)())(op[j].arg))(dv);
1756 break;
1757 case OPT_INT:
1758 *(int*)(op[j].arg) = lv;
1759 break;
1760 case OPT_FINT:
1761 (*(void(*)())(op[j].arg))((int)lv);
1762 break;
1763 case OPT_STR:
1764 *(char**)(op[j].arg) = sv;
1765 break;
1766 case OPT_FSTR:
1767 (*(void(*)())(op[j].arg))(sv);
1768 break;
1769 }
1770 }
1771 return errcnt;
1772}
1773
drhb0c86772000-06-02 23:21:26 +00001774int OptInit(a,o,err)
drh75897232000-05-29 14:26:00 +00001775char **a;
1776struct s_options *o;
1777FILE *err;
1778{
1779 int errcnt = 0;
1780 argv = a;
1781 op = o;
1782 errstream = err;
1783 if( argv && *argv && op ){
1784 int i;
1785 for(i=1; argv[i]; i++){
1786 if( argv[i][0]=='+' || argv[i][0]=='-' ){
1787 errcnt += handleflags(i,err);
1788 }else if( strchr(argv[i],'=') ){
1789 errcnt += handleswitch(i,err);
1790 }
1791 }
1792 }
1793 if( errcnt>0 ){
1794 fprintf(err,"Valid command line options for \"%s\" are:\n",*a);
drhb0c86772000-06-02 23:21:26 +00001795 OptPrint();
drh75897232000-05-29 14:26:00 +00001796 exit(1);
1797 }
1798 return 0;
1799}
1800
drhb0c86772000-06-02 23:21:26 +00001801int OptNArgs(){
drh75897232000-05-29 14:26:00 +00001802 int cnt = 0;
1803 int dashdash = 0;
1804 int i;
1805 if( argv!=0 && argv[0]!=0 ){
1806 for(i=1; argv[i]; i++){
1807 if( dashdash || !ISOPT(argv[i]) ) cnt++;
1808 if( strcmp(argv[i],"--")==0 ) dashdash = 1;
1809 }
1810 }
1811 return cnt;
1812}
1813
drhb0c86772000-06-02 23:21:26 +00001814char *OptArg(n)
drh75897232000-05-29 14:26:00 +00001815int n;
1816{
1817 int i;
1818 i = argindex(n);
1819 return i>=0 ? argv[i] : 0;
1820}
1821
drhb0c86772000-06-02 23:21:26 +00001822void OptErr(n)
drh75897232000-05-29 14:26:00 +00001823int n;
1824{
1825 int i;
1826 i = argindex(n);
1827 if( i>=0 ) errline(i,0,errstream);
1828}
1829
drhb0c86772000-06-02 23:21:26 +00001830void OptPrint(){
drh75897232000-05-29 14:26:00 +00001831 int i;
1832 int max, len;
1833 max = 0;
1834 for(i=0; op[i].label; i++){
1835 len = strlen(op[i].label) + 1;
1836 switch( op[i].type ){
1837 case OPT_FLAG:
1838 case OPT_FFLAG:
1839 break;
1840 case OPT_INT:
1841 case OPT_FINT:
1842 len += 9; /* length of "<integer>" */
1843 break;
1844 case OPT_DBL:
1845 case OPT_FDBL:
1846 len += 6; /* length of "<real>" */
1847 break;
1848 case OPT_STR:
1849 case OPT_FSTR:
1850 len += 8; /* length of "<string>" */
1851 break;
1852 }
1853 if( len>max ) max = len;
1854 }
1855 for(i=0; op[i].label; i++){
1856 switch( op[i].type ){
1857 case OPT_FLAG:
1858 case OPT_FFLAG:
1859 fprintf(errstream," -%-*s %s\n",max,op[i].label,op[i].message);
1860 break;
1861 case OPT_INT:
1862 case OPT_FINT:
1863 fprintf(errstream," %s=<integer>%*s %s\n",op[i].label,
drh8b582012003-10-21 13:16:03 +00001864 (int)(max-strlen(op[i].label)-9),"",op[i].message);
drh75897232000-05-29 14:26:00 +00001865 break;
1866 case OPT_DBL:
1867 case OPT_FDBL:
1868 fprintf(errstream," %s=<real>%*s %s\n",op[i].label,
drh8b582012003-10-21 13:16:03 +00001869 (int)(max-strlen(op[i].label)-6),"",op[i].message);
drh75897232000-05-29 14:26:00 +00001870 break;
1871 case OPT_STR:
1872 case OPT_FSTR:
1873 fprintf(errstream," %s=<string>%*s %s\n",op[i].label,
drh8b582012003-10-21 13:16:03 +00001874 (int)(max-strlen(op[i].label)-8),"",op[i].message);
drh75897232000-05-29 14:26:00 +00001875 break;
1876 }
1877 }
1878}
1879/*********************** From the file "parse.c" ****************************/
1880/*
1881** Input file parser for the LEMON parser generator.
1882*/
1883
/*
** The state of the parser.
**
** A single instance lives on the stack of Parse() and is handed to
** parseonetoken() once per token.  It records where the scanner is in the
** input, which state of the hand-written state machine is active, and the
** pieces of the rule or declaration that is currently being assembled.
*/
struct pstate {
  char *filename;       /* Name of the input file */
  int tokenlineno;      /* Linenumber at which current token starts */
  int errorcnt;         /* Number of errors so far */
  char *tokenstart;     /* Text of current token */
  struct lemon *gp;     /* Global state vector */
  enum e_state {
    INITIALIZE,                    /* First token: reset counters, then act
                                   ** like WAITING_FOR_DECL_OR_RULE */
    WAITING_FOR_DECL_OR_RULE,      /* Expect "%", an LHS name, "{" or "[" */
    WAITING_FOR_DECL_KEYWORD,      /* Saw "%"; expect the keyword */
    WAITING_FOR_DECL_ARG,          /* Expect the argument of a declaration */
    WAITING_FOR_PRECEDENCE_SYMBOL, /* Inside %left/%right/%nonassoc list */
    WAITING_FOR_ARROW,             /* Saw the LHS; expect "::=" or "(" */
    IN_RHS,                        /* Collecting right-hand side symbols */
    LHS_ALIAS_1,                   /* Saw "(" after LHS; expect alias name */
    LHS_ALIAS_2,                   /* Saw LHS alias; expect ")" */
    LHS_ALIAS_3,                   /* Saw ")" after LHS alias; expect "::=" */
    RHS_ALIAS_1,                   /* Saw "(" after RHS symbol; expect alias */
    RHS_ALIAS_2,                   /* Saw RHS alias; expect ")" */
    PRECEDENCE_MARK_1,             /* Saw "["; expect a terminal name */
    PRECEDENCE_MARK_2,             /* Saw precedence symbol; expect "]" */
    RESYNC_AFTER_RULE_ERROR,       /* Skip tokens until "." or "%" */
    RESYNC_AFTER_DECL_ERROR,       /* Skip tokens until "." or "%" */
    WAITING_FOR_DESTRUCTOR_SYMBOL, /* Saw %destructor; expect symbol name */
    WAITING_FOR_DATATYPE_SYMBOL,   /* Saw %type; expect symbol name */
    WAITING_FOR_FALLBACK_ID        /* Inside a %fallback token list */
  } state;                   /* The state of the parser */
  struct symbol *fallback;   /* The fallback token (first name after
                             ** %fallback, or 0 until one has been seen) */
  struct symbol *lhs;        /* Left-hand side of current rule */
  char *lhsalias;            /* Alias for the LHS */
  int nrhs;                  /* Number of right-hand side symbols seen */
  struct symbol *rhs[MAXRHS];  /* RHS symbols */
  char *alias[MAXRHS];       /* Aliases for each RHS symbol (or NULL) */
  struct rule *prevrule;     /* Previous rule parsed */
  char *declkeyword;         /* Keyword of a declaration */
  char **declargslot;        /* Where the declaration argument should be put */
  int *decllnslot;           /* Where the declaration linenumber is put
                             ** (0 when no line number is recorded) */
  enum e_assoc declassoc;    /* Assign this association to decl arguments */
  int preccounter;           /* Assign this precedence to decl arguments */
  struct rule *firstrule;    /* Pointer to first rule in the grammar */
  struct rule *lastrule;     /* Pointer to the most recently parsed rule */
};
1927
1928/* Parse a single token */
1929static void parseonetoken(psp)
1930struct pstate *psp;
1931{
1932 char *x;
1933 x = Strsafe(psp->tokenstart); /* Save the token permanently */
1934#if 0
1935 printf("%s:%d: Token=[%s] state=%d\n",psp->filename,psp->tokenlineno,
1936 x,psp->state);
1937#endif
1938 switch( psp->state ){
1939 case INITIALIZE:
1940 psp->prevrule = 0;
1941 psp->preccounter = 0;
1942 psp->firstrule = psp->lastrule = 0;
1943 psp->gp->nrule = 0;
1944 /* Fall thru to next case */
1945 case WAITING_FOR_DECL_OR_RULE:
1946 if( x[0]=='%' ){
1947 psp->state = WAITING_FOR_DECL_KEYWORD;
1948 }else if( islower(x[0]) ){
1949 psp->lhs = Symbol_new(x);
1950 psp->nrhs = 0;
1951 psp->lhsalias = 0;
1952 psp->state = WAITING_FOR_ARROW;
1953 }else if( x[0]=='{' ){
1954 if( psp->prevrule==0 ){
1955 ErrorMsg(psp->filename,psp->tokenlineno,
1956"There is not prior rule opon which to attach the code \
1957fragment which begins on this line.");
1958 psp->errorcnt++;
1959 }else if( psp->prevrule->code!=0 ){
1960 ErrorMsg(psp->filename,psp->tokenlineno,
1961"Code fragment beginning on this line is not the first \
1962to follow the previous rule.");
1963 psp->errorcnt++;
1964 }else{
1965 psp->prevrule->line = psp->tokenlineno;
1966 psp->prevrule->code = &x[1];
1967 }
1968 }else if( x[0]=='[' ){
1969 psp->state = PRECEDENCE_MARK_1;
1970 }else{
1971 ErrorMsg(psp->filename,psp->tokenlineno,
1972 "Token \"%s\" should be either \"%%\" or a nonterminal name.",
1973 x);
1974 psp->errorcnt++;
1975 }
1976 break;
1977 case PRECEDENCE_MARK_1:
1978 if( !isupper(x[0]) ){
1979 ErrorMsg(psp->filename,psp->tokenlineno,
1980 "The precedence symbol must be a terminal.");
1981 psp->errorcnt++;
1982 }else if( psp->prevrule==0 ){
1983 ErrorMsg(psp->filename,psp->tokenlineno,
1984 "There is no prior rule to assign precedence \"[%s]\".",x);
1985 psp->errorcnt++;
1986 }else if( psp->prevrule->precsym!=0 ){
1987 ErrorMsg(psp->filename,psp->tokenlineno,
1988"Precedence mark on this line is not the first \
1989to follow the previous rule.");
1990 psp->errorcnt++;
1991 }else{
1992 psp->prevrule->precsym = Symbol_new(x);
1993 }
1994 psp->state = PRECEDENCE_MARK_2;
1995 break;
1996 case PRECEDENCE_MARK_2:
1997 if( x[0]!=']' ){
1998 ErrorMsg(psp->filename,psp->tokenlineno,
1999 "Missing \"]\" on precedence mark.");
2000 psp->errorcnt++;
2001 }
2002 psp->state = WAITING_FOR_DECL_OR_RULE;
2003 break;
2004 case WAITING_FOR_ARROW:
2005 if( x[0]==':' && x[1]==':' && x[2]=='=' ){
2006 psp->state = IN_RHS;
2007 }else if( x[0]=='(' ){
2008 psp->state = LHS_ALIAS_1;
2009 }else{
2010 ErrorMsg(psp->filename,psp->tokenlineno,
2011 "Expected to see a \":\" following the LHS symbol \"%s\".",
2012 psp->lhs->name);
2013 psp->errorcnt++;
2014 psp->state = RESYNC_AFTER_RULE_ERROR;
2015 }
2016 break;
2017 case LHS_ALIAS_1:
2018 if( isalpha(x[0]) ){
2019 psp->lhsalias = x;
2020 psp->state = LHS_ALIAS_2;
2021 }else{
2022 ErrorMsg(psp->filename,psp->tokenlineno,
2023 "\"%s\" is not a valid alias for the LHS \"%s\"\n",
2024 x,psp->lhs->name);
2025 psp->errorcnt++;
2026 psp->state = RESYNC_AFTER_RULE_ERROR;
2027 }
2028 break;
2029 case LHS_ALIAS_2:
2030 if( x[0]==')' ){
2031 psp->state = LHS_ALIAS_3;
2032 }else{
2033 ErrorMsg(psp->filename,psp->tokenlineno,
2034 "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias);
2035 psp->errorcnt++;
2036 psp->state = RESYNC_AFTER_RULE_ERROR;
2037 }
2038 break;
2039 case LHS_ALIAS_3:
2040 if( x[0]==':' && x[1]==':' && x[2]=='=' ){
2041 psp->state = IN_RHS;
2042 }else{
2043 ErrorMsg(psp->filename,psp->tokenlineno,
2044 "Missing \"->\" following: \"%s(%s)\".",
2045 psp->lhs->name,psp->lhsalias);
2046 psp->errorcnt++;
2047 psp->state = RESYNC_AFTER_RULE_ERROR;
2048 }
2049 break;
2050 case IN_RHS:
2051 if( x[0]=='.' ){
2052 struct rule *rp;
2053 rp = (struct rule *)malloc( sizeof(struct rule) +
2054 sizeof(struct symbol*)*psp->nrhs + sizeof(char*)*psp->nrhs );
2055 if( rp==0 ){
2056 ErrorMsg(psp->filename,psp->tokenlineno,
2057 "Can't allocate enough memory for this rule.");
2058 psp->errorcnt++;
2059 psp->prevrule = 0;
2060 }else{
2061 int i;
2062 rp->ruleline = psp->tokenlineno;
2063 rp->rhs = (struct symbol**)&rp[1];
2064 rp->rhsalias = (char**)&(rp->rhs[psp->nrhs]);
2065 for(i=0; i<psp->nrhs; i++){
2066 rp->rhs[i] = psp->rhs[i];
2067 rp->rhsalias[i] = psp->alias[i];
2068 }
2069 rp->lhs = psp->lhs;
2070 rp->lhsalias = psp->lhsalias;
2071 rp->nrhs = psp->nrhs;
2072 rp->code = 0;
2073 rp->precsym = 0;
2074 rp->index = psp->gp->nrule++;
2075 rp->nextlhs = rp->lhs->rule;
2076 rp->lhs->rule = rp;
2077 rp->next = 0;
2078 if( psp->firstrule==0 ){
2079 psp->firstrule = psp->lastrule = rp;
2080 }else{
2081 psp->lastrule->next = rp;
2082 psp->lastrule = rp;
2083 }
2084 psp->prevrule = rp;
2085 }
2086 psp->state = WAITING_FOR_DECL_OR_RULE;
2087 }else if( isalpha(x[0]) ){
2088 if( psp->nrhs>=MAXRHS ){
2089 ErrorMsg(psp->filename,psp->tokenlineno,
2090 "Too many symbol on RHS or rule beginning at \"%s\".",
2091 x);
2092 psp->errorcnt++;
2093 psp->state = RESYNC_AFTER_RULE_ERROR;
2094 }else{
2095 psp->rhs[psp->nrhs] = Symbol_new(x);
2096 psp->alias[psp->nrhs] = 0;
2097 psp->nrhs++;
2098 }
2099 }else if( x[0]=='(' && psp->nrhs>0 ){
2100 psp->state = RHS_ALIAS_1;
2101 }else{
2102 ErrorMsg(psp->filename,psp->tokenlineno,
2103 "Illegal character on RHS of rule: \"%s\".",x);
2104 psp->errorcnt++;
2105 psp->state = RESYNC_AFTER_RULE_ERROR;
2106 }
2107 break;
2108 case RHS_ALIAS_1:
2109 if( isalpha(x[0]) ){
2110 psp->alias[psp->nrhs-1] = x;
2111 psp->state = RHS_ALIAS_2;
2112 }else{
2113 ErrorMsg(psp->filename,psp->tokenlineno,
2114 "\"%s\" is not a valid alias for the RHS symbol \"%s\"\n",
2115 x,psp->rhs[psp->nrhs-1]->name);
2116 psp->errorcnt++;
2117 psp->state = RESYNC_AFTER_RULE_ERROR;
2118 }
2119 break;
2120 case RHS_ALIAS_2:
2121 if( x[0]==')' ){
2122 psp->state = IN_RHS;
2123 }else{
2124 ErrorMsg(psp->filename,psp->tokenlineno,
2125 "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias);
2126 psp->errorcnt++;
2127 psp->state = RESYNC_AFTER_RULE_ERROR;
2128 }
2129 break;
2130 case WAITING_FOR_DECL_KEYWORD:
2131 if( isalpha(x[0]) ){
2132 psp->declkeyword = x;
2133 psp->declargslot = 0;
2134 psp->decllnslot = 0;
2135 psp->state = WAITING_FOR_DECL_ARG;
2136 if( strcmp(x,"name")==0 ){
2137 psp->declargslot = &(psp->gp->name);
2138 }else if( strcmp(x,"include")==0 ){
2139 psp->declargslot = &(psp->gp->include);
2140 psp->decllnslot = &psp->gp->includeln;
2141 }else if( strcmp(x,"code")==0 ){
2142 psp->declargslot = &(psp->gp->extracode);
2143 psp->decllnslot = &psp->gp->extracodeln;
2144 }else if( strcmp(x,"token_destructor")==0 ){
2145 psp->declargslot = &psp->gp->tokendest;
2146 psp->decllnslot = &psp->gp->tokendestln;
drh960e8c62001-04-03 16:53:21 +00002147 }else if( strcmp(x,"default_destructor")==0 ){
2148 psp->declargslot = &psp->gp->vardest;
2149 psp->decllnslot = &psp->gp->vardestln;
drh75897232000-05-29 14:26:00 +00002150 }else if( strcmp(x,"token_prefix")==0 ){
2151 psp->declargslot = &psp->gp->tokenprefix;
2152 }else if( strcmp(x,"syntax_error")==0 ){
2153 psp->declargslot = &(psp->gp->error);
2154 psp->decllnslot = &psp->gp->errorln;
2155 }else if( strcmp(x,"parse_accept")==0 ){
2156 psp->declargslot = &(psp->gp->accept);
2157 psp->decllnslot = &psp->gp->acceptln;
2158 }else if( strcmp(x,"parse_failure")==0 ){
2159 psp->declargslot = &(psp->gp->failure);
2160 psp->decllnslot = &psp->gp->failureln;
2161 }else if( strcmp(x,"stack_overflow")==0 ){
2162 psp->declargslot = &(psp->gp->overflow);
2163 psp->decllnslot = &psp->gp->overflowln;
2164 }else if( strcmp(x,"extra_argument")==0 ){
2165 psp->declargslot = &(psp->gp->arg);
2166 }else if( strcmp(x,"token_type")==0 ){
2167 psp->declargslot = &(psp->gp->tokentype);
drh960e8c62001-04-03 16:53:21 +00002168 }else if( strcmp(x,"default_type")==0 ){
2169 psp->declargslot = &(psp->gp->vartype);
drh75897232000-05-29 14:26:00 +00002170 }else if( strcmp(x,"stack_size")==0 ){
2171 psp->declargslot = &(psp->gp->stacksize);
2172 }else if( strcmp(x,"start_symbol")==0 ){
2173 psp->declargslot = &(psp->gp->start);
2174 }else if( strcmp(x,"left")==0 ){
2175 psp->preccounter++;
2176 psp->declassoc = LEFT;
2177 psp->state = WAITING_FOR_PRECEDENCE_SYMBOL;
2178 }else if( strcmp(x,"right")==0 ){
2179 psp->preccounter++;
2180 psp->declassoc = RIGHT;
2181 psp->state = WAITING_FOR_PRECEDENCE_SYMBOL;
2182 }else if( strcmp(x,"nonassoc")==0 ){
2183 psp->preccounter++;
2184 psp->declassoc = NONE;
2185 psp->state = WAITING_FOR_PRECEDENCE_SYMBOL;
2186 }else if( strcmp(x,"destructor")==0 ){
2187 psp->state = WAITING_FOR_DESTRUCTOR_SYMBOL;
2188 }else if( strcmp(x,"type")==0 ){
2189 psp->state = WAITING_FOR_DATATYPE_SYMBOL;
drh0bd1f4e2002-06-06 18:54:39 +00002190 }else if( strcmp(x,"fallback")==0 ){
2191 psp->fallback = 0;
2192 psp->state = WAITING_FOR_FALLBACK_ID;
drh75897232000-05-29 14:26:00 +00002193 }else{
2194 ErrorMsg(psp->filename,psp->tokenlineno,
2195 "Unknown declaration keyword: \"%%%s\".",x);
2196 psp->errorcnt++;
2197 psp->state = RESYNC_AFTER_DECL_ERROR;
2198 }
2199 }else{
2200 ErrorMsg(psp->filename,psp->tokenlineno,
2201 "Illegal declaration keyword: \"%s\".",x);
2202 psp->errorcnt++;
2203 psp->state = RESYNC_AFTER_DECL_ERROR;
2204 }
2205 break;
2206 case WAITING_FOR_DESTRUCTOR_SYMBOL:
2207 if( !isalpha(x[0]) ){
2208 ErrorMsg(psp->filename,psp->tokenlineno,
2209 "Symbol name missing after %destructor keyword");
2210 psp->errorcnt++;
2211 psp->state = RESYNC_AFTER_DECL_ERROR;
2212 }else{
2213 struct symbol *sp = Symbol_new(x);
2214 psp->declargslot = &sp->destructor;
2215 psp->decllnslot = &sp->destructorln;
2216 psp->state = WAITING_FOR_DECL_ARG;
2217 }
2218 break;
2219 case WAITING_FOR_DATATYPE_SYMBOL:
2220 if( !isalpha(x[0]) ){
2221 ErrorMsg(psp->filename,psp->tokenlineno,
2222 "Symbol name missing after %destructor keyword");
2223 psp->errorcnt++;
2224 psp->state = RESYNC_AFTER_DECL_ERROR;
2225 }else{
2226 struct symbol *sp = Symbol_new(x);
2227 psp->declargslot = &sp->datatype;
2228 psp->decllnslot = 0;
2229 psp->state = WAITING_FOR_DECL_ARG;
2230 }
2231 break;
2232 case WAITING_FOR_PRECEDENCE_SYMBOL:
2233 if( x[0]=='.' ){
2234 psp->state = WAITING_FOR_DECL_OR_RULE;
2235 }else if( isupper(x[0]) ){
2236 struct symbol *sp;
2237 sp = Symbol_new(x);
2238 if( sp->prec>=0 ){
2239 ErrorMsg(psp->filename,psp->tokenlineno,
2240 "Symbol \"%s\" has already be given a precedence.",x);
2241 psp->errorcnt++;
2242 }else{
2243 sp->prec = psp->preccounter;
2244 sp->assoc = psp->declassoc;
2245 }
2246 }else{
2247 ErrorMsg(psp->filename,psp->tokenlineno,
2248 "Can't assign a precedence to \"%s\".",x);
2249 psp->errorcnt++;
2250 }
2251 break;
2252 case WAITING_FOR_DECL_ARG:
2253 if( (x[0]=='{' || x[0]=='\"' || isalnum(x[0])) ){
2254 if( *(psp->declargslot)!=0 ){
2255 ErrorMsg(psp->filename,psp->tokenlineno,
2256 "The argument \"%s\" to declaration \"%%%s\" is not the first.",
2257 x[0]=='\"' ? &x[1] : x,psp->declkeyword);
2258 psp->errorcnt++;
2259 psp->state = RESYNC_AFTER_DECL_ERROR;
2260 }else{
2261 *(psp->declargslot) = (x[0]=='\"' || x[0]=='{') ? &x[1] : x;
2262 if( psp->decllnslot ) *psp->decllnslot = psp->tokenlineno;
2263 psp->state = WAITING_FOR_DECL_OR_RULE;
2264 }
2265 }else{
2266 ErrorMsg(psp->filename,psp->tokenlineno,
2267 "Illegal argument to %%%s: %s",psp->declkeyword,x);
2268 psp->errorcnt++;
2269 psp->state = RESYNC_AFTER_DECL_ERROR;
2270 }
2271 break;
drh0bd1f4e2002-06-06 18:54:39 +00002272 case WAITING_FOR_FALLBACK_ID:
2273 if( x[0]=='.' ){
2274 psp->state = WAITING_FOR_DECL_OR_RULE;
2275 }else if( !isupper(x[0]) ){
2276 ErrorMsg(psp->filename, psp->tokenlineno,
2277 "%%fallback argument \"%s\" should be a token", x);
2278 psp->errorcnt++;
2279 }else{
2280 struct symbol *sp = Symbol_new(x);
2281 if( psp->fallback==0 ){
2282 psp->fallback = sp;
2283 }else if( sp->fallback ){
2284 ErrorMsg(psp->filename, psp->tokenlineno,
2285 "More than one fallback assigned to token %s", x);
2286 psp->errorcnt++;
2287 }else{
2288 sp->fallback = psp->fallback;
2289 psp->gp->has_fallback = 1;
2290 }
2291 }
2292 break;
drh75897232000-05-29 14:26:00 +00002293 case RESYNC_AFTER_RULE_ERROR:
2294/* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE;
2295** break; */
2296 case RESYNC_AFTER_DECL_ERROR:
2297 if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE;
2298 if( x[0]=='%' ) psp->state = WAITING_FOR_DECL_KEYWORD;
2299 break;
2300 }
2301}
2302
drh6d08b4d2004-07-20 12:45:22 +00002303/* Run the proprocessor over the input file text. The global variables
2304** azDefine[0] through azDefine[nDefine-1] contains the names of all defined
2305** macros. This routine looks for "%ifdef" and "%ifndef" and "%endif" and
2306** comments them out. Text in between is also commented out as appropriate.
2307*/
2308static preprocess_input(char *z){
2309 int i, j, k, n;
2310 int exclude = 0;
2311 int start;
2312 int lineno = 1;
2313 int start_lineno;
2314 for(i=0; z[i]; i++){
2315 if( z[i]=='\n' ) lineno++;
2316 if( z[i]!='%' || (i>0 && z[i-1]!='\n') ) continue;
2317 if( strncmp(&z[i],"%endif",6)==0 && isspace(z[i+6]) ){
2318 if( exclude ){
2319 exclude--;
2320 if( exclude==0 ){
2321 for(j=start; j<i; j++) if( z[j]!='\n' ) z[j] = ' ';
2322 }
2323 }
2324 for(j=i; z[j] && z[j]!='\n'; j++) z[j] = ' ';
2325 }else if( (strncmp(&z[i],"%ifdef",6)==0 && isspace(z[i+6]))
2326 || (strncmp(&z[i],"%ifndef",7)==0 && isspace(z[i+7])) ){
2327 if( exclude ){
2328 exclude++;
2329 }else{
2330 for(j=i+7; isspace(z[j]); j++){}
2331 for(n=0; z[j+n] && !isspace(z[j+n]); n++){}
2332 exclude = 1;
2333 for(k=0; k<nDefine; k++){
2334 if( strncmp(azDefine[k],&z[j],n)==0 && strlen(azDefine[k])==n ){
2335 exclude = 0;
2336 break;
2337 }
2338 }
2339 if( z[i+3]=='n' ) exclude = !exclude;
2340 if( exclude ){
2341 start = i;
2342 start_lineno = lineno;
2343 }
2344 }
2345 for(j=i; z[j] && z[j]!='\n'; j++) z[j] = ' ';
2346 }
2347 }
2348 if( exclude ){
2349 fprintf(stderr,"unterminated %%ifdef starting on line %d\n", start_lineno);
2350 exit(1);
2351 }
2352}
2353
drh75897232000-05-29 14:26:00 +00002354/* In spite of its name, this function is really a scanner. It read
2355** in the entire input file (all at once) then tokenizes it. Each
2356** token is passed to the function "parseonetoken" which builds all
2357** the appropriate data structures in the global state vector "gp".
2358*/
2359void Parse(gp)
2360struct lemon *gp;
2361{
2362 struct pstate ps;
2363 FILE *fp;
2364 char *filebuf;
2365 int filesize;
2366 int lineno;
2367 int c;
2368 char *cp, *nextcp;
2369 int startline = 0;
2370
2371 ps.gp = gp;
2372 ps.filename = gp->filename;
2373 ps.errorcnt = 0;
2374 ps.state = INITIALIZE;
2375
2376 /* Begin by reading the input file */
2377 fp = fopen(ps.filename,"rb");
2378 if( fp==0 ){
2379 ErrorMsg(ps.filename,0,"Can't open this file for reading.");
2380 gp->errorcnt++;
2381 return;
2382 }
2383 fseek(fp,0,2);
2384 filesize = ftell(fp);
2385 rewind(fp);
2386 filebuf = (char *)malloc( filesize+1 );
2387 if( filebuf==0 ){
2388 ErrorMsg(ps.filename,0,"Can't allocate %d of memory to hold this file.",
2389 filesize+1);
2390 gp->errorcnt++;
2391 return;
2392 }
2393 if( fread(filebuf,1,filesize,fp)!=filesize ){
2394 ErrorMsg(ps.filename,0,"Can't read in all %d bytes of this file.",
2395 filesize);
2396 free(filebuf);
2397 gp->errorcnt++;
2398 return;
2399 }
2400 fclose(fp);
2401 filebuf[filesize] = 0;
2402
drh6d08b4d2004-07-20 12:45:22 +00002403 /* Make an initial pass through the file to handle %ifdef and %ifndef */
2404 preprocess_input(filebuf);
2405
drh75897232000-05-29 14:26:00 +00002406 /* Now scan the text of the input file */
2407 lineno = 1;
2408 for(cp=filebuf; (c= *cp)!=0; ){
2409 if( c=='\n' ) lineno++; /* Keep track of the line number */
2410 if( isspace(c) ){ cp++; continue; } /* Skip all white space */
2411 if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments */
2412 cp+=2;
2413 while( (c= *cp)!=0 && c!='\n' ) cp++;
2414 continue;
2415 }
2416 if( c=='/' && cp[1]=='*' ){ /* Skip C style comments */
2417 cp+=2;
2418 while( (c= *cp)!=0 && (c!='/' || cp[-1]!='*') ){
2419 if( c=='\n' ) lineno++;
2420 cp++;
2421 }
2422 if( c ) cp++;
2423 continue;
2424 }
2425 ps.tokenstart = cp; /* Mark the beginning of the token */
2426 ps.tokenlineno = lineno; /* Linenumber on which token begins */
2427 if( c=='\"' ){ /* String literals */
2428 cp++;
2429 while( (c= *cp)!=0 && c!='\"' ){
2430 if( c=='\n' ) lineno++;
2431 cp++;
2432 }
2433 if( c==0 ){
2434 ErrorMsg(ps.filename,startline,
2435"String starting on this line is not terminated before the end of the file.");
2436 ps.errorcnt++;
2437 nextcp = cp;
2438 }else{
2439 nextcp = cp+1;
2440 }
2441 }else if( c=='{' ){ /* A block of C code */
2442 int level;
2443 cp++;
2444 for(level=1; (c= *cp)!=0 && (level>1 || c!='}'); cp++){
2445 if( c=='\n' ) lineno++;
2446 else if( c=='{' ) level++;
2447 else if( c=='}' ) level--;
2448 else if( c=='/' && cp[1]=='*' ){ /* Skip comments */
2449 int prevc;
2450 cp = &cp[2];
2451 prevc = 0;
2452 while( (c= *cp)!=0 && (c!='/' || prevc!='*') ){
2453 if( c=='\n' ) lineno++;
2454 prevc = c;
2455 cp++;
2456 }
2457 }else if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments too */
2458 cp = &cp[2];
2459 while( (c= *cp)!=0 && c!='\n' ) cp++;
2460 if( c ) lineno++;
2461 }else if( c=='\'' || c=='\"' ){ /* String a character literals */
2462 int startchar, prevc;
2463 startchar = c;
2464 prevc = 0;
2465 for(cp++; (c= *cp)!=0 && (c!=startchar || prevc=='\\'); cp++){
2466 if( c=='\n' ) lineno++;
2467 if( prevc=='\\' ) prevc = 0;
2468 else prevc = c;
2469 }
2470 }
2471 }
2472 if( c==0 ){
drh960e8c62001-04-03 16:53:21 +00002473 ErrorMsg(ps.filename,ps.tokenlineno,
drh75897232000-05-29 14:26:00 +00002474"C code starting on this line is not terminated before the end of the file.");
2475 ps.errorcnt++;
2476 nextcp = cp;
2477 }else{
2478 nextcp = cp+1;
2479 }
2480 }else if( isalnum(c) ){ /* Identifiers */
2481 while( (c= *cp)!=0 && (isalnum(c) || c=='_') ) cp++;
2482 nextcp = cp;
2483 }else if( c==':' && cp[1]==':' && cp[2]=='=' ){ /* The operator "::=" */
2484 cp += 3;
2485 nextcp = cp;
2486 }else{ /* All other (one character) operators */
2487 cp++;
2488 nextcp = cp;
2489 }
2490 c = *cp;
2491 *cp = 0; /* Null terminate the token */
2492 parseonetoken(&ps); /* Parse the token */
2493 *cp = c; /* Restore the buffer */
2494 cp = nextcp;
2495 }
2496 free(filebuf); /* Release the buffer after parsing */
2497 gp->rule = ps.firstrule;
2498 gp->errorcnt = ps.errorcnt;
2499}
2500/*************************** From the file "plink.c" *********************/
2501/*
2502** Routines processing configuration follow-set propagation links
2503** in the LEMON parser generator.
2504*/
2505static struct plink *plink_freelist = 0;
2506
2507/* Allocate a new plink */
2508struct plink *Plink_new(){
2509 struct plink *new;
2510
2511 if( plink_freelist==0 ){
2512 int i;
2513 int amt = 100;
2514 plink_freelist = (struct plink *)malloc( sizeof(struct plink)*amt );
2515 if( plink_freelist==0 ){
2516 fprintf(stderr,
2517 "Unable to allocate memory for a new follow-set propagation link.\n");
2518 exit(1);
2519 }
2520 for(i=0; i<amt-1; i++) plink_freelist[i].next = &plink_freelist[i+1];
2521 plink_freelist[amt-1].next = 0;
2522 }
2523 new = plink_freelist;
2524 plink_freelist = plink_freelist->next;
2525 return new;
2526}
2527
2528/* Add a plink to a plink list */
2529void Plink_add(plpp,cfp)
2530struct plink **plpp;
2531struct config *cfp;
2532{
2533 struct plink *new;
2534 new = Plink_new();
2535 new->next = *plpp;
2536 *plpp = new;
2537 new->cfp = cfp;
2538}
2539
2540/* Transfer every plink on the list "from" to the list "to" */
2541void Plink_copy(to,from)
2542struct plink **to;
2543struct plink *from;
2544{
2545 struct plink *nextpl;
2546 while( from ){
2547 nextpl = from->next;
2548 from->next = *to;
2549 *to = from;
2550 from = nextpl;
2551 }
2552}
2553
2554/* Delete every plink on the list */
2555void Plink_delete(plp)
2556struct plink *plp;
2557{
2558 struct plink *nextpl;
2559
2560 while( plp ){
2561 nextpl = plp->next;
2562 plp->next = plink_freelist;
2563 plink_freelist = plp;
2564 plp = nextpl;
2565 }
2566}
2567/*********************** From the file "report.c" **************************/
2568/*
2569** Procedures for generating reports and tables in the LEMON parser generator.
2570*/
2571
2572/* Generate a filename with the given suffix. Space to hold the
2573** name comes from malloc() and must be freed by the calling
2574** function.
2575*/
2576PRIVATE char *file_makename(lemp,suffix)
2577struct lemon *lemp;
2578char *suffix;
2579{
2580 char *name;
2581 char *cp;
2582
2583 name = malloc( strlen(lemp->filename) + strlen(suffix) + 5 );
2584 if( name==0 ){
2585 fprintf(stderr,"Can't allocate space for a filename.\n");
2586 exit(1);
2587 }
2588 strcpy(name,lemp->filename);
2589 cp = strrchr(name,'.');
2590 if( cp ) *cp = 0;
2591 strcat(name,suffix);
2592 return name;
2593}
2594
2595/* Open a file with a name based on the name of the input file,
2596** but with a different (specified) suffix, and return a pointer
2597** to the stream */
2598PRIVATE FILE *file_open(lemp,suffix,mode)
2599struct lemon *lemp;
2600char *suffix;
2601char *mode;
2602{
2603 FILE *fp;
2604
2605 if( lemp->outname ) free(lemp->outname);
2606 lemp->outname = file_makename(lemp, suffix);
2607 fp = fopen(lemp->outname,mode);
2608 if( fp==0 && *mode=='w' ){
2609 fprintf(stderr,"Can't open file \"%s\".\n",lemp->outname);
2610 lemp->errorcnt++;
2611 return 0;
2612 }
2613 return fp;
2614}
2615
2616/* Duplicate the input file without comments and without actions
2617** on rules */
2618void Reprint(lemp)
2619struct lemon *lemp;
2620{
2621 struct rule *rp;
2622 struct symbol *sp;
2623 int i, j, maxlen, len, ncolumns, skip;
2624 printf("// Reprint of input file \"%s\".\n// Symbols:\n",lemp->filename);
2625 maxlen = 10;
2626 for(i=0; i<lemp->nsymbol; i++){
2627 sp = lemp->symbols[i];
2628 len = strlen(sp->name);
2629 if( len>maxlen ) maxlen = len;
2630 }
2631 ncolumns = 76/(maxlen+5);
2632 if( ncolumns<1 ) ncolumns = 1;
2633 skip = (lemp->nsymbol + ncolumns - 1)/ncolumns;
2634 for(i=0; i<skip; i++){
2635 printf("//");
2636 for(j=i; j<lemp->nsymbol; j+=skip){
2637 sp = lemp->symbols[j];
2638 assert( sp->index==j );
2639 printf(" %3d %-*.*s",j,maxlen,maxlen,sp->name);
2640 }
2641 printf("\n");
2642 }
2643 for(rp=lemp->rule; rp; rp=rp->next){
2644 printf("%s",rp->lhs->name);
2645/* if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */
2646 printf(" ::=");
2647 for(i=0; i<rp->nrhs; i++){
2648 printf(" %s",rp->rhs[i]->name);
2649/* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */
2650 }
2651 printf(".");
2652 if( rp->precsym ) printf(" [%s]",rp->precsym->name);
2653/* if( rp->code ) printf("\n %s",rp->code); */
2654 printf("\n");
2655 }
2656}
2657
2658void ConfigPrint(fp,cfp)
2659FILE *fp;
2660struct config *cfp;
2661{
2662 struct rule *rp;
2663 int i;
2664 rp = cfp->rp;
2665 fprintf(fp,"%s ::=",rp->lhs->name);
2666 for(i=0; i<=rp->nrhs; i++){
2667 if( i==cfp->dot ) fprintf(fp," *");
2668 if( i==rp->nrhs ) break;
2669 fprintf(fp," %s",rp->rhs[i]->name);
2670 }
2671}
2672
/* #define TEST */
#ifdef TEST
/* Print a set: the names of all terminals for which SetFind() reports
** membership in "set", separated by single spaces, inside "[...]".
*/
PRIVATE void SetPrint(out,set,lemp)
FILE *out;
char *set;
struct lemon *lemp;
{
  int k;
  int first = 1;      /* No separator in front of the first name */

  fprintf(out,"%12s[","");
  for(k=0; k<lemp->nterminal; k++){
    if( !SetFind(set,k) ) continue;
    fprintf(out,"%s%s",first ? "" : " ",lemp->symbols[k]->name);
    first = 0;
  }
  fprintf(out,"]\n");
}

/* Print a plink chain, one line per link: the tag, the index of the
** state owning the linked configuration, and the configuration itself.
*/
PRIVATE void PlinkPrint(out,plp,tag)
FILE *out;
struct plink *plp;
char *tag;
{
  for(; plp!=0; plp=plp->next){
    fprintf(out,"%12s%s (state %2d) ","",tag,plp->cfp->stp->index);
    ConfigPrint(out,plp->cfp);
    fprintf(out,"\n");
  }
}
#endif
2708
2709/* Print an action to the given file descriptor. Return FALSE if
2710** nothing was actually printed.
2711*/
2712int PrintAction(struct action *ap, FILE *fp, int indent){
2713 int result = 1;
2714 switch( ap->type ){
2715 case SHIFT:
2716 fprintf(fp,"%*s shift %d",indent,ap->sp->name,ap->x.stp->index);
2717 break;
2718 case REDUCE:
2719 fprintf(fp,"%*s reduce %d",indent,ap->sp->name,ap->x.rp->index);
2720 break;
2721 case ACCEPT:
2722 fprintf(fp,"%*s accept",indent,ap->sp->name);
2723 break;
2724 case ERROR:
2725 fprintf(fp,"%*s error",indent,ap->sp->name);
2726 break;
2727 case CONFLICT:
2728 fprintf(fp,"%*s reduce %-3d ** Parsing conflict **",
2729 indent,ap->sp->name,ap->x.rp->index);
2730 break;
2731 case SH_RESOLVED:
2732 case RD_RESOLVED:
2733 case NOT_USED:
2734 result = 0;
2735 break;
2736 }
2737 return result;
2738}
2739
2740/* Generate the "y.output" log file */
2741void ReportOutput(lemp)
2742struct lemon *lemp;
2743{
2744 int i;
2745 struct state *stp;
2746 struct config *cfp;
2747 struct action *ap;
2748 FILE *fp;
2749
2750 fp = file_open(lemp,".out","w");
2751 if( fp==0 ) return;
2752 fprintf(fp," \b");
2753 for(i=0; i<lemp->nstate; i++){
2754 stp = lemp->sorted[i];
2755 fprintf(fp,"State %d:\n",stp->index);
2756 if( lemp->basisflag ) cfp=stp->bp;
2757 else cfp=stp->cfp;
2758 while( cfp ){
2759 char buf[20];
2760 if( cfp->dot==cfp->rp->nrhs ){
2761 sprintf(buf,"(%d)",cfp->rp->index);
2762 fprintf(fp," %5s ",buf);
2763 }else{
2764 fprintf(fp," ");
2765 }
2766 ConfigPrint(fp,cfp);
2767 fprintf(fp,"\n");
2768#ifdef TEST
2769 SetPrint(fp,cfp->fws,lemp);
2770 PlinkPrint(fp,cfp->fplp,"To ");
2771 PlinkPrint(fp,cfp->bplp,"From");
2772#endif
2773 if( lemp->basisflag ) cfp=cfp->bp;
2774 else cfp=cfp->next;
2775 }
2776 fprintf(fp,"\n");
2777 for(ap=stp->ap; ap; ap=ap->next){
2778 if( PrintAction(ap,fp,30) ) fprintf(fp,"\n");
2779 }
2780 fprintf(fp,"\n");
2781 }
2782 fclose(fp);
2783 return;
2784}
2785
/* Search for the file "name" which is in the same directory as
** the executable.
**
** If argv0 contains a directory separator, the result is that directory
** joined with "name"; no check is made that the file exists.  Otherwise
** each ":"-separated element of the PATH environment variable (or of
** ".:/bin:/usr/bin" when PATH is unset) is tried in turn and the first
** candidate for which access(candidate,modemask) succeeds is returned.
**
** The result is obtained from malloc() and belongs to the caller.  0 is
** returned if memory cannot be allocated or if no PATH element holds an
** accessible file.
**
** Neither argv0 nor the PATH text is modified: the PATH text may be a
** string literal or the buffer owned by getenv(), neither of which may be
** written to.
*/
PRIVATE char *pathsearch(argv0,name,modemask)
char *argv0;
char *name;
int modemask;
{
  char *pathlist;
  char *path,*cp;
  extern int access();

#ifdef __WIN32__
  cp = strrchr(argv0,'\\');
#else
  cp = strrchr(argv0,'/');
#endif
  if( cp ){
    int ndir = (int)(cp - argv0);      /* Length of the directory part */
    path = (char *)malloc( ndir + strlen(name) + 2 );
    if( path ) sprintf(path,"%.*s/%s",ndir,argv0,name);
  }else{
    extern char *getenv();
    int found = 0;
    pathlist = getenv("PATH");
    if( pathlist==0 ) pathlist = ".:/bin:/usr/bin";
    path = (char *)malloc( strlen(pathlist)+strlen(name)+2 );
    if( path!=0 ){
      while( *pathlist ){
        int n;                         /* Length of the current element */
        cp = strchr(pathlist,':');
        n = cp ? (int)(cp - pathlist) : (int)strlen(pathlist);
        sprintf(path,"%.*s/%s",n,pathlist,name);
        pathlist += n;
        if( *pathlist==':' ) pathlist++;
        if( access(path,modemask)==0 ){
          found = 1;
          break;
        }
      }
      if( !found ){
        /* Do not hand back an unwritten or non-matching candidate */
        free(path);
        path = 0;
      }
    }
  }
  return path;
}
2830
2831/* Given an action, compute the integer value for that action
2832** which is to be put in the action table of the generated machine.
2833** Return negative if no action should be generated.
2834*/
2835PRIVATE int compute_action(lemp,ap)
2836struct lemon *lemp;
2837struct action *ap;
2838{
2839 int act;
2840 switch( ap->type ){
2841 case SHIFT: act = ap->x.stp->index; break;
2842 case REDUCE: act = ap->x.rp->index + lemp->nstate; break;
2843 case ERROR: act = lemp->nstate + lemp->nrule; break;
2844 case ACCEPT: act = lemp->nstate + lemp->nrule + 1; break;
2845 default: act = -1; break;
2846 }
2847 return act;
2848}
2849
#define LINESIZE 1000
/* The next cluster of routines are for reading the template file
** and writing the results to the generated parser */
/* The first function transfers data from "in" to "out" until
** a line is seen which begins with "%%".  The line number is
** tracked.
**
** if name!=0, then any word that begin with "Parse" is changed to
** begin with *name instead.
**
** *lineno is incremented once for every chunk that fgets() delivers and
** that is copied.  The "%%" line itself is consumed but not copied.
*/
PRIVATE void tplt_xfer(name,in,out,lineno)
char *name;
FILE *in;
FILE *out;
int *lineno;
{
  char text[LINESIZE];

  for(;;){
    int pos;          /* Scan position within text[] */
    int flushed;      /* Everything before this offset is already written */

    if( fgets(text,LINESIZE,in)==0 ) break;
    if( text[0]=='%' && text[1]=='%' ) break;
    *lineno += 1;
    flushed = 0;
    if( name!=0 ){
      pos = 0;
      while( text[pos] ){
        /* "Parse" only counts when it is not preceded by a letter */
        if( strncmp(&text[pos],"Parse",5)==0
         && (pos==0 || !isalpha(text[pos-1]))
        ){
          if( pos>flushed ) fprintf(out,"%.*s",pos-flushed,&text[flushed]);
          fputs(name,out);
          pos += 5;
          flushed = pos;
        }else{
          pos++;
        }
      }
    }
    fputs(&text[flushed],out);
  }
}
2886
2887/* The next function finds the template file and opens it, returning
2888** a pointer to the opened file. */
2889PRIVATE FILE *tplt_open(lemp)
2890struct lemon *lemp;
2891{
2892 static char templatename[] = "lempar.c";
2893 char buf[1000];
2894 FILE *in;
2895 char *tpltname;
2896 char *cp;
2897
2898 cp = strrchr(lemp->filename,'.');
2899 if( cp ){
drh8b582012003-10-21 13:16:03 +00002900 sprintf(buf,"%.*s.lt",(int)(cp-lemp->filename),lemp->filename);
drh75897232000-05-29 14:26:00 +00002901 }else{
2902 sprintf(buf,"%s.lt",lemp->filename);
2903 }
2904 if( access(buf,004)==0 ){
2905 tpltname = buf;
drh960e8c62001-04-03 16:53:21 +00002906 }else if( access(templatename,004)==0 ){
2907 tpltname = templatename;
drh75897232000-05-29 14:26:00 +00002908 }else{
2909 tpltname = pathsearch(lemp->argv0,templatename,0);
2910 }
2911 if( tpltname==0 ){
2912 fprintf(stderr,"Can't find the parser driver template file \"%s\".\n",
2913 templatename);
2914 lemp->errorcnt++;
2915 return 0;
2916 }
2917 in = fopen(tpltname,"r");
2918 if( in==0 ){
2919 fprintf(stderr,"Can't open the template file \"%s\".\n",templatename);
2920 lemp->errorcnt++;
2921 return 0;
2922 }
2923 return in;
2924}
2925
2926/* Print a string to the file and keep the linenumber up to date */
2927PRIVATE void tplt_print(out,lemp,str,strln,lineno)
2928FILE *out;
2929struct lemon *lemp;
2930char *str;
2931int strln;
2932int *lineno;
2933{
2934 if( str==0 ) return;
2935 fprintf(out,"#line %d \"%s\"\n",strln,lemp->filename); (*lineno)++;
2936 while( *str ){
2937 if( *str=='\n' ) (*lineno)++;
2938 putc(*str,out);
2939 str++;
2940 }
2941 fprintf(out,"\n#line %d \"%s\"\n",*lineno+2,lemp->outname); (*lineno)+=2;
2942 return;
2943}
2944
2945/*
2946** The following routine emits code for the destructor for the
2947** symbol sp
2948*/
2949void emit_destructor_code(out,sp,lemp,lineno)
2950FILE *out;
2951struct symbol *sp;
2952struct lemon *lemp;
2953int *lineno;
2954{
drhcc83b6e2004-04-23 23:38:42 +00002955 char *cp = 0;
drh75897232000-05-29 14:26:00 +00002956
2957 int linecnt = 0;
2958 if( sp->type==TERMINAL ){
2959 cp = lemp->tokendest;
2960 if( cp==0 ) return;
2961 fprintf(out,"#line %d \"%s\"\n{",lemp->tokendestln,lemp->filename);
drh960e8c62001-04-03 16:53:21 +00002962 }else if( sp->destructor ){
drh75897232000-05-29 14:26:00 +00002963 cp = sp->destructor;
drh75897232000-05-29 14:26:00 +00002964 fprintf(out,"#line %d \"%s\"\n{",sp->destructorln,lemp->filename);
drh960e8c62001-04-03 16:53:21 +00002965 }else if( lemp->vardest ){
2966 cp = lemp->vardest;
2967 if( cp==0 ) return;
2968 fprintf(out,"#line %d \"%s\"\n{",lemp->vardestln,lemp->filename);
drhcc83b6e2004-04-23 23:38:42 +00002969 }else{
2970 assert( 0 ); /* Cannot happen */
drh75897232000-05-29 14:26:00 +00002971 }
2972 for(; *cp; cp++){
2973 if( *cp=='$' && cp[1]=='$' ){
2974 fprintf(out,"(yypminor->yy%d)",sp->dtnum);
2975 cp++;
2976 continue;
2977 }
2978 if( *cp=='\n' ) linecnt++;
2979 fputc(*cp,out);
2980 }
2981 (*lineno) += 3 + linecnt;
2982 fprintf(out,"}\n#line %d \"%s\"\n",*lineno,lemp->outname);
2983 return;
2984}
2985
2986/*
drh960e8c62001-04-03 16:53:21 +00002987** Return TRUE (non-zero) if the given symbol has a destructor.
drh75897232000-05-29 14:26:00 +00002988*/
2989int has_destructor(sp, lemp)
2990struct symbol *sp;
2991struct lemon *lemp;
2992{
2993 int ret;
2994 if( sp->type==TERMINAL ){
2995 ret = lemp->tokendest!=0;
2996 }else{
drh960e8c62001-04-03 16:53:21 +00002997 ret = lemp->vardest!=0 || sp->destructor!=0;
drh75897232000-05-29 14:26:00 +00002998 }
2999 return ret;
3000}
3001
3002/*
3003** Generate code which executes when the rule "rp" is reduced. Write
3004** the code to "out". Make sure lineno stays up-to-date.
3005*/
3006PRIVATE void emit_code(out,rp,lemp,lineno)
3007FILE *out;
3008struct rule *rp;
3009struct lemon *lemp;
3010int *lineno;
3011{
3012 char *cp, *xp;
3013 int linecnt = 0;
3014 int i;
3015 char lhsused = 0; /* True if the LHS element has been used */
3016 char used[MAXRHS]; /* True for each RHS element which is used */
3017
3018 for(i=0; i<rp->nrhs; i++) used[i] = 0;
3019 lhsused = 0;
3020
3021 /* Generate code to do the reduce action */
3022 if( rp->code ){
3023 fprintf(out,"#line %d \"%s\"\n{",rp->line,lemp->filename);
3024 for(cp=rp->code; *cp; cp++){
drh7218ac72002-03-10 21:21:00 +00003025 if( isalpha(*cp) && (cp==rp->code || (!isalnum(cp[-1]) && cp[-1]!='_')) ){
drh75897232000-05-29 14:26:00 +00003026 char saved;
drh7218ac72002-03-10 21:21:00 +00003027 for(xp= &cp[1]; isalnum(*xp) || *xp=='_'; xp++);
drh75897232000-05-29 14:26:00 +00003028 saved = *xp;
3029 *xp = 0;
3030 if( rp->lhsalias && strcmp(cp,rp->lhsalias)==0 ){
3031 fprintf(out,"yygotominor.yy%d",rp->lhs->dtnum);
3032 cp = xp;
3033 lhsused = 1;
3034 }else{
3035 for(i=0; i<rp->nrhs; i++){
3036 if( rp->rhsalias[i] && strcmp(cp,rp->rhsalias[i])==0 ){
3037 fprintf(out,"yymsp[%d].minor.yy%d",i-rp->nrhs+1,rp->rhs[i]->dtnum);
3038 cp = xp;
3039 used[i] = 1;
3040 break;
3041 }
3042 }
3043 }
3044 *xp = saved;
3045 }
3046 if( *cp=='\n' ) linecnt++;
3047 fputc(*cp,out);
3048 } /* End loop */
3049 (*lineno) += 3 + linecnt;
3050 fprintf(out,"}\n#line %d \"%s\"\n",*lineno,lemp->outname);
3051 } /* End if( rp->code ) */
3052
3053 /* Check to make sure the LHS has been used */
3054 if( rp->lhsalias && !lhsused ){
3055 ErrorMsg(lemp->filename,rp->ruleline,
3056 "Label \"%s\" for \"%s(%s)\" is never used.",
3057 rp->lhsalias,rp->lhs->name,rp->lhsalias);
3058 lemp->errorcnt++;
3059 }
3060
3061 /* Generate destructor code for RHS symbols which are not used in the
3062 ** reduce code */
3063 for(i=0; i<rp->nrhs; i++){
3064 if( rp->rhsalias[i] && !used[i] ){
3065 ErrorMsg(lemp->filename,rp->ruleline,
drh960e8c62001-04-03 16:53:21 +00003066 "Label %s for \"%s(%s)\" is never used.",
drh75897232000-05-29 14:26:00 +00003067 rp->rhsalias[i],rp->rhs[i]->name,rp->rhsalias[i]);
3068 lemp->errorcnt++;
3069 }else if( rp->rhsalias[i]==0 ){
3070 if( has_destructor(rp->rhs[i],lemp) ){
3071 fprintf(out," yy_destructor(%d,&yymsp[%d].minor);\n",
3072 rp->rhs[i]->index,i-rp->nrhs+1); (*lineno)++;
3073 }else{
3074 fprintf(out," /* No destructor defined for %s */\n",
3075 rp->rhs[i]->name);
3076 (*lineno)++;
3077 }
3078 }
3079 }
3080 return;
3081}
3082
3083/*
3084** Print the definition of the union used for the parser's data stack.
3085** This union contains fields for every possible data type for tokens
3086** and nonterminals. In the process of computing and printing this
3087** union, also set the ".dtnum" field of every terminal and nonterminal
3088** symbol.
3089*/
3090void print_stack_union(out,lemp,plineno,mhflag)
3091FILE *out; /* The output stream */
3092struct lemon *lemp; /* The main info structure for this parser */
3093int *plineno; /* Pointer to the line number */
3094int mhflag; /* True if generating makeheaders output */
3095{
3096 int lineno = *plineno; /* The line number of the output */
3097 char **types; /* A hash table of datatypes */
3098 int arraysize; /* Size of the "types" array */
3099 int maxdtlength; /* Maximum length of any ".datatype" field. */
3100 char *stddt; /* Standardized name for a datatype */
3101 int i,j; /* Loop counters */
3102 int hash; /* For hashing the name of a type */
3103 char *name; /* Name of the parser */
3104
3105 /* Allocate and initialize types[] and allocate stddt[] */
3106 arraysize = lemp->nsymbol * 2;
3107 types = (char**)malloc( arraysize * sizeof(char*) );
3108 for(i=0; i<arraysize; i++) types[i] = 0;
3109 maxdtlength = 0;
drh960e8c62001-04-03 16:53:21 +00003110 if( lemp->vartype ){
3111 maxdtlength = strlen(lemp->vartype);
3112 }
drh75897232000-05-29 14:26:00 +00003113 for(i=0; i<lemp->nsymbol; i++){
3114 int len;
3115 struct symbol *sp = lemp->symbols[i];
3116 if( sp->datatype==0 ) continue;
3117 len = strlen(sp->datatype);
3118 if( len>maxdtlength ) maxdtlength = len;
3119 }
3120 stddt = (char*)malloc( maxdtlength*2 + 1 );
3121 if( types==0 || stddt==0 ){
3122 fprintf(stderr,"Out of memory.\n");
3123 exit(1);
3124 }
3125
3126 /* Build a hash table of datatypes. The ".dtnum" field of each symbol
3127 ** is filled in with the hash index plus 1. A ".dtnum" value of 0 is
drh960e8c62001-04-03 16:53:21 +00003128 ** used for terminal symbols. If there is no %default_type defined then
3129 ** 0 is also used as the .dtnum value for nonterminals which do not specify
3130 ** a datatype using the %type directive.
3131 */
drh75897232000-05-29 14:26:00 +00003132 for(i=0; i<lemp->nsymbol; i++){
3133 struct symbol *sp = lemp->symbols[i];
3134 char *cp;
3135 if( sp==lemp->errsym ){
3136 sp->dtnum = arraysize+1;
3137 continue;
3138 }
drh960e8c62001-04-03 16:53:21 +00003139 if( sp->type!=NONTERMINAL || (sp->datatype==0 && lemp->vartype==0) ){
drh75897232000-05-29 14:26:00 +00003140 sp->dtnum = 0;
3141 continue;
3142 }
3143 cp = sp->datatype;
drh960e8c62001-04-03 16:53:21 +00003144 if( cp==0 ) cp = lemp->vartype;
drh75897232000-05-29 14:26:00 +00003145 j = 0;
3146 while( isspace(*cp) ) cp++;
3147 while( *cp ) stddt[j++] = *cp++;
3148 while( j>0 && isspace(stddt[j-1]) ) j--;
3149 stddt[j] = 0;
3150 hash = 0;
3151 for(j=0; stddt[j]; j++){
3152 hash = hash*53 + stddt[j];
3153 }
drh3b2129c2003-05-13 00:34:21 +00003154 hash = (hash & 0x7fffffff)%arraysize;
drh75897232000-05-29 14:26:00 +00003155 while( types[hash] ){
3156 if( strcmp(types[hash],stddt)==0 ){
3157 sp->dtnum = hash + 1;
3158 break;
3159 }
3160 hash++;
3161 if( hash>=arraysize ) hash = 0;
3162 }
3163 if( types[hash]==0 ){
3164 sp->dtnum = hash + 1;
3165 types[hash] = (char*)malloc( strlen(stddt)+1 );
3166 if( types[hash]==0 ){
3167 fprintf(stderr,"Out of memory.\n");
3168 exit(1);
3169 }
3170 strcpy(types[hash],stddt);
3171 }
3172 }
3173
3174 /* Print out the definition of YYTOKENTYPE and YYMINORTYPE */
3175 name = lemp->name ? lemp->name : "Parse";
3176 lineno = *plineno;
3177 if( mhflag ){ fprintf(out,"#if INTERFACE\n"); lineno++; }
3178 fprintf(out,"#define %sTOKENTYPE %s\n",name,
3179 lemp->tokentype?lemp->tokentype:"void*"); lineno++;
3180 if( mhflag ){ fprintf(out,"#endif\n"); lineno++; }
3181 fprintf(out,"typedef union {\n"); lineno++;
3182 fprintf(out," %sTOKENTYPE yy0;\n",name); lineno++;
3183 for(i=0; i<arraysize; i++){
3184 if( types[i]==0 ) continue;
3185 fprintf(out," %s yy%d;\n",types[i],i+1); lineno++;
3186 free(types[i]);
3187 }
3188 fprintf(out," int yy%d;\n",lemp->errsym->dtnum); lineno++;
3189 free(stddt);
3190 free(types);
3191 fprintf(out,"} YYMINORTYPE;\n"); lineno++;
3192 *plineno = lineno;
3193}
3194
/*
** Return the name of a C integer type that can hold every value in the
** inclusive range lwr..upr.
**
** A non-negative lower bound selects among the unsigned types; a
** negative one selects among the signed types.  The returned pointer
** refers to a string literal and must not be freed.
*/
static const char *minimum_size_type(int lwr, int upr){
  if( lwr<0 ){
    if( lwr>=-127 && upr<=127 ) return "signed char";
    if( lwr>=-32767 && upr<32767 ) return "short";
    return "int";
  }
  if( upr<=255 ) return "unsigned char";
  if( upr<65535 ) return "unsigned short int";
  return "unsigned int";
}
3216
/*
** Each state contains a set of token transitions and a set of
** nonterminal transitions.  Each of these sets makes an instance
** of the following structure.  An array of these structures is used
** to order the creation of entries in the yy_action[] table.
*/
struct axset {
  struct state *stp;   /* A pointer to a state */
  int isTkn;           /* True to use tokens.  False for non-terminals */
  int nAction;         /* Number of actions */
};

/*
** qsort() comparison function for two axset structures.  The result
** is the second element's nAction minus the first's, so sets with
** more actions sort toward the front of the array.
*/
static int axset_compare(const void *a, const void *b){
  const struct axset *lhs = a;
  const struct axset *rhs = b;
  return rhs->nAction - lhs->nAction;
}
3237
drh75897232000-05-29 14:26:00 +00003238/* Generate C source code for the parser */
3239void ReportTable(lemp, mhflag)
3240struct lemon *lemp;
3241int mhflag; /* Output in makeheaders format if true */
3242{
3243 FILE *out, *in;
3244 char line[LINESIZE];
3245 int lineno;
3246 struct state *stp;
3247 struct action *ap;
3248 struct rule *rp;
drh8b582012003-10-21 13:16:03 +00003249 struct acttab *pActtab;
3250 int i, j, n;
drh75897232000-05-29 14:26:00 +00003251 char *name;
drh8b582012003-10-21 13:16:03 +00003252 int mnTknOfst, mxTknOfst;
3253 int mnNtOfst, mxNtOfst;
drhfdbf9282003-10-21 16:34:41 +00003254 struct axset *ax;
drh75897232000-05-29 14:26:00 +00003255
3256 in = tplt_open(lemp);
3257 if( in==0 ) return;
3258 out = file_open(lemp,".c","w");
3259 if( out==0 ){
3260 fclose(in);
3261 return;
3262 }
3263 lineno = 1;
3264 tplt_xfer(lemp->name,in,out,&lineno);
3265
3266 /* Generate the include code, if any */
3267 tplt_print(out,lemp,lemp->include,lemp->includeln,&lineno);
3268 if( mhflag ){
3269 char *name = file_makename(lemp, ".h");
3270 fprintf(out,"#include \"%s\"\n", name); lineno++;
3271 free(name);
3272 }
3273 tplt_xfer(lemp->name,in,out,&lineno);
3274
3275 /* Generate #defines for all tokens */
3276 if( mhflag ){
3277 char *prefix;
3278 fprintf(out,"#if INTERFACE\n"); lineno++;
3279 if( lemp->tokenprefix ) prefix = lemp->tokenprefix;
3280 else prefix = "";
3281 for(i=1; i<lemp->nterminal; i++){
3282 fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
3283 lineno++;
3284 }
3285 fprintf(out,"#endif\n"); lineno++;
3286 }
3287 tplt_xfer(lemp->name,in,out,&lineno);
3288
3289 /* Generate the defines */
3290 fprintf(out,"/* \001 */\n");
3291 fprintf(out,"#define YYCODETYPE %s\n",
drh8b582012003-10-21 13:16:03 +00003292 minimum_size_type(0, lemp->nsymbol+5)); lineno++;
drh75897232000-05-29 14:26:00 +00003293 fprintf(out,"#define YYNOCODE %d\n",lemp->nsymbol+1); lineno++;
3294 fprintf(out,"#define YYACTIONTYPE %s\n",
drh8b582012003-10-21 13:16:03 +00003295 minimum_size_type(0, lemp->nstate+lemp->nrule+5)); lineno++;
drh75897232000-05-29 14:26:00 +00003296 print_stack_union(out,lemp,&lineno,mhflag);
3297 if( lemp->stacksize ){
3298 if( atoi(lemp->stacksize)<=0 ){
3299 ErrorMsg(lemp->filename,0,
3300"Illegal stack size: [%s]. The stack size should be an integer constant.",
3301 lemp->stacksize);
3302 lemp->errorcnt++;
3303 lemp->stacksize = "100";
3304 }
3305 fprintf(out,"#define YYSTACKDEPTH %s\n",lemp->stacksize); lineno++;
3306 }else{
3307 fprintf(out,"#define YYSTACKDEPTH 100\n"); lineno++;
3308 }
3309 if( mhflag ){
3310 fprintf(out,"#if INTERFACE\n"); lineno++;
3311 }
3312 name = lemp->name ? lemp->name : "Parse";
3313 if( lemp->arg && lemp->arg[0] ){
3314 int i;
3315 i = strlen(lemp->arg);
drhb1edd012000-06-02 18:52:12 +00003316 while( i>=1 && isspace(lemp->arg[i-1]) ) i--;
3317 while( i>=1 && (isalnum(lemp->arg[i-1]) || lemp->arg[i-1]=='_') ) i--;
drh1f245e42002-03-11 13:55:50 +00003318 fprintf(out,"#define %sARG_SDECL %s;\n",name,lemp->arg); lineno++;
3319 fprintf(out,"#define %sARG_PDECL ,%s\n",name,lemp->arg); lineno++;
3320 fprintf(out,"#define %sARG_FETCH %s = yypParser->%s\n",
3321 name,lemp->arg,&lemp->arg[i]); lineno++;
3322 fprintf(out,"#define %sARG_STORE yypParser->%s = %s\n",
3323 name,&lemp->arg[i],&lemp->arg[i]); lineno++;
drh75897232000-05-29 14:26:00 +00003324 }else{
drh1f245e42002-03-11 13:55:50 +00003325 fprintf(out,"#define %sARG_SDECL\n",name); lineno++;
3326 fprintf(out,"#define %sARG_PDECL\n",name); lineno++;
3327 fprintf(out,"#define %sARG_FETCH\n",name); lineno++;
3328 fprintf(out,"#define %sARG_STORE\n",name); lineno++;
drh75897232000-05-29 14:26:00 +00003329 }
3330 if( mhflag ){
3331 fprintf(out,"#endif\n"); lineno++;
3332 }
3333 fprintf(out,"#define YYNSTATE %d\n",lemp->nstate); lineno++;
3334 fprintf(out,"#define YYNRULE %d\n",lemp->nrule); lineno++;
3335 fprintf(out,"#define YYERRORSYMBOL %d\n",lemp->errsym->index); lineno++;
3336 fprintf(out,"#define YYERRSYMDT yy%d\n",lemp->errsym->dtnum); lineno++;
drh0bd1f4e2002-06-06 18:54:39 +00003337 if( lemp->has_fallback ){
3338 fprintf(out,"#define YYFALLBACK 1\n"); lineno++;
3339 }
drh75897232000-05-29 14:26:00 +00003340 tplt_xfer(lemp->name,in,out,&lineno);
3341
drh8b582012003-10-21 13:16:03 +00003342 /* Generate the action table and its associates:
drh75897232000-05-29 14:26:00 +00003343 **
drh8b582012003-10-21 13:16:03 +00003344 ** yy_action[] A single table containing all actions.
3345 ** yy_lookahead[] A table containing the lookahead for each entry in
3346 ** yy_action. Used to detect hash collisions.
3347 ** yy_shift_ofst[] For each state, the offset into yy_action for
3348 ** shifting terminals.
3349 ** yy_reduce_ofst[] For each state, the offset into yy_action for
3350 ** shifting non-terminals after a reduce.
3351 ** yy_default[] Default action for each state.
drh75897232000-05-29 14:26:00 +00003352 */
drh75897232000-05-29 14:26:00 +00003353
drh8b582012003-10-21 13:16:03 +00003354 /* Compute the actions on all states and count them up */
drhfdbf9282003-10-21 16:34:41 +00003355 ax = malloc( sizeof(ax[0])*lemp->nstate*2 );
3356 if( ax==0 ){
3357 fprintf(stderr,"malloc failed\n");
3358 exit(1);
3359 }
drh75897232000-05-29 14:26:00 +00003360 for(i=0; i<lemp->nstate; i++){
drh75897232000-05-29 14:26:00 +00003361 stp = lemp->sorted[i];
drh8b582012003-10-21 13:16:03 +00003362 stp->nTknAct = stp->nNtAct = 0;
3363 stp->iDflt = lemp->nstate + lemp->nrule;
3364 stp->iTknOfst = NO_OFFSET;
3365 stp->iNtOfst = NO_OFFSET;
3366 for(ap=stp->ap; ap; ap=ap->next){
3367 if( compute_action(lemp,ap)>=0 ){
3368 if( ap->sp->index<lemp->nterminal ){
3369 stp->nTknAct++;
3370 }else if( ap->sp->index<lemp->nsymbol ){
3371 stp->nNtAct++;
3372 }else{
3373 stp->iDflt = compute_action(lemp, ap);
3374 }
3375 }
3376 }
drhfdbf9282003-10-21 16:34:41 +00003377 ax[i*2].stp = stp;
3378 ax[i*2].isTkn = 1;
3379 ax[i*2].nAction = stp->nTknAct;
3380 ax[i*2+1].stp = stp;
3381 ax[i*2+1].isTkn = 0;
3382 ax[i*2+1].nAction = stp->nNtAct;
drh75897232000-05-29 14:26:00 +00003383 }
drh8b582012003-10-21 13:16:03 +00003384 mxTknOfst = mnTknOfst = 0;
3385 mxNtOfst = mnNtOfst = 0;
3386
drhfdbf9282003-10-21 16:34:41 +00003387 /* Compute the action table. In order to try to keep the size of the
3388 ** action table to a minimum, the heuristic of placing the largest action
3389 ** sets first is used.
drh8b582012003-10-21 13:16:03 +00003390 */
drhfdbf9282003-10-21 16:34:41 +00003391 qsort(ax, lemp->nstate*2, sizeof(ax[0]), axset_compare);
drh8b582012003-10-21 13:16:03 +00003392 pActtab = acttab_alloc();
drhfdbf9282003-10-21 16:34:41 +00003393 for(i=0; i<lemp->nstate*2 && ax[i].nAction>0; i++){
3394 stp = ax[i].stp;
3395 if( ax[i].isTkn ){
3396 for(ap=stp->ap; ap; ap=ap->next){
3397 int action;
3398 if( ap->sp->index>=lemp->nterminal ) continue;
3399 action = compute_action(lemp, ap);
3400 if( action<0 ) continue;
3401 acttab_action(pActtab, ap->sp->index, action);
drh8b582012003-10-21 13:16:03 +00003402 }
drhfdbf9282003-10-21 16:34:41 +00003403 stp->iTknOfst = acttab_insert(pActtab);
3404 if( stp->iTknOfst<mnTknOfst ) mnTknOfst = stp->iTknOfst;
3405 if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst;
3406 }else{
3407 for(ap=stp->ap; ap; ap=ap->next){
3408 int action;
3409 if( ap->sp->index<lemp->nterminal ) continue;
3410 if( ap->sp->index==lemp->nsymbol ) continue;
3411 action = compute_action(lemp, ap);
3412 if( action<0 ) continue;
3413 acttab_action(pActtab, ap->sp->index, action);
drh8b582012003-10-21 13:16:03 +00003414 }
drhfdbf9282003-10-21 16:34:41 +00003415 stp->iNtOfst = acttab_insert(pActtab);
3416 if( stp->iNtOfst<mnNtOfst ) mnNtOfst = stp->iNtOfst;
3417 if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst;
drh8b582012003-10-21 13:16:03 +00003418 }
3419 }
drhfdbf9282003-10-21 16:34:41 +00003420 free(ax);
drh8b582012003-10-21 13:16:03 +00003421
3422 /* Output the yy_action table */
3423 fprintf(out,"static YYACTIONTYPE yy_action[] = {\n"); lineno++;
3424 n = acttab_size(pActtab);
3425 for(i=j=0; i<n; i++){
3426 int action = acttab_yyaction(pActtab, i);
3427 if( action<0 ) action = lemp->nsymbol + lemp->nrule + 2;
drhfdbf9282003-10-21 16:34:41 +00003428 if( j==0 ) fprintf(out," /* %5d */ ", i);
drh8b582012003-10-21 13:16:03 +00003429 fprintf(out, " %4d,", action);
3430 if( j==9 || i==n-1 ){
3431 fprintf(out, "\n"); lineno++;
3432 j = 0;
3433 }else{
3434 j++;
3435 }
3436 }
3437 fprintf(out, "};\n"); lineno++;
3438
3439 /* Output the yy_lookahead table */
3440 fprintf(out,"static YYCODETYPE yy_lookahead[] = {\n"); lineno++;
3441 for(i=j=0; i<n; i++){
3442 int la = acttab_yylookahead(pActtab, i);
3443 if( la<0 ) la = lemp->nsymbol;
drhfdbf9282003-10-21 16:34:41 +00003444 if( j==0 ) fprintf(out," /* %5d */ ", i);
drh8b582012003-10-21 13:16:03 +00003445 fprintf(out, " %4d,", la);
3446 if( j==9 || i==n-1 ){
3447 fprintf(out, "\n"); lineno++;
3448 j = 0;
3449 }else{
3450 j++;
3451 }
3452 }
3453 fprintf(out, "};\n"); lineno++;
3454
3455 /* Output the yy_shift_ofst[] table */
3456 fprintf(out, "#define YY_SHIFT_USE_DFLT (%d)\n", mnTknOfst-1); lineno++;
3457 fprintf(out, "static %s yy_shift_ofst[] = {\n",
3458 minimum_size_type(mnTknOfst-1, mxTknOfst)); lineno++;
3459 n = lemp->nstate;
3460 for(i=j=0; i<n; i++){
3461 int ofst;
3462 stp = lemp->sorted[i];
3463 ofst = stp->iTknOfst;
3464 if( ofst==NO_OFFSET ) ofst = mnTknOfst - 1;
drhfdbf9282003-10-21 16:34:41 +00003465 if( j==0 ) fprintf(out," /* %5d */ ", i);
drh8b582012003-10-21 13:16:03 +00003466 fprintf(out, " %4d,", ofst);
3467 if( j==9 || i==n-1 ){
3468 fprintf(out, "\n"); lineno++;
3469 j = 0;
3470 }else{
3471 j++;
3472 }
3473 }
3474 fprintf(out, "};\n"); lineno++;
3475
3476 /* Output the yy_reduce_ofst[] table */
3477 fprintf(out, "#define YY_REDUCE_USE_DFLT (%d)\n", mnNtOfst-1); lineno++;
3478 fprintf(out, "static %s yy_reduce_ofst[] = {\n",
3479 minimum_size_type(mnNtOfst-1, mxNtOfst)); lineno++;
3480 n = lemp->nstate;
3481 for(i=j=0; i<n; i++){
3482 int ofst;
3483 stp = lemp->sorted[i];
3484 ofst = stp->iNtOfst;
3485 if( ofst==NO_OFFSET ) ofst = mnNtOfst - 1;
drhfdbf9282003-10-21 16:34:41 +00003486 if( j==0 ) fprintf(out," /* %5d */ ", i);
drh8b582012003-10-21 13:16:03 +00003487 fprintf(out, " %4d,", ofst);
3488 if( j==9 || i==n-1 ){
3489 fprintf(out, "\n"); lineno++;
3490 j = 0;
3491 }else{
3492 j++;
3493 }
3494 }
3495 fprintf(out, "};\n"); lineno++;
3496
3497 /* Output the default action table */
3498 fprintf(out, "static YYACTIONTYPE yy_default[] = {\n"); lineno++;
3499 n = lemp->nstate;
3500 for(i=j=0; i<n; i++){
3501 stp = lemp->sorted[i];
drhfdbf9282003-10-21 16:34:41 +00003502 if( j==0 ) fprintf(out," /* %5d */ ", i);
drh8b582012003-10-21 13:16:03 +00003503 fprintf(out, " %4d,", stp->iDflt);
3504 if( j==9 || i==n-1 ){
3505 fprintf(out, "\n"); lineno++;
3506 j = 0;
3507 }else{
3508 j++;
3509 }
3510 }
3511 fprintf(out, "};\n"); lineno++;
drh75897232000-05-29 14:26:00 +00003512 tplt_xfer(lemp->name,in,out,&lineno);
3513
drh0bd1f4e2002-06-06 18:54:39 +00003514 /* Generate the table of fallback tokens.
3515 */
3516 if( lemp->has_fallback ){
3517 for(i=0; i<lemp->nterminal; i++){
3518 struct symbol *p = lemp->symbols[i];
3519 if( p->fallback==0 ){
3520 fprintf(out, " 0, /* %10s => nothing */\n", p->name);
3521 }else{
3522 fprintf(out, " %3d, /* %10s => %s */\n", p->fallback->index,
3523 p->name, p->fallback->name);
3524 }
3525 lineno++;
3526 }
3527 }
3528 tplt_xfer(lemp->name, in, out, &lineno);
3529
3530 /* Generate a table containing the symbolic name of every symbol
3531 */
drh75897232000-05-29 14:26:00 +00003532 for(i=0; i<lemp->nsymbol; i++){
3533 sprintf(line,"\"%s\",",lemp->symbols[i]->name);
3534 fprintf(out," %-15s",line);
3535 if( (i&3)==3 ){ fprintf(out,"\n"); lineno++; }
3536 }
3537 if( (i&3)!=0 ){ fprintf(out,"\n"); lineno++; }
3538 tplt_xfer(lemp->name,in,out,&lineno);
3539
drh0bd1f4e2002-06-06 18:54:39 +00003540 /* Generate a table containing a text string that describes every
3541 ** rule in the rule set of the grammer. This information is used
3542 ** when tracing REDUCE actions.
3543 */
3544 for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){
3545 assert( rp->index==i );
3546 fprintf(out," /* %3d */ \"%s ::=", i, rp->lhs->name);
3547 for(j=0; j<rp->nrhs; j++) fprintf(out," %s",rp->rhs[j]->name);
3548 fprintf(out,"\",\n"); lineno++;
3549 }
3550 tplt_xfer(lemp->name,in,out,&lineno);
3551
drh75897232000-05-29 14:26:00 +00003552 /* Generate code which executes every time a symbol is popped from
3553 ** the stack while processing errors or while destroying the parser.
drh0bd1f4e2002-06-06 18:54:39 +00003554 ** (In other words, generate the %destructor actions)
3555 */
drh75897232000-05-29 14:26:00 +00003556 if( lemp->tokendest ){
3557 for(i=0; i<lemp->nsymbol; i++){
3558 struct symbol *sp = lemp->symbols[i];
3559 if( sp==0 || sp->type!=TERMINAL ) continue;
3560 fprintf(out," case %d:\n",sp->index); lineno++;
3561 }
3562 for(i=0; i<lemp->nsymbol && lemp->symbols[i]->type!=TERMINAL; i++);
3563 if( i<lemp->nsymbol ){
3564 emit_destructor_code(out,lemp->symbols[i],lemp,&lineno);
3565 fprintf(out," break;\n"); lineno++;
3566 }
3567 }
3568 for(i=0; i<lemp->nsymbol; i++){
3569 struct symbol *sp = lemp->symbols[i];
3570 if( sp==0 || sp->type==TERMINAL || sp->destructor==0 ) continue;
3571 fprintf(out," case %d:\n",sp->index); lineno++;
3572 emit_destructor_code(out,lemp->symbols[i],lemp,&lineno);
3573 fprintf(out," break;\n"); lineno++;
3574 }
drh960e8c62001-04-03 16:53:21 +00003575 if( lemp->vardest ){
3576 struct symbol *dflt_sp = 0;
3577 for(i=0; i<lemp->nsymbol; i++){
3578 struct symbol *sp = lemp->symbols[i];
3579 if( sp==0 || sp->type==TERMINAL ||
3580 sp->index<=0 || sp->destructor!=0 ) continue;
3581 fprintf(out," case %d:\n",sp->index); lineno++;
3582 dflt_sp = sp;
3583 }
3584 if( dflt_sp!=0 ){
3585 emit_destructor_code(out,dflt_sp,lemp,&lineno);
3586 fprintf(out," break;\n"); lineno++;
3587 }
3588 }
drh75897232000-05-29 14:26:00 +00003589 tplt_xfer(lemp->name,in,out,&lineno);
3590
3591 /* Generate code which executes whenever the parser stack overflows */
3592 tplt_print(out,lemp,lemp->overflow,lemp->overflowln,&lineno);
3593 tplt_xfer(lemp->name,in,out,&lineno);
3594
3595 /* Generate the table of rule information
3596 **
3597 ** Note: This code depends on the fact that rules are number
3598 ** sequentually beginning with 0.
3599 */
3600 for(rp=lemp->rule; rp; rp=rp->next){
3601 fprintf(out," { %d, %d },\n",rp->lhs->index,rp->nrhs); lineno++;
3602 }
3603 tplt_xfer(lemp->name,in,out,&lineno);
3604
3605 /* Generate code which execution during each REDUCE action */
3606 for(rp=lemp->rule; rp; rp=rp->next){
3607 fprintf(out," case %d:\n",rp->index); lineno++;
drh75897232000-05-29 14:26:00 +00003608 emit_code(out,rp,lemp,&lineno);
3609 fprintf(out," break;\n"); lineno++;
3610 }
3611 tplt_xfer(lemp->name,in,out,&lineno);
3612
3613 /* Generate code which executes if a parse fails */
3614 tplt_print(out,lemp,lemp->failure,lemp->failureln,&lineno);
3615 tplt_xfer(lemp->name,in,out,&lineno);
3616
3617 /* Generate code which executes when a syntax error occurs */
3618 tplt_print(out,lemp,lemp->error,lemp->errorln,&lineno);
3619 tplt_xfer(lemp->name,in,out,&lineno);
3620
3621 /* Generate code which executes when the parser accepts its input */
3622 tplt_print(out,lemp,lemp->accept,lemp->acceptln,&lineno);
3623 tplt_xfer(lemp->name,in,out,&lineno);
3624
3625 /* Append any addition code the user desires */
3626 tplt_print(out,lemp,lemp->extracode,lemp->extracodeln,&lineno);
3627
3628 fclose(in);
3629 fclose(out);
3630 return;
3631}
3632
3633/* Generate a header file for the parser */
3634void ReportHeader(lemp)
3635struct lemon *lemp;
3636{
3637 FILE *out, *in;
3638 char *prefix;
3639 char line[LINESIZE];
3640 char pattern[LINESIZE];
3641 int i;
3642
3643 if( lemp->tokenprefix ) prefix = lemp->tokenprefix;
3644 else prefix = "";
3645 in = file_open(lemp,".h","r");
3646 if( in ){
3647 for(i=1; i<lemp->nterminal && fgets(line,LINESIZE,in); i++){
3648 sprintf(pattern,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
3649 if( strcmp(line,pattern) ) break;
3650 }
3651 fclose(in);
3652 if( i==lemp->nterminal ){
3653 /* No change in the file. Don't rewrite it. */
3654 return;
3655 }
3656 }
3657 out = file_open(lemp,".h","w");
3658 if( out ){
3659 for(i=1; i<lemp->nterminal; i++){
3660 fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
3661 }
3662 fclose(out);
3663 }
3664 return;
3665}
3666
3667/* Reduce the size of the action tables, if possible, by making use
3668** of defaults.
3669**
drhb59499c2002-02-23 18:45:13 +00003670** In this version, we take the most frequent REDUCE action and make
3671** it the default. Only default a reduce if there are more than one.
drh75897232000-05-29 14:26:00 +00003672*/
3673void CompressTables(lemp)
3674struct lemon *lemp;
3675{
3676 struct state *stp;
drhb59499c2002-02-23 18:45:13 +00003677 struct action *ap, *ap2;
3678 struct rule *rp, *rp2, *rbest;
3679 int nbest, n;
drh75897232000-05-29 14:26:00 +00003680 int i;
drh75897232000-05-29 14:26:00 +00003681
3682 for(i=0; i<lemp->nstate; i++){
3683 stp = lemp->sorted[i];
drhb59499c2002-02-23 18:45:13 +00003684 nbest = 0;
3685 rbest = 0;
drh75897232000-05-29 14:26:00 +00003686
drhb59499c2002-02-23 18:45:13 +00003687 for(ap=stp->ap; ap; ap=ap->next){
3688 if( ap->type!=REDUCE ) continue;
3689 rp = ap->x.rp;
3690 if( rp==rbest ) continue;
3691 n = 1;
3692 for(ap2=ap->next; ap2; ap2=ap2->next){
3693 if( ap2->type!=REDUCE ) continue;
3694 rp2 = ap2->x.rp;
3695 if( rp2==rbest ) continue;
3696 if( rp2==rp ) n++;
3697 }
3698 if( n>nbest ){
3699 nbest = n;
3700 rbest = rp;
drh75897232000-05-29 14:26:00 +00003701 }
3702 }
drhb59499c2002-02-23 18:45:13 +00003703
3704 /* Do not make a default if the number of rules to default
3705 ** is not at least 2 */
3706 if( nbest<2 ) continue;
drh75897232000-05-29 14:26:00 +00003707
drhb59499c2002-02-23 18:45:13 +00003708
3709 /* Combine matching REDUCE actions into a single default */
3710 for(ap=stp->ap; ap; ap=ap->next){
3711 if( ap->type==REDUCE && ap->x.rp==rbest ) break;
3712 }
drh75897232000-05-29 14:26:00 +00003713 assert( ap );
3714 ap->sp = Symbol_new("{default}");
3715 for(ap=ap->next; ap; ap=ap->next){
drhb59499c2002-02-23 18:45:13 +00003716 if( ap->type==REDUCE && ap->x.rp==rbest ) ap->type = NOT_USED;
drh75897232000-05-29 14:26:00 +00003717 }
3718 stp->ap = Action_sort(stp->ap);
3719 }
3720}
drhb59499c2002-02-23 18:45:13 +00003721
drh75897232000-05-29 14:26:00 +00003722/***************** From the file "set.c" ************************************/
3723/*
3724** Set manipulation routines for the LEMON parser generator.
3725*/
3726
/* Number of bytes allocated for, and scanned in, every set */
static int size = 0;

/*
** Set the set size.  Sets created afterwards by SetNew() have room for
** elements 0 through n inclusive.
*/
void SetSize(n)
int n;
{
  size = 1 + n;
}
3735
3736/* Allocate a new set */
3737char *SetNew(){
3738 char *s;
3739 int i;
3740 s = (char*)malloc( size );
3741 if( s==0 ){
3742 extern void memory_error();
3743 memory_error();
3744 }
3745 for(i=0; i<size; i++) s[i] = 0;
3746 return s;
3747}
3748
/*
** Release the memory held by set "s".  The pointer is handed straight
** to free().
*/
void SetFree(s)
char *s;
{
  free(s);
}
3755
/*
** Mark element "e" as a member of set "s".  Return TRUE (1) if the
** element was not previously a member and FALSE (0) if it was.
*/
int SetAdd(s,e)
char *s;
int e;
{
  int wasMember = s[e]!=0;
  s[e] = 1;
  return !wasMember;
}
3767
3768/* Add every element of s2 to s1. Return TRUE if s1 changes. */
3769int SetUnion(s1,s2)
3770char *s1;
3771char *s2;
3772{
3773 int i, progress;
3774 progress = 0;
3775 for(i=0; i<size; i++){
3776 if( s2[i]==0 ) continue;
3777 if( s1[i]==0 ){
3778 progress = 1;
3779 s1[i] = 1;
3780 }
3781 }
3782 return progress;
3783}
3784/********************** From the file "table.c" ****************************/
3785/*
3786** All code in this file has been automatically generated
3787** from a specification in the file
3788** "table.q"
3789** by the associative array code building program "aagen".
3790** Do not edit this file! Instead, edit the specification
3791** file, then rerun aagen.
3792*/
3793/*
3794** Code for processing tables in the LEMON parser generator.
3795*/
3796
3797PRIVATE int strhash(x)
3798char *x;
3799{
3800 int h = 0;
3801 while( *x) h = h*13 + *(x++);
3802 return h;
3803}
3804
/*
** Works like strdup, sort of.  Return a pointer to a stored copy of
** string "y".  Copies are kept in a table, so a string that has been
** seen before yields the copy already stored rather than a new one.
** MemoryCheck() is applied to the result before returning.
*/
char *Strsafe(y)
char *y;
{
  char *z = Strsafe_find(y);

  if( z==0 ){
    /* First sighting: make a private copy and remember it */
    z = malloc( strlen(y)+1 );
    if( z!=0 ){
      strcpy(z,y);
      Strsafe_insert(z);
    }
  }
  MemoryCheck(z);
  return z;
}
3822
/* There is one instance of this structure for every data element
** in an associative array of type "x1".
*/
typedef struct s_x1node x1node;
struct s_x1node {
  char *data;        /* The data */
  x1node *next;      /* Next entry with the same hash */
  x1node **from;     /* Previous link */
};

/* There is one instance of the following structure for each
** associative array of type "x1".
*/
struct s_x1 {
  int size;          /* The number of available slots. */
                     /*   Must be a power of 2 greater than or */
                     /*   equal to 1 */
  int count;         /* Number of slots currently filled */
  x1node *tbl;       /* The data stored here */
  x1node **ht;       /* Hash table for lookups */
};

/* There is only one instance of the array, which is the following */
static struct s_x1 *x1a;
3846
3847/* Allocate a new associative array */
3848void Strsafe_init(){
3849 if( x1a ) return;
3850 x1a = (struct s_x1*)malloc( sizeof(struct s_x1) );
3851 if( x1a ){
3852 x1a->size = 1024;
3853 x1a->count = 0;
3854 x1a->tbl = (x1node*)malloc(
3855 (sizeof(x1node) + sizeof(x1node*))*1024 );
3856 if( x1a->tbl==0 ){
3857 free(x1a);
3858 x1a = 0;
3859 }else{
3860 int i;
3861 x1a->ht = (x1node**)&(x1a->tbl[1024]);
3862 for(i=0; i<1024; i++) x1a->ht[i] = 0;
3863 }
3864 }
3865}
3866/* Insert a new record into the array. Return TRUE if successful.
3867** Prior data with the same key is NOT overwritten */
3868int Strsafe_insert(data)
3869char *data;
3870{
3871 x1node *np;
3872 int h;
3873 int ph;
3874
3875 if( x1a==0 ) return 0;
3876 ph = strhash(data);
3877 h = ph & (x1a->size-1);
3878 np = x1a->ht[h];
3879 while( np ){
3880 if( strcmp(np->data,data)==0 ){
3881 /* An existing entry with the same key is found. */
3882 /* Fail because overwrite is not allows. */
3883 return 0;
3884 }
3885 np = np->next;
3886 }
3887 if( x1a->count>=x1a->size ){
3888 /* Need to make the hash table bigger */
3889 int i,size;
3890 struct s_x1 array;
3891 array.size = size = x1a->size*2;
3892 array.count = x1a->count;
3893 array.tbl = (x1node*)malloc(
3894 (sizeof(x1node) + sizeof(x1node*))*size );
3895 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */
3896 array.ht = (x1node**)&(array.tbl[size]);
3897 for(i=0; i<size; i++) array.ht[i] = 0;
3898 for(i=0; i<x1a->count; i++){
3899 x1node *oldnp, *newnp;
3900 oldnp = &(x1a->tbl[i]);
3901 h = strhash(oldnp->data) & (size-1);
3902 newnp = &(array.tbl[i]);
3903 if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
3904 newnp->next = array.ht[h];
3905 newnp->data = oldnp->data;
3906 newnp->from = &(array.ht[h]);
3907 array.ht[h] = newnp;
3908 }
3909 free(x1a->tbl);
3910 *x1a = array;
3911 }
3912 /* Insert the new data */
3913 h = ph & (x1a->size-1);
3914 np = &(x1a->tbl[x1a->count++]);
3915 np->data = data;
3916 if( x1a->ht[h] ) x1a->ht[h]->from = &(np->next);
3917 np->next = x1a->ht[h];
3918 x1a->ht[h] = np;
3919 np->from = &(x1a->ht[h]);
3920 return 1;
3921}
3922
3923/* Return a pointer to data assigned to the given key. Return NULL
3924** if no such key. */
3925char *Strsafe_find(key)
3926char *key;
3927{
3928 int h;
3929 x1node *np;
3930
3931 if( x1a==0 ) return 0;
3932 h = strhash(key) & (x1a->size-1);
3933 np = x1a->ht[h];
3934 while( np ){
3935 if( strcmp(np->data,key)==0 ) break;
3936 np = np->next;
3937 }
3938 return np ? np->data : 0;
3939}
3940
3941/* Return a pointer to the (terminal or nonterminal) symbol "x".
3942** Create a new symbol if this is the first time "x" has been seen.
3943*/
3944struct symbol *Symbol_new(x)
3945char *x;
3946{
3947 struct symbol *sp;
3948
3949 sp = Symbol_find(x);
3950 if( sp==0 ){
3951 sp = (struct symbol *)malloc( sizeof(struct symbol) );
3952 MemoryCheck(sp);
3953 sp->name = Strsafe(x);
3954 sp->type = isupper(*x) ? TERMINAL : NONTERMINAL;
3955 sp->rule = 0;
drh0bd1f4e2002-06-06 18:54:39 +00003956 sp->fallback = 0;
drh75897232000-05-29 14:26:00 +00003957 sp->prec = -1;
3958 sp->assoc = UNK;
3959 sp->firstset = 0;
drhb27b83a2002-08-14 23:18:57 +00003960 sp->lambda = B_FALSE;
drh75897232000-05-29 14:26:00 +00003961 sp->destructor = 0;
3962 sp->datatype = 0;
3963 Symbol_insert(sp,sp->name);
3964 }
3965 return sp;
3966}
3967
drh60d31652004-02-22 00:08:04 +00003968/* Compare two symbols for working purposes
3969**
3970** Symbols that begin with upper case letters (terminals or tokens)
3971** must sort before symbols that begin with lower case letters
3972** (non-terminals). Other than that, the order does not matter.
3973**
3974** We find experimentally that leaving the symbols in their original
3975** order (the order they appeared in the grammar file) gives the
3976** smallest parser tables in SQLite.
3977*/
3978int Symbolcmpp(struct symbol **a, struct symbol **b){
3979 int i1 = (**a).index + 10000000*((**a).name[0]>'Z');
3980 int i2 = (**b).index + 10000000*((**b).name[0]>'Z');
3981 return i1-i2;
drh75897232000-05-29 14:26:00 +00003982}
3983
/* Bookkeeping record for the associative array of type "x2".
** The entries themselves are kept in tbl[]; ht[] holds the head of the
** collision chain for each hash value.
*/
struct s_x2 {
  int size;                /* Number of available slots.  Must be a */
                           /* power of 2 greater than or equal to 1 */
  int count;               /* Number of slots currently filled */
  struct s_x2node *tbl;    /* The stored entries */
  struct s_x2node **ht;    /* Hash table used for lookups */
};

/* One of these exists for every data element held in the "x2" array.
*/
struct s_x2node {
  struct symbol *data;       /* The data */
  char *key;                 /* The key */
  struct s_x2node *next;     /* Next entry with the same hash */
  struct s_x2node **from;    /* Address of the pointer that points here */
};
typedef struct s_x2node x2node;

/* The single instance of the "x2" array.  Null until it is created. */
static struct s_x2 *x2a = 0;
4008
4009/* Allocate a new associative array */
4010void Symbol_init(){
4011 if( x2a ) return;
4012 x2a = (struct s_x2*)malloc( sizeof(struct s_x2) );
4013 if( x2a ){
4014 x2a->size = 128;
4015 x2a->count = 0;
4016 x2a->tbl = (x2node*)malloc(
4017 (sizeof(x2node) + sizeof(x2node*))*128 );
4018 if( x2a->tbl==0 ){
4019 free(x2a);
4020 x2a = 0;
4021 }else{
4022 int i;
4023 x2a->ht = (x2node**)&(x2a->tbl[128]);
4024 for(i=0; i<128; i++) x2a->ht[i] = 0;
4025 }
4026 }
4027}
4028/* Insert a new record into the array. Return TRUE if successful.
4029** Prior data with the same key is NOT overwritten */
4030int Symbol_insert(data,key)
4031struct symbol *data;
4032char *key;
4033{
4034 x2node *np;
4035 int h;
4036 int ph;
4037
4038 if( x2a==0 ) return 0;
4039 ph = strhash(key);
4040 h = ph & (x2a->size-1);
4041 np = x2a->ht[h];
4042 while( np ){
4043 if( strcmp(np->key,key)==0 ){
4044 /* An existing entry with the same key is found. */
4045 /* Fail because overwrite is not allows. */
4046 return 0;
4047 }
4048 np = np->next;
4049 }
4050 if( x2a->count>=x2a->size ){
4051 /* Need to make the hash table bigger */
4052 int i,size;
4053 struct s_x2 array;
4054 array.size = size = x2a->size*2;
4055 array.count = x2a->count;
4056 array.tbl = (x2node*)malloc(
4057 (sizeof(x2node) + sizeof(x2node*))*size );
4058 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */
4059 array.ht = (x2node**)&(array.tbl[size]);
4060 for(i=0; i<size; i++) array.ht[i] = 0;
4061 for(i=0; i<x2a->count; i++){
4062 x2node *oldnp, *newnp;
4063 oldnp = &(x2a->tbl[i]);
4064 h = strhash(oldnp->key) & (size-1);
4065 newnp = &(array.tbl[i]);
4066 if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
4067 newnp->next = array.ht[h];
4068 newnp->key = oldnp->key;
4069 newnp->data = oldnp->data;
4070 newnp->from = &(array.ht[h]);
4071 array.ht[h] = newnp;
4072 }
4073 free(x2a->tbl);
4074 *x2a = array;
4075 }
4076 /* Insert the new data */
4077 h = ph & (x2a->size-1);
4078 np = &(x2a->tbl[x2a->count++]);
4079 np->key = key;
4080 np->data = data;
4081 if( x2a->ht[h] ) x2a->ht[h]->from = &(np->next);
4082 np->next = x2a->ht[h];
4083 x2a->ht[h] = np;
4084 np->from = &(x2a->ht[h]);
4085 return 1;
4086}
4087
4088/* Return a pointer to data assigned to the given key. Return NULL
4089** if no such key. */
4090struct symbol *Symbol_find(key)
4091char *key;
4092{
4093 int h;
4094 x2node *np;
4095
4096 if( x2a==0 ) return 0;
4097 h = strhash(key) & (x2a->size-1);
4098 np = x2a->ht[h];
4099 while( np ){
4100 if( strcmp(np->key,key)==0 ) break;
4101 np = np->next;
4102 }
4103 return np ? np->data : 0;
4104}
4105
4106/* Return the n-th data. Return NULL if n is out of range. */
4107struct symbol *Symbol_Nth(n)
4108int n;
4109{
4110 struct symbol *data;
4111 if( x2a && n>0 && n<=x2a->count ){
4112 data = x2a->tbl[n-1].data;
4113 }else{
4114 data = 0;
4115 }
4116 return data;
4117}
4118
4119/* Return the size of the array */
4120int Symbol_count()
4121{
4122 return x2a ? x2a->count : 0;
4123}
4124
4125/* Return an array of pointers to all data in the table.
4126** The array is obtained from malloc. Return NULL if memory allocation
4127** problems, or if the array is empty. */
4128struct symbol **Symbol_arrayof()
4129{
4130 struct symbol **array;
4131 int i,size;
4132 if( x2a==0 ) return 0;
4133 size = x2a->count;
4134 array = (struct symbol **)malloc( sizeof(struct symbol *)*size );
4135 if( array ){
4136 for(i=0; i<size; i++) array[i] = x2a->tbl[i].data;
4137 }
4138 return array;
4139}
4140
4141/* Compare two configurations */
4142int Configcmp(a,b)
4143struct config *a;
4144struct config *b;
4145{
4146 int x;
4147 x = a->rp->index - b->rp->index;
4148 if( x==0 ) x = a->dot - b->dot;
4149 return x;
4150}
4151
4152/* Compare two states */
4153PRIVATE int statecmp(a,b)
4154struct config *a;
4155struct config *b;
4156{
4157 int rc;
4158 for(rc=0; rc==0 && a && b; a=a->bp, b=b->bp){
4159 rc = a->rp->index - b->rp->index;
4160 if( rc==0 ) rc = a->dot - b->dot;
4161 }
4162 if( rc==0 ){
4163 if( a ) rc = 1;
4164 if( b ) rc = -1;
4165 }
4166 return rc;
4167}
4168
4169/* Hash a state */
4170PRIVATE int statehash(a)
4171struct config *a;
4172{
4173 int h=0;
4174 while( a ){
4175 h = h*571 + a->rp->index*37 + a->dot;
4176 a = a->bp;
4177 }
4178 return h;
4179}
4180
4181/* Allocate a new state structure */
4182struct state *State_new()
4183{
4184 struct state *new;
4185 new = (struct state *)malloc( sizeof(struct state) );
4186 MemoryCheck(new);
4187 return new;
4188}
4189
/* Bookkeeping record for the associative array of type "x3".
** The entries themselves are kept in tbl[]; ht[] holds the head of the
** collision chain for each hash value.
*/
struct s_x3 {
  int size;                /* Number of available slots.  Must be a */
                           /* power of 2 greater than or equal to 1 */
  int count;               /* Number of slots currently filled */
  struct s_x3node *tbl;    /* The stored entries */
  struct s_x3node **ht;    /* Hash table used for lookups */
};

/* One of these exists for every data element held in the "x3" array.
*/
struct s_x3node {
  struct state *data;        /* The data */
  struct config *key;        /* The key */
  struct s_x3node *next;     /* Next entry with the same hash */
  struct s_x3node **from;    /* Address of the pointer that points here */
};
typedef struct s_x3node x3node;

/* The single instance of the "x3" array.  Null until it is created. */
static struct s_x3 *x3a = 0;
4214
4215/* Allocate a new associative array */
4216void State_init(){
4217 if( x3a ) return;
4218 x3a = (struct s_x3*)malloc( sizeof(struct s_x3) );
4219 if( x3a ){
4220 x3a->size = 128;
4221 x3a->count = 0;
4222 x3a->tbl = (x3node*)malloc(
4223 (sizeof(x3node) + sizeof(x3node*))*128 );
4224 if( x3a->tbl==0 ){
4225 free(x3a);
4226 x3a = 0;
4227 }else{
4228 int i;
4229 x3a->ht = (x3node**)&(x3a->tbl[128]);
4230 for(i=0; i<128; i++) x3a->ht[i] = 0;
4231 }
4232 }
4233}
4234/* Insert a new record into the array. Return TRUE if successful.
4235** Prior data with the same key is NOT overwritten */
4236int State_insert(data,key)
4237struct state *data;
4238struct config *key;
4239{
4240 x3node *np;
4241 int h;
4242 int ph;
4243
4244 if( x3a==0 ) return 0;
4245 ph = statehash(key);
4246 h = ph & (x3a->size-1);
4247 np = x3a->ht[h];
4248 while( np ){
4249 if( statecmp(np->key,key)==0 ){
4250 /* An existing entry with the same key is found. */
4251 /* Fail because overwrite is not allows. */
4252 return 0;
4253 }
4254 np = np->next;
4255 }
4256 if( x3a->count>=x3a->size ){
4257 /* Need to make the hash table bigger */
4258 int i,size;
4259 struct s_x3 array;
4260 array.size = size = x3a->size*2;
4261 array.count = x3a->count;
4262 array.tbl = (x3node*)malloc(
4263 (sizeof(x3node) + sizeof(x3node*))*size );
4264 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */
4265 array.ht = (x3node**)&(array.tbl[size]);
4266 for(i=0; i<size; i++) array.ht[i] = 0;
4267 for(i=0; i<x3a->count; i++){
4268 x3node *oldnp, *newnp;
4269 oldnp = &(x3a->tbl[i]);
4270 h = statehash(oldnp->key) & (size-1);
4271 newnp = &(array.tbl[i]);
4272 if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
4273 newnp->next = array.ht[h];
4274 newnp->key = oldnp->key;
4275 newnp->data = oldnp->data;
4276 newnp->from = &(array.ht[h]);
4277 array.ht[h] = newnp;
4278 }
4279 free(x3a->tbl);
4280 *x3a = array;
4281 }
4282 /* Insert the new data */
4283 h = ph & (x3a->size-1);
4284 np = &(x3a->tbl[x3a->count++]);
4285 np->key = key;
4286 np->data = data;
4287 if( x3a->ht[h] ) x3a->ht[h]->from = &(np->next);
4288 np->next = x3a->ht[h];
4289 x3a->ht[h] = np;
4290 np->from = &(x3a->ht[h]);
4291 return 1;
4292}
4293
4294/* Return a pointer to data assigned to the given key. Return NULL
4295** if no such key. */
4296struct state *State_find(key)
4297struct config *key;
4298{
4299 int h;
4300 x3node *np;
4301
4302 if( x3a==0 ) return 0;
4303 h = statehash(key) & (x3a->size-1);
4304 np = x3a->ht[h];
4305 while( np ){
4306 if( statecmp(np->key,key)==0 ) break;
4307 np = np->next;
4308 }
4309 return np ? np->data : 0;
4310}
4311
4312/* Return an array of pointers to all data in the table.
4313** The array is obtained from malloc. Return NULL if memory allocation
4314** problems, or if the array is empty. */
4315struct state **State_arrayof()
4316{
4317 struct state **array;
4318 int i,size;
4319 if( x3a==0 ) return 0;
4320 size = x3a->count;
4321 array = (struct state **)malloc( sizeof(struct state *)*size );
4322 if( array ){
4323 for(i=0; i<size; i++) array[i] = x3a->tbl[i].data;
4324 }
4325 return array;
4326}
4327
4328/* Hash a configuration */
4329PRIVATE int confighash(a)
4330struct config *a;
4331{
4332 int h=0;
4333 h = h*571 + a->rp->index*37 + a->dot;
4334 return h;
4335}
4336
/* Bookkeeping record for the associative array of type "x4".
** The entries themselves are kept in tbl[]; ht[] holds the head of the
** collision chain for each hash value.
*/
struct s_x4 {
  int size;                /* Number of available slots.  Must be a */
                           /* power of 2 greater than or equal to 1 */
  int count;               /* Number of slots currently filled */
  struct s_x4node *tbl;    /* The stored entries */
  struct s_x4node **ht;    /* Hash table used for lookups */
};

/* One of these exists for every data element held in the "x4" array.
*/
struct s_x4node {
  struct config *data;       /* The data */
  struct s_x4node *next;     /* Next entry with the same hash */
  struct s_x4node **from;    /* Address of the pointer that points here */
};
typedef struct s_x4node x4node;

/* The single instance of the "x4" array.  Null until it is created. */
static struct s_x4 *x4a = 0;
4360
4361/* Allocate a new associative array */
4362void Configtable_init(){
4363 if( x4a ) return;
4364 x4a = (struct s_x4*)malloc( sizeof(struct s_x4) );
4365 if( x4a ){
4366 x4a->size = 64;
4367 x4a->count = 0;
4368 x4a->tbl = (x4node*)malloc(
4369 (sizeof(x4node) + sizeof(x4node*))*64 );
4370 if( x4a->tbl==0 ){
4371 free(x4a);
4372 x4a = 0;
4373 }else{
4374 int i;
4375 x4a->ht = (x4node**)&(x4a->tbl[64]);
4376 for(i=0; i<64; i++) x4a->ht[i] = 0;
4377 }
4378 }
4379}
4380/* Insert a new record into the array. Return TRUE if successful.
4381** Prior data with the same key is NOT overwritten */
4382int Configtable_insert(data)
4383struct config *data;
4384{
4385 x4node *np;
4386 int h;
4387 int ph;
4388
4389 if( x4a==0 ) return 0;
4390 ph = confighash(data);
4391 h = ph & (x4a->size-1);
4392 np = x4a->ht[h];
4393 while( np ){
4394 if( Configcmp(np->data,data)==0 ){
4395 /* An existing entry with the same key is found. */
4396 /* Fail because overwrite is not allows. */
4397 return 0;
4398 }
4399 np = np->next;
4400 }
4401 if( x4a->count>=x4a->size ){
4402 /* Need to make the hash table bigger */
4403 int i,size;
4404 struct s_x4 array;
4405 array.size = size = x4a->size*2;
4406 array.count = x4a->count;
4407 array.tbl = (x4node*)malloc(
4408 (sizeof(x4node) + sizeof(x4node*))*size );
4409 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */
4410 array.ht = (x4node**)&(array.tbl[size]);
4411 for(i=0; i<size; i++) array.ht[i] = 0;
4412 for(i=0; i<x4a->count; i++){
4413 x4node *oldnp, *newnp;
4414 oldnp = &(x4a->tbl[i]);
4415 h = confighash(oldnp->data) & (size-1);
4416 newnp = &(array.tbl[i]);
4417 if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
4418 newnp->next = array.ht[h];
4419 newnp->data = oldnp->data;
4420 newnp->from = &(array.ht[h]);
4421 array.ht[h] = newnp;
4422 }
4423 free(x4a->tbl);
4424 *x4a = array;
4425 }
4426 /* Insert the new data */
4427 h = ph & (x4a->size-1);
4428 np = &(x4a->tbl[x4a->count++]);
4429 np->data = data;
4430 if( x4a->ht[h] ) x4a->ht[h]->from = &(np->next);
4431 np->next = x4a->ht[h];
4432 x4a->ht[h] = np;
4433 np->from = &(x4a->ht[h]);
4434 return 1;
4435}
4436
4437/* Return a pointer to data assigned to the given key. Return NULL
4438** if no such key. */
4439struct config *Configtable_find(key)
4440struct config *key;
4441{
4442 int h;
4443 x4node *np;
4444
4445 if( x4a==0 ) return 0;
4446 h = confighash(key) & (x4a->size-1);
4447 np = x4a->ht[h];
4448 while( np ){
4449 if( Configcmp(np->data,key)==0 ) break;
4450 np = np->next;
4451 }
4452 return np ? np->data : 0;
4453}
4454
4455/* Remove all data from the table. Pass each data to the function "f"
4456** as it is removed. ("f" may be null to avoid this step.) */
4457void Configtable_clear(f)
4458int(*f)(/* struct config * */);
4459{
4460 int i;
4461 if( x4a==0 || x4a->count==0 ) return;
4462 if( f ) for(i=0; i<x4a->count; i++) (*f)(x4a->tbl[i].data);
4463 for(i=0; i<x4a->size; i++) x4a->ht[i] = 0;
4464 x4a->count = 0;
4465 return;
4466}