blob: 626cd2e2f8c531aefecc7a982992ab577b608156 [file] [log] [blame]
drh75897232000-05-29 14:26:00 +00001/*
drh75897232000-05-29 14:26:00 +00002** This file contains all sources (including headers) to the LEMON
3** LALR(1) parser generator. The sources have been combined into a
drh960e8c62001-04-03 16:53:21 +00004** single file to make it easy to include LEMON in the source tree
5** and Makefile of another program.
drh75897232000-05-29 14:26:00 +00006**
drhb19a2bc2001-09-16 00:13:26 +00007** The author of this program disclaims copyright.
drh75897232000-05-29 14:26:00 +00008*/
9#include <stdio.h>
drhf9a2e7b2003-04-15 01:49:48 +000010#include <stdarg.h>
drh75897232000-05-29 14:26:00 +000011#include <string.h>
12#include <ctype.h>
drh8b582012003-10-21 13:16:03 +000013#include <stdlib.h>
drhe9278182007-07-18 18:16:29 +000014#include <assert.h>
drh75897232000-05-29 14:26:00 +000015
drh75897232000-05-29 14:26:00 +000016#ifndef __WIN32__
17# if defined(_WIN32) || defined(WIN32)
18# define __WIN32__
19# endif
20#endif
21
rse8f304482007-07-30 18:31:53 +000022#ifdef __WIN32__
23extern int access();
24#else
25#include <unistd.h>
26#endif
27
drh75897232000-05-29 14:26:00 +000028/* #define PRIVATE static */
29#define PRIVATE
30
31#ifdef TEST
32#define MAXRHS 5 /* Set low to exercise exception code */
33#else
34#define MAXRHS 1000
35#endif
36
drhe9278182007-07-18 18:16:29 +000037static char *msort(char*,char**,int(*)(const char*,const char*));
drh75897232000-05-29 14:26:00 +000038
drh87cf1372008-08-13 20:09:06 +000039/*
40** Compilers are getting increasingly pedantic about type conversions
41** as C evolves ever closer to Ada.... To work around the latest problems
42** we have to define the following variant of strlen().
43*/
44#define lemonStrlen(X) ((int)strlen(X))
45
drhe9278182007-07-18 18:16:29 +000046static struct action *Action_new(void);
47static struct action *Action_sort(struct action *);
drh75897232000-05-29 14:26:00 +000048
49/********** From the file "build.h" ************************************/
50void FindRulePrecedences();
51void FindFirstSets();
52void FindStates();
53void FindLinks();
54void FindFollowSets();
55void FindActions();
56
57/********* From the file "configlist.h" *********************************/
58void Configlist_init(/* void */);
59struct config *Configlist_add(/* struct rule *, int */);
60struct config *Configlist_addbasis(/* struct rule *, int */);
61void Configlist_closure(/* void */);
62void Configlist_sort(/* void */);
63void Configlist_sortbasis(/* void */);
64struct config *Configlist_return(/* void */);
65struct config *Configlist_basis(/* void */);
66void Configlist_eat(/* struct config * */);
67void Configlist_reset(/* void */);
68
69/********* From the file "error.h" ***************************************/
drhf9a2e7b2003-04-15 01:49:48 +000070void ErrorMsg(const char *, int,const char *, ...);
drh75897232000-05-29 14:26:00 +000071
72/****** From the file "option.h" ******************************************/
73struct s_options {
74 enum { OPT_FLAG=1, OPT_INT, OPT_DBL, OPT_STR,
75 OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR} type;
76 char *label;
77 char *arg;
78 char *message;
79};
drhb0c86772000-06-02 23:21:26 +000080int OptInit(/* char**,struct s_options*,FILE* */);
81int OptNArgs(/* void */);
82char *OptArg(/* int */);
83void OptErr(/* int */);
84void OptPrint(/* void */);
drh75897232000-05-29 14:26:00 +000085
86/******** From the file "parse.h" *****************************************/
87void Parse(/* struct lemon *lemp */);
88
89/********* From the file "plink.h" ***************************************/
90struct plink *Plink_new(/* void */);
91void Plink_add(/* struct plink **, struct config * */);
92void Plink_copy(/* struct plink **, struct plink * */);
93void Plink_delete(/* struct plink * */);
94
95/********** From the file "report.h" *************************************/
96void Reprint(/* struct lemon * */);
97void ReportOutput(/* struct lemon * */);
98void ReportTable(/* struct lemon * */);
99void ReportHeader(/* struct lemon * */);
100void CompressTables(/* struct lemon * */);
drhada354d2005-11-05 15:03:59 +0000101void ResortStates(/* struct lemon * */);
drh75897232000-05-29 14:26:00 +0000102
103/********** From the file "set.h" ****************************************/
104void SetSize(/* int N */); /* All sets will be of size N */
105char *SetNew(/* void */); /* A new set for element 0..N */
106void SetFree(/* char* */); /* Deallocate a set */
107
108int SetAdd(/* char*,int */); /* Add element to a set */
109int SetUnion(/* char *A,char *B */); /* A <- A U B, thru element N */
110
111#define SetFind(X,Y) (X[Y]) /* True if Y is in set X */
112
113/********** From the file "struct.h" *************************************/
114/*
115** Principal data structures for the LEMON parser generator.
116*/
117
drhaa9f1122007-08-23 02:50:56 +0000118typedef enum {LEMON_FALSE=0, LEMON_TRUE} Boolean;
drh75897232000-05-29 14:26:00 +0000119
120/* Symbols (terminals and nonterminals) of the grammar are stored
121** in the following: */
122struct symbol {
123 char *name; /* Name of the symbol */
124 int index; /* Index number for this symbol */
125 enum {
126 TERMINAL,
drhfd405312005-11-06 04:06:59 +0000127 NONTERMINAL,
128 MULTITERMINAL
drh75897232000-05-29 14:26:00 +0000129 } type; /* Symbols are all either TERMINALS or NTs */
130 struct rule *rule; /* Linked list of rules of this (if an NT) */
drh0bd1f4e2002-06-06 18:54:39 +0000131 struct symbol *fallback; /* fallback token in case this token doesn't parse */
drh75897232000-05-29 14:26:00 +0000132 int prec; /* Precedence if defined (-1 otherwise) */
133 enum e_assoc {
134 LEFT,
135 RIGHT,
136 NONE,
137 UNK
drh34ff57b2008-07-14 12:27:51 +0000138 } assoc; /* Associativity if precedence is defined */
drh75897232000-05-29 14:26:00 +0000139 char *firstset; /* First-set for all rules of this symbol */
140 Boolean lambda; /* True if NT and can generate an empty string */
drhc4dd3fd2008-01-22 01:48:05 +0000141 int useCnt; /* Number of times used */
drh75897232000-05-29 14:26:00 +0000142 char *destructor; /* Code which executes whenever this symbol is
143 ** popped from the stack during error processing */
drh4dc8ef52008-07-01 17:13:57 +0000144 int destLineno; /* Line number for start of destructor */
drh75897232000-05-29 14:26:00 +0000145 char *datatype; /* The data type of information held by this
146 ** object. Only used if type==NONTERMINAL */
147 int dtnum; /* The data type number. In the parser, the value
148 ** stack is a union. The .yy%d element of this
149 ** union is the correct data type for this object */
drhfd405312005-11-06 04:06:59 +0000150 /* The following fields are used by MULTITERMINALs only */
151 int nsubsym; /* Number of constituent symbols in the MULTI */
152 struct symbol **subsym; /* Array of constituent symbols */
drh75897232000-05-29 14:26:00 +0000153};
154
155/* Each production rule in the grammar is stored in the following
156** structure. */
157struct rule {
158 struct symbol *lhs; /* Left-hand side of the rule */
159 char *lhsalias; /* Alias for the LHS (NULL if none) */
drhb4960992007-10-05 16:16:36 +0000160 int lhsStart; /* True if left-hand side is the start symbol */
drh75897232000-05-29 14:26:00 +0000161 int ruleline; /* Line number for the rule */
162 int nrhs; /* Number of RHS symbols */
163 struct symbol **rhs; /* The RHS symbols */
164 char **rhsalias; /* An alias for each RHS symbol (NULL if none) */
165 int line; /* Line number at which code begins */
166 char *code; /* The code executed when this rule is reduced */
167 struct symbol *precsym; /* Precedence symbol for this rule */
168 int index; /* An index number for this rule */
169 Boolean canReduce; /* True if this rule is ever reduced */
170 struct rule *nextlhs; /* Next rule with the same LHS */
171 struct rule *next; /* Next rule in the global list */
172};
173
174/* A configuration is a production rule of the grammar together with
175** a mark (dot) showing how much of that rule has been processed so far.
176** Configurations also contain a follow-set which is a list of terminal
177** symbols which are allowed to immediately follow the end of the rule.
178** Every configuration is recorded as an instance of the following: */
179struct config {
180 struct rule *rp; /* The rule upon which the configuration is based */
181 int dot; /* The parse point */
182 char *fws; /* Follow-set for this configuration only */
183 struct plink *fplp; /* Follow-set forward propagation links */
184 struct plink *bplp; /* Follow-set backwards propagation links */
185 struct state *stp; /* Pointer to state which contains this */
186 enum {
187 COMPLETE, /* The status is used during followset and */
188 INCOMPLETE /* shift computations */
189 } status;
190 struct config *next; /* Next configuration in the state */
191 struct config *bp; /* The next basis configuration */
192};
193
194/* Every shift or reduce operation is stored as one of the following */
195struct action {
196 struct symbol *sp; /* The look-ahead symbol */
197 enum e_action {
198 SHIFT,
199 ACCEPT,
200 REDUCE,
201 ERROR,
drh9892c5d2007-12-21 00:02:11 +0000202 SSCONFLICT, /* A shift/shift conflict */
203 SRCONFLICT, /* Was a reduce, but part of a conflict */
204 RRCONFLICT, /* Was a reduce, but part of a conflict */
drh75897232000-05-29 14:26:00 +0000205 SH_RESOLVED, /* Was a shift. Precedence resolved conflict */
206 RD_RESOLVED, /* Was reduce. Precedence resolved conflict */
207 NOT_USED /* Deleted by compression */
208 } type;
209 union {
210 struct state *stp; /* The new state, if a shift */
211 struct rule *rp; /* The rule, if a reduce */
212 } x;
213 struct action *next; /* Next action for this state */
214 struct action *collide; /* Next action with the same hash */
215};
216
217/* Each state of the generated parser's finite state machine
218** is encoded as an instance of the following structure. */
219struct state {
220 struct config *bp; /* The basis configurations for this state */
221 struct config *cfp; /* All configurations in this set */
drh34ff57b2008-07-14 12:27:51 +0000222 int statenum; /* Sequential number for this state */
drh75897232000-05-29 14:26:00 +0000223 struct action *ap; /* Array of actions for this state */
drh8b582012003-10-21 13:16:03 +0000224 int nTknAct, nNtAct; /* Number of actions on terminals and nonterminals */
225 int iTknOfst, iNtOfst; /* yy_action[] offset for terminals and nonterms */
226 int iDflt; /* Default action */
drh75897232000-05-29 14:26:00 +0000227};
drh8b582012003-10-21 13:16:03 +0000228#define NO_OFFSET (-2147483647)
drh75897232000-05-29 14:26:00 +0000229
230/* A followset propagation link indicates that the contents of one
231** configuration followset should be propagated to another whenever
232** the first changes. */
233struct plink {
234 struct config *cfp; /* The configuration to which linked */
235 struct plink *next; /* The next propagate link */
236};
237
238/* The state vector for the entire parser generator is recorded as
239** follows. (LEMON uses no global variables and makes little use of
240** static variables. Fields in the following structure can be thought
241** of as begin global variables in the program.) */
242struct lemon {
243 struct state **sorted; /* Table of states sorted by state number */
244 struct rule *rule; /* List of all rules */
245 int nstate; /* Number of states */
246 int nrule; /* Number of rules */
247 int nsymbol; /* Number of terminal and nonterminal symbols */
248 int nterminal; /* Number of terminal symbols */
249 struct symbol **symbols; /* Sorted array of pointers to symbols */
250 int errorcnt; /* Number of errors */
251 struct symbol *errsym; /* The error symbol */
drhe09daa92006-06-10 13:29:31 +0000252 struct symbol *wildcard; /* Token that matches anything */
drh75897232000-05-29 14:26:00 +0000253 char *name; /* Name of the generated parser */
254 char *arg; /* Declaration of the 3th argument to parser */
255 char *tokentype; /* Type of terminal symbols in the parser stack */
drh960e8c62001-04-03 16:53:21 +0000256 char *vartype; /* The default type of non-terminal symbols */
drh75897232000-05-29 14:26:00 +0000257 char *start; /* Name of the start symbol for the grammar */
258 char *stacksize; /* Size of the parser stack */
259 char *include; /* Code to put at the start of the C file */
drh75897232000-05-29 14:26:00 +0000260 char *error; /* Code to execute when an error is seen */
drh75897232000-05-29 14:26:00 +0000261 char *overflow; /* Code to execute on a stack overflow */
drh75897232000-05-29 14:26:00 +0000262 char *failure; /* Code to execute on parser failure */
drh75897232000-05-29 14:26:00 +0000263 char *accept; /* Code to execute when the parser excepts */
drh75897232000-05-29 14:26:00 +0000264 char *extracode; /* Code appended to the generated file */
drh75897232000-05-29 14:26:00 +0000265 char *tokendest; /* Code to execute to destroy token data */
drh960e8c62001-04-03 16:53:21 +0000266 char *vardest; /* Code for the default non-terminal destructor */
drh75897232000-05-29 14:26:00 +0000267 char *filename; /* Name of the input file */
268 char *outname; /* Name of the current output file */
269 char *tokenprefix; /* A prefix added to token names in the .h file */
270 int nconflict; /* Number of parsing conflicts */
271 int tablesize; /* Size of the parse tables */
272 int basisflag; /* Print only basis configurations */
drh34ff57b2008-07-14 12:27:51 +0000273 int has_fallback; /* True if any %fallback is seen in the grammar */
shane58543932008-12-10 20:10:04 +0000274 int nolinenosflag; /* True if #line statements should not be printed */
drh75897232000-05-29 14:26:00 +0000275 char *argv0; /* Name of the program */
276};
277
278#define MemoryCheck(X) if((X)==0){ \
279 extern void memory_error(); \
280 memory_error(); \
281}
282
283/**************** From the file "table.h" *********************************/
284/*
285** All code in this file has been automatically generated
286** from a specification in the file
287** "table.q"
288** by the associative array code building program "aagen".
289** Do not edit this file! Instead, edit the specification
290** file, then rerun aagen.
291*/
292/*
293** Code for processing tables in the LEMON parser generator.
294*/
295
296/* Routines for handling a strings */
297
298char *Strsafe();
299
300void Strsafe_init(/* void */);
301int Strsafe_insert(/* char * */);
302char *Strsafe_find(/* char * */);
303
304/* Routines for handling symbols of the grammar */
305
306struct symbol *Symbol_new();
307int Symbolcmpp(/* struct symbol **, struct symbol ** */);
308void Symbol_init(/* void */);
309int Symbol_insert(/* struct symbol *, char * */);
310struct symbol *Symbol_find(/* char * */);
311struct symbol *Symbol_Nth(/* int */);
312int Symbol_count(/* */);
313struct symbol **Symbol_arrayof(/* */);
314
315/* Routines to manage the state table */
316
317int Configcmp(/* struct config *, struct config * */);
318struct state *State_new();
319void State_init(/* void */);
320int State_insert(/* struct state *, struct config * */);
321struct state *State_find(/* struct config * */);
322struct state **State_arrayof(/* */);
323
324/* Routines used for efficiency in Configlist_add */
325
326void Configtable_init(/* void */);
327int Configtable_insert(/* struct config * */);
328struct config *Configtable_find(/* struct config * */);
329void Configtable_clear(/* int(*)(struct config *) */);
330/****************** From the file "action.c" *******************************/
331/*
332** Routines processing parser actions in the LEMON parser generator.
333*/
334
335/* Allocate a new parser action */
drhe9278182007-07-18 18:16:29 +0000336static struct action *Action_new(void){
drh75897232000-05-29 14:26:00 +0000337 static struct action *freelist = 0;
338 struct action *new;
339
340 if( freelist==0 ){
341 int i;
342 int amt = 100;
drh9892c5d2007-12-21 00:02:11 +0000343 freelist = (struct action *)calloc(amt, sizeof(struct action));
drh75897232000-05-29 14:26:00 +0000344 if( freelist==0 ){
345 fprintf(stderr,"Unable to allocate memory for a new parser action.");
346 exit(1);
347 }
348 for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1];
349 freelist[amt-1].next = 0;
350 }
351 new = freelist;
352 freelist = freelist->next;
353 return new;
354}
355
drhe9278182007-07-18 18:16:29 +0000356/* Compare two actions for sorting purposes. Return negative, zero, or
357** positive if the first action is less than, equal to, or greater than
358** the first
359*/
360static int actioncmp(
361 struct action *ap1,
362 struct action *ap2
363){
drh75897232000-05-29 14:26:00 +0000364 int rc;
365 rc = ap1->sp->index - ap2->sp->index;
drh75897232000-05-29 14:26:00 +0000366 if( rc==0 ){
drh9892c5d2007-12-21 00:02:11 +0000367 rc = (int)ap1->type - (int)ap2->type;
368 }
369 if( rc==0 && ap1->type==REDUCE ){
drh75897232000-05-29 14:26:00 +0000370 rc = ap1->x.rp->index - ap2->x.rp->index;
371 }
372 return rc;
373}
374
375/* Sort parser actions */
drhe9278182007-07-18 18:16:29 +0000376static struct action *Action_sort(
377 struct action *ap
378){
379 ap = (struct action *)msort((char *)ap,(char **)&ap->next,
380 (int(*)(const char*,const char*))actioncmp);
drh75897232000-05-29 14:26:00 +0000381 return ap;
382}
383
384void Action_add(app,type,sp,arg)
385struct action **app;
386enum e_action type;
387struct symbol *sp;
388char *arg;
389{
390 struct action *new;
391 new = Action_new();
392 new->next = *app;
393 *app = new;
394 new->type = type;
395 new->sp = sp;
396 if( type==SHIFT ){
397 new->x.stp = (struct state *)arg;
398 }else{
399 new->x.rp = (struct rule *)arg;
400 }
401}
drh8b582012003-10-21 13:16:03 +0000402/********************** New code to implement the "acttab" module ***********/
403/*
404** This module implements routines use to construct the yy_action[] table.
405*/
406
407/*
408** The state of the yy_action table under construction is an instance of
409** the following structure
410*/
411typedef struct acttab acttab;
412struct acttab {
413 int nAction; /* Number of used slots in aAction[] */
414 int nActionAlloc; /* Slots allocated for aAction[] */
415 struct {
416 int lookahead; /* Value of the lookahead token */
417 int action; /* Action to take on the given lookahead */
418 } *aAction, /* The yy_action[] table under construction */
419 *aLookahead; /* A single new transaction set */
420 int mnLookahead; /* Minimum aLookahead[].lookahead */
421 int mnAction; /* Action associated with mnLookahead */
422 int mxLookahead; /* Maximum aLookahead[].lookahead */
423 int nLookahead; /* Used slots in aLookahead[] */
424 int nLookaheadAlloc; /* Slots allocated in aLookahead[] */
425};
426
427/* Return the number of entries in the yy_action table */
428#define acttab_size(X) ((X)->nAction)
429
430/* The value for the N-th entry in yy_action */
431#define acttab_yyaction(X,N) ((X)->aAction[N].action)
432
433/* The value for the N-th entry in yy_lookahead */
434#define acttab_yylookahead(X,N) ((X)->aAction[N].lookahead)
435
436/* Free all memory associated with the given acttab */
437void acttab_free(acttab *p){
438 free( p->aAction );
439 free( p->aLookahead );
440 free( p );
441}
442
443/* Allocate a new acttab structure */
444acttab *acttab_alloc(void){
drh9892c5d2007-12-21 00:02:11 +0000445 acttab *p = calloc( 1, sizeof(*p) );
drh8b582012003-10-21 13:16:03 +0000446 if( p==0 ){
447 fprintf(stderr,"Unable to allocate memory for a new acttab.");
448 exit(1);
449 }
450 memset(p, 0, sizeof(*p));
451 return p;
452}
453
454/* Add a new action to the current transaction set
455*/
456void acttab_action(acttab *p, int lookahead, int action){
457 if( p->nLookahead>=p->nLookaheadAlloc ){
458 p->nLookaheadAlloc += 25;
459 p->aLookahead = realloc( p->aLookahead,
460 sizeof(p->aLookahead[0])*p->nLookaheadAlloc );
461 if( p->aLookahead==0 ){
462 fprintf(stderr,"malloc failed\n");
463 exit(1);
464 }
465 }
466 if( p->nLookahead==0 ){
467 p->mxLookahead = lookahead;
468 p->mnLookahead = lookahead;
469 p->mnAction = action;
470 }else{
471 if( p->mxLookahead<lookahead ) p->mxLookahead = lookahead;
472 if( p->mnLookahead>lookahead ){
473 p->mnLookahead = lookahead;
474 p->mnAction = action;
475 }
476 }
477 p->aLookahead[p->nLookahead].lookahead = lookahead;
478 p->aLookahead[p->nLookahead].action = action;
479 p->nLookahead++;
480}
481
482/*
483** Add the transaction set built up with prior calls to acttab_action()
484** into the current action table. Then reset the transaction set back
485** to an empty set in preparation for a new round of acttab_action() calls.
486**
487** Return the offset into the action table of the new transaction.
488*/
489int acttab_insert(acttab *p){
490 int i, j, k, n;
491 assert( p->nLookahead>0 );
492
493 /* Make sure we have enough space to hold the expanded action table
494 ** in the worst case. The worst case occurs if the transaction set
495 ** must be appended to the current action table
496 */
drh784d86f2004-02-19 18:41:53 +0000497 n = p->mxLookahead + 1;
drh8b582012003-10-21 13:16:03 +0000498 if( p->nAction + n >= p->nActionAlloc ){
drhfdbf9282003-10-21 16:34:41 +0000499 int oldAlloc = p->nActionAlloc;
drh8b582012003-10-21 13:16:03 +0000500 p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20;
501 p->aAction = realloc( p->aAction,
502 sizeof(p->aAction[0])*p->nActionAlloc);
503 if( p->aAction==0 ){
504 fprintf(stderr,"malloc failed\n");
505 exit(1);
506 }
drhfdbf9282003-10-21 16:34:41 +0000507 for(i=oldAlloc; i<p->nActionAlloc; i++){
drh8b582012003-10-21 13:16:03 +0000508 p->aAction[i].lookahead = -1;
509 p->aAction[i].action = -1;
510 }
511 }
512
513 /* Scan the existing action table looking for an offset where we can
514 ** insert the current transaction set. Fall out of the loop when that
515 ** offset is found. In the worst case, we fall out of the loop when
516 ** i reaches p->nAction, which means we append the new transaction set.
517 **
518 ** i is the index in p->aAction[] where p->mnLookahead is inserted.
519 */
drh784d86f2004-02-19 18:41:53 +0000520 for(i=0; i<p->nAction+p->mnLookahead; i++){
drh8b582012003-10-21 13:16:03 +0000521 if( p->aAction[i].lookahead<0 ){
522 for(j=0; j<p->nLookahead; j++){
523 k = p->aLookahead[j].lookahead - p->mnLookahead + i;
524 if( k<0 ) break;
525 if( p->aAction[k].lookahead>=0 ) break;
526 }
drhfdbf9282003-10-21 16:34:41 +0000527 if( j<p->nLookahead ) continue;
528 for(j=0; j<p->nAction; j++){
529 if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break;
530 }
531 if( j==p->nAction ){
532 break; /* Fits in empty slots */
533 }
drh8b582012003-10-21 13:16:03 +0000534 }else if( p->aAction[i].lookahead==p->mnLookahead ){
535 if( p->aAction[i].action!=p->mnAction ) continue;
536 for(j=0; j<p->nLookahead; j++){
537 k = p->aLookahead[j].lookahead - p->mnLookahead + i;
538 if( k<0 || k>=p->nAction ) break;
539 if( p->aLookahead[j].lookahead!=p->aAction[k].lookahead ) break;
540 if( p->aLookahead[j].action!=p->aAction[k].action ) break;
541 }
542 if( j<p->nLookahead ) continue;
543 n = 0;
544 for(j=0; j<p->nAction; j++){
drhfdbf9282003-10-21 16:34:41 +0000545 if( p->aAction[j].lookahead<0 ) continue;
546 if( p->aAction[j].lookahead==j+p->mnLookahead-i ) n++;
drh8b582012003-10-21 13:16:03 +0000547 }
drhfdbf9282003-10-21 16:34:41 +0000548 if( n==p->nLookahead ){
549 break; /* Same as a prior transaction set */
550 }
drh8b582012003-10-21 13:16:03 +0000551 }
552 }
553 /* Insert transaction set at index i. */
554 for(j=0; j<p->nLookahead; j++){
555 k = p->aLookahead[j].lookahead - p->mnLookahead + i;
556 p->aAction[k] = p->aLookahead[j];
557 if( k>=p->nAction ) p->nAction = k+1;
558 }
559 p->nLookahead = 0;
560
561 /* Return the offset that is added to the lookahead in order to get the
562 ** index into yy_action of the action */
563 return i - p->mnLookahead;
564}
565
drh75897232000-05-29 14:26:00 +0000566/********************** From the file "build.c" *****************************/
567/*
568** Routines to construction the finite state machine for the LEMON
569** parser generator.
570*/
571
572/* Find a precedence symbol of every rule in the grammar.
573**
574** Those rules which have a precedence symbol coded in the input
575** grammar using the "[symbol]" construct will already have the
576** rp->precsym field filled. Other rules take as their precedence
577** symbol the first RHS symbol with a defined precedence. If there
578** are not RHS symbols with a defined precedence, the precedence
579** symbol field is left blank.
580*/
581void FindRulePrecedences(xp)
582struct lemon *xp;
583{
584 struct rule *rp;
585 for(rp=xp->rule; rp; rp=rp->next){
586 if( rp->precsym==0 ){
drhfd405312005-11-06 04:06:59 +0000587 int i, j;
588 for(i=0; i<rp->nrhs && rp->precsym==0; i++){
589 struct symbol *sp = rp->rhs[i];
590 if( sp->type==MULTITERMINAL ){
591 for(j=0; j<sp->nsubsym; j++){
592 if( sp->subsym[j]->prec>=0 ){
593 rp->precsym = sp->subsym[j];
594 break;
595 }
596 }
597 }else if( sp->prec>=0 ){
drh75897232000-05-29 14:26:00 +0000598 rp->precsym = rp->rhs[i];
drh75897232000-05-29 14:26:00 +0000599 }
600 }
601 }
602 }
603 return;
604}
605
606/* Find all nonterminals which will generate the empty string.
607** Then go back and compute the first sets of every nonterminal.
608** The first set is the set of all terminal symbols which can begin
609** a string generated by that nonterminal.
610*/
611void FindFirstSets(lemp)
612struct lemon *lemp;
613{
drhfd405312005-11-06 04:06:59 +0000614 int i, j;
drh75897232000-05-29 14:26:00 +0000615 struct rule *rp;
616 int progress;
617
618 for(i=0; i<lemp->nsymbol; i++){
drhaa9f1122007-08-23 02:50:56 +0000619 lemp->symbols[i]->lambda = LEMON_FALSE;
drh75897232000-05-29 14:26:00 +0000620 }
621 for(i=lemp->nterminal; i<lemp->nsymbol; i++){
622 lemp->symbols[i]->firstset = SetNew();
623 }
624
625 /* First compute all lambdas */
626 do{
627 progress = 0;
628 for(rp=lemp->rule; rp; rp=rp->next){
629 if( rp->lhs->lambda ) continue;
630 for(i=0; i<rp->nrhs; i++){
drhfd405312005-11-06 04:06:59 +0000631 struct symbol *sp = rp->rhs[i];
drhaa9f1122007-08-23 02:50:56 +0000632 if( sp->type!=TERMINAL || sp->lambda==LEMON_FALSE ) break;
drh75897232000-05-29 14:26:00 +0000633 }
634 if( i==rp->nrhs ){
drhaa9f1122007-08-23 02:50:56 +0000635 rp->lhs->lambda = LEMON_TRUE;
drh75897232000-05-29 14:26:00 +0000636 progress = 1;
637 }
638 }
639 }while( progress );
640
641 /* Now compute all first sets */
642 do{
643 struct symbol *s1, *s2;
644 progress = 0;
645 for(rp=lemp->rule; rp; rp=rp->next){
646 s1 = rp->lhs;
647 for(i=0; i<rp->nrhs; i++){
648 s2 = rp->rhs[i];
649 if( s2->type==TERMINAL ){
650 progress += SetAdd(s1->firstset,s2->index);
651 break;
drhfd405312005-11-06 04:06:59 +0000652 }else if( s2->type==MULTITERMINAL ){
653 for(j=0; j<s2->nsubsym; j++){
654 progress += SetAdd(s1->firstset,s2->subsym[j]->index);
655 }
656 break;
drh75897232000-05-29 14:26:00 +0000657 }else if( s1==s2 ){
drhaa9f1122007-08-23 02:50:56 +0000658 if( s1->lambda==LEMON_FALSE ) break;
drh75897232000-05-29 14:26:00 +0000659 }else{
660 progress += SetUnion(s1->firstset,s2->firstset);
drhaa9f1122007-08-23 02:50:56 +0000661 if( s2->lambda==LEMON_FALSE ) break;
drh75897232000-05-29 14:26:00 +0000662 }
663 }
664 }
665 }while( progress );
666 return;
667}
668
669/* Compute all LR(0) states for the grammar. Links
670** are added to between some states so that the LR(1) follow sets
671** can be computed later.
672*/
673PRIVATE struct state *getstate(/* struct lemon * */); /* forward reference */
674void FindStates(lemp)
675struct lemon *lemp;
676{
677 struct symbol *sp;
678 struct rule *rp;
679
680 Configlist_init();
681
682 /* Find the start symbol */
683 if( lemp->start ){
684 sp = Symbol_find(lemp->start);
685 if( sp==0 ){
686 ErrorMsg(lemp->filename,0,
687"The specified start symbol \"%s\" is not \
688in a nonterminal of the grammar. \"%s\" will be used as the start \
689symbol instead.",lemp->start,lemp->rule->lhs->name);
690 lemp->errorcnt++;
691 sp = lemp->rule->lhs;
692 }
693 }else{
694 sp = lemp->rule->lhs;
695 }
696
697 /* Make sure the start symbol doesn't occur on the right-hand side of
698 ** any rule. Report an error if it does. (YACC would generate a new
699 ** start symbol in this case.) */
700 for(rp=lemp->rule; rp; rp=rp->next){
701 int i;
702 for(i=0; i<rp->nrhs; i++){
drhfd405312005-11-06 04:06:59 +0000703 if( rp->rhs[i]==sp ){ /* FIX ME: Deal with multiterminals */
drh75897232000-05-29 14:26:00 +0000704 ErrorMsg(lemp->filename,0,
705"The start symbol \"%s\" occurs on the \
706right-hand side of a rule. This will result in a parser which \
707does not work properly.",sp->name);
708 lemp->errorcnt++;
709 }
710 }
711 }
712
713 /* The basis configuration set for the first state
714 ** is all rules which have the start symbol as their
715 ** left-hand side */
716 for(rp=sp->rule; rp; rp=rp->nextlhs){
717 struct config *newcfp;
drhb4960992007-10-05 16:16:36 +0000718 rp->lhsStart = 1;
drh75897232000-05-29 14:26:00 +0000719 newcfp = Configlist_addbasis(rp,0);
720 SetAdd(newcfp->fws,0);
721 }
722
723 /* Compute the first state. All other states will be
724 ** computed automatically during the computation of the first one.
725 ** The returned pointer to the first state is not used. */
726 (void)getstate(lemp);
727 return;
728}
729
730/* Return a pointer to a state which is described by the configuration
731** list which has been built from calls to Configlist_add.
732*/
733PRIVATE void buildshifts(/* struct lemon *, struct state * */); /* Forwd ref */
734PRIVATE struct state *getstate(lemp)
735struct lemon *lemp;
736{
737 struct config *cfp, *bp;
738 struct state *stp;
739
740 /* Extract the sorted basis of the new state. The basis was constructed
741 ** by prior calls to "Configlist_addbasis()". */
742 Configlist_sortbasis();
743 bp = Configlist_basis();
744
745 /* Get a state with the same basis */
746 stp = State_find(bp);
747 if( stp ){
748 /* A state with the same basis already exists! Copy all the follow-set
749 ** propagation links from the state under construction into the
750 ** preexisting state, then return a pointer to the preexisting state */
751 struct config *x, *y;
752 for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){
753 Plink_copy(&y->bplp,x->bplp);
754 Plink_delete(x->fplp);
755 x->fplp = x->bplp = 0;
756 }
757 cfp = Configlist_return();
758 Configlist_eat(cfp);
759 }else{
760 /* This really is a new state. Construct all the details */
761 Configlist_closure(lemp); /* Compute the configuration closure */
762 Configlist_sort(); /* Sort the configuration closure */
763 cfp = Configlist_return(); /* Get a pointer to the config list */
764 stp = State_new(); /* A new state structure */
765 MemoryCheck(stp);
766 stp->bp = bp; /* Remember the configuration basis */
767 stp->cfp = cfp; /* Remember the configuration closure */
drhada354d2005-11-05 15:03:59 +0000768 stp->statenum = lemp->nstate++; /* Every state gets a sequence number */
drh75897232000-05-29 14:26:00 +0000769 stp->ap = 0; /* No actions, yet. */
770 State_insert(stp,stp->bp); /* Add to the state table */
771 buildshifts(lemp,stp); /* Recursively compute successor states */
772 }
773 return stp;
774}
775
drhfd405312005-11-06 04:06:59 +0000776/*
777** Return true if two symbols are the same.
778*/
779int same_symbol(a,b)
780struct symbol *a;
781struct symbol *b;
782{
783 int i;
784 if( a==b ) return 1;
785 if( a->type!=MULTITERMINAL ) return 0;
786 if( b->type!=MULTITERMINAL ) return 0;
787 if( a->nsubsym!=b->nsubsym ) return 0;
788 for(i=0; i<a->nsubsym; i++){
789 if( a->subsym[i]!=b->subsym[i] ) return 0;
790 }
791 return 1;
792}
793
drh75897232000-05-29 14:26:00 +0000794/* Construct all successor states to the given state. A "successor"
795** state is any state which can be reached by a shift action.
796*/
797PRIVATE void buildshifts(lemp,stp)
798struct lemon *lemp;
799struct state *stp; /* The state from which successors are computed */
800{
801 struct config *cfp; /* For looping thru the config closure of "stp" */
802 struct config *bcfp; /* For the inner loop on config closure of "stp" */
803 struct config *new; /* */
804 struct symbol *sp; /* Symbol following the dot in configuration "cfp" */
805 struct symbol *bsp; /* Symbol following the dot in configuration "bcfp" */
806 struct state *newstp; /* A pointer to a successor state */
807
808 /* Each configuration becomes complete after it contibutes to a successor
809 ** state. Initially, all configurations are incomplete */
810 for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE;
811
812 /* Loop through all configurations of the state "stp" */
813 for(cfp=stp->cfp; cfp; cfp=cfp->next){
814 if( cfp->status==COMPLETE ) continue; /* Already used by inner loop */
815 if( cfp->dot>=cfp->rp->nrhs ) continue; /* Can't shift this config */
816 Configlist_reset(); /* Reset the new config set */
817 sp = cfp->rp->rhs[cfp->dot]; /* Symbol after the dot */
818
819 /* For every configuration in the state "stp" which has the symbol "sp"
820 ** following its dot, add the same configuration to the basis set under
821 ** construction but with the dot shifted one symbol to the right. */
822 for(bcfp=cfp; bcfp; bcfp=bcfp->next){
823 if( bcfp->status==COMPLETE ) continue; /* Already used */
824 if( bcfp->dot>=bcfp->rp->nrhs ) continue; /* Can't shift this one */
825 bsp = bcfp->rp->rhs[bcfp->dot]; /* Get symbol after dot */
drhfd405312005-11-06 04:06:59 +0000826 if( !same_symbol(bsp,sp) ) continue; /* Must be same as for "cfp" */
drh75897232000-05-29 14:26:00 +0000827 bcfp->status = COMPLETE; /* Mark this config as used */
828 new = Configlist_addbasis(bcfp->rp,bcfp->dot+1);
829 Plink_add(&new->bplp,bcfp);
830 }
831
832 /* Get a pointer to the state described by the basis configuration set
833 ** constructed in the preceding loop */
834 newstp = getstate(lemp);
835
836 /* The state "newstp" is reached from the state "stp" by a shift action
837 ** on the symbol "sp" */
drhfd405312005-11-06 04:06:59 +0000838 if( sp->type==MULTITERMINAL ){
839 int i;
840 for(i=0; i<sp->nsubsym; i++){
841 Action_add(&stp->ap,SHIFT,sp->subsym[i],(char*)newstp);
842 }
843 }else{
844 Action_add(&stp->ap,SHIFT,sp,(char *)newstp);
845 }
drh75897232000-05-29 14:26:00 +0000846 }
847}
848
849/*
850** Construct the propagation links
851*/
852void FindLinks(lemp)
853struct lemon *lemp;
854{
855 int i;
856 struct config *cfp, *other;
857 struct state *stp;
858 struct plink *plp;
859
860 /* Housekeeping detail:
861 ** Add to every propagate link a pointer back to the state to
862 ** which the link is attached. */
863 for(i=0; i<lemp->nstate; i++){
864 stp = lemp->sorted[i];
865 for(cfp=stp->cfp; cfp; cfp=cfp->next){
866 cfp->stp = stp;
867 }
868 }
869
870 /* Convert all backlinks into forward links. Only the forward
871 ** links are used in the follow-set computation. */
872 for(i=0; i<lemp->nstate; i++){
873 stp = lemp->sorted[i];
874 for(cfp=stp->cfp; cfp; cfp=cfp->next){
875 for(plp=cfp->bplp; plp; plp=plp->next){
876 other = plp->cfp;
877 Plink_add(&other->fplp,cfp);
878 }
879 }
880 }
881}
882
883/* Compute all followsets.
884**
885** A followset is the set of all symbols which can come immediately
886** after a configuration.
887*/
888void FindFollowSets(lemp)
889struct lemon *lemp;
890{
891 int i;
892 struct config *cfp;
893 struct plink *plp;
894 int progress;
895 int change;
896
897 for(i=0; i<lemp->nstate; i++){
898 for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){
899 cfp->status = INCOMPLETE;
900 }
901 }
902
903 do{
904 progress = 0;
905 for(i=0; i<lemp->nstate; i++){
906 for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){
907 if( cfp->status==COMPLETE ) continue;
908 for(plp=cfp->fplp; plp; plp=plp->next){
909 change = SetUnion(plp->cfp->fws,cfp->fws);
910 if( change ){
911 plp->cfp->status = INCOMPLETE;
912 progress = 1;
913 }
914 }
915 cfp->status = COMPLETE;
916 }
917 }
918 }while( progress );
919}
920
921static int resolve_conflict();
922
923/* Compute the reduce actions, and resolve conflicts.
924*/
925void FindActions(lemp)
926struct lemon *lemp;
927{
928 int i,j;
929 struct config *cfp;
930 struct state *stp;
931 struct symbol *sp;
932 struct rule *rp;
933
934 /* Add all of the reduce actions
935 ** A reduce action is added for each element of the followset of
936 ** a configuration which has its dot at the extreme right.
937 */
938 for(i=0; i<lemp->nstate; i++){ /* Loop over all states */
939 stp = lemp->sorted[i];
940 for(cfp=stp->cfp; cfp; cfp=cfp->next){ /* Loop over all configurations */
941 if( cfp->rp->nrhs==cfp->dot ){ /* Is dot at extreme right? */
942 for(j=0; j<lemp->nterminal; j++){
943 if( SetFind(cfp->fws,j) ){
944 /* Add a reduce action to the state "stp" which will reduce by the
945 ** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */
drh218dc692004-05-31 23:13:45 +0000946 Action_add(&stp->ap,REDUCE,lemp->symbols[j],(char *)cfp->rp);
drh75897232000-05-29 14:26:00 +0000947 }
948 }
949 }
950 }
951 }
952
953 /* Add the accepting token */
954 if( lemp->start ){
955 sp = Symbol_find(lemp->start);
956 if( sp==0 ) sp = lemp->rule->lhs;
957 }else{
958 sp = lemp->rule->lhs;
959 }
960 /* Add to the first state (which is always the starting state of the
961 ** finite state machine) an action to ACCEPT if the lookahead is the
962 ** start nonterminal. */
963 Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0);
964
965 /* Resolve conflicts */
966 for(i=0; i<lemp->nstate; i++){
967 struct action *ap, *nap;
968 struct state *stp;
969 stp = lemp->sorted[i];
drhe9278182007-07-18 18:16:29 +0000970 /* assert( stp->ap ); */
drh75897232000-05-29 14:26:00 +0000971 stp->ap = Action_sort(stp->ap);
drhb59499c2002-02-23 18:45:13 +0000972 for(ap=stp->ap; ap && ap->next; ap=ap->next){
drh75897232000-05-29 14:26:00 +0000973 for(nap=ap->next; nap && nap->sp==ap->sp; nap=nap->next){
974 /* The two actions "ap" and "nap" have the same lookahead.
975 ** Figure out which one should be used */
976 lemp->nconflict += resolve_conflict(ap,nap,lemp->errsym);
977 }
978 }
979 }
980
981 /* Report an error for each rule that can never be reduced. */
drhaa9f1122007-08-23 02:50:56 +0000982 for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = LEMON_FALSE;
drh75897232000-05-29 14:26:00 +0000983 for(i=0; i<lemp->nstate; i++){
984 struct action *ap;
985 for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){
drhaa9f1122007-08-23 02:50:56 +0000986 if( ap->type==REDUCE ) ap->x.rp->canReduce = LEMON_TRUE;
drh75897232000-05-29 14:26:00 +0000987 }
988 }
989 for(rp=lemp->rule; rp; rp=rp->next){
990 if( rp->canReduce ) continue;
991 ErrorMsg(lemp->filename,rp->ruleline,"This rule can not be reduced.\n");
992 lemp->errorcnt++;
993 }
994}
995
996/* Resolve a conflict between the two given actions. If the
drh34ff57b2008-07-14 12:27:51 +0000997** conflict can't be resolved, return non-zero.
drh75897232000-05-29 14:26:00 +0000998**
999** NO LONGER TRUE:
1000** To resolve a conflict, first look to see if either action
1001** is on an error rule. In that case, take the action which
1002** is not associated with the error rule. If neither or both
1003** actions are associated with an error rule, then try to
1004** use precedence to resolve the conflict.
1005**
1006** If either action is a SHIFT, then it must be apx. This
1007** function won't work if apx->type==REDUCE and apy->type==SHIFT.
1008*/
1009static int resolve_conflict(apx,apy,errsym)
1010struct action *apx;
1011struct action *apy;
1012struct symbol *errsym; /* The error symbol (if defined. NULL otherwise) */
1013{
1014 struct symbol *spx, *spy;
1015 int errcnt = 0;
1016 assert( apx->sp==apy->sp ); /* Otherwise there would be no conflict */
drhf0fa1c12006-12-14 01:06:22 +00001017 if( apx->type==SHIFT && apy->type==SHIFT ){
drh9892c5d2007-12-21 00:02:11 +00001018 apy->type = SSCONFLICT;
drhf0fa1c12006-12-14 01:06:22 +00001019 errcnt++;
1020 }
drh75897232000-05-29 14:26:00 +00001021 if( apx->type==SHIFT && apy->type==REDUCE ){
1022 spx = apx->sp;
1023 spy = apy->x.rp->precsym;
1024 if( spy==0 || spx->prec<0 || spy->prec<0 ){
1025 /* Not enough precedence information. */
drh9892c5d2007-12-21 00:02:11 +00001026 apy->type = SRCONFLICT;
drh75897232000-05-29 14:26:00 +00001027 errcnt++;
1028 }else if( spx->prec>spy->prec ){ /* Lower precedence wins */
1029 apy->type = RD_RESOLVED;
1030 }else if( spx->prec<spy->prec ){
1031 apx->type = SH_RESOLVED;
1032 }else if( spx->prec==spy->prec && spx->assoc==RIGHT ){ /* Use operator */
1033 apy->type = RD_RESOLVED; /* associativity */
1034 }else if( spx->prec==spy->prec && spx->assoc==LEFT ){ /* to break tie */
1035 apx->type = SH_RESOLVED;
1036 }else{
1037 assert( spx->prec==spy->prec && spx->assoc==NONE );
drh9892c5d2007-12-21 00:02:11 +00001038 apy->type = SRCONFLICT;
drh75897232000-05-29 14:26:00 +00001039 errcnt++;
1040 }
1041 }else if( apx->type==REDUCE && apy->type==REDUCE ){
1042 spx = apx->x.rp->precsym;
1043 spy = apy->x.rp->precsym;
1044 if( spx==0 || spy==0 || spx->prec<0 ||
1045 spy->prec<0 || spx->prec==spy->prec ){
drh9892c5d2007-12-21 00:02:11 +00001046 apy->type = RRCONFLICT;
drh75897232000-05-29 14:26:00 +00001047 errcnt++;
1048 }else if( spx->prec>spy->prec ){
1049 apy->type = RD_RESOLVED;
1050 }else if( spx->prec<spy->prec ){
1051 apx->type = RD_RESOLVED;
1052 }
1053 }else{
drhb59499c2002-02-23 18:45:13 +00001054 assert(
1055 apx->type==SH_RESOLVED ||
1056 apx->type==RD_RESOLVED ||
drh9892c5d2007-12-21 00:02:11 +00001057 apx->type==SSCONFLICT ||
1058 apx->type==SRCONFLICT ||
1059 apx->type==RRCONFLICT ||
drhb59499c2002-02-23 18:45:13 +00001060 apy->type==SH_RESOLVED ||
1061 apy->type==RD_RESOLVED ||
drh9892c5d2007-12-21 00:02:11 +00001062 apy->type==SSCONFLICT ||
1063 apy->type==SRCONFLICT ||
1064 apy->type==RRCONFLICT
drhb59499c2002-02-23 18:45:13 +00001065 );
1066 /* The REDUCE/SHIFT case cannot happen because SHIFTs come before
1067 ** REDUCEs on the list. If we reach this point it must be because
1068 ** the parser conflict had already been resolved. */
drh75897232000-05-29 14:26:00 +00001069 }
1070 return errcnt;
1071}
1072/********************* From the file "configlist.c" *************************/
1073/*
1074** Routines to processing a configuration list and building a state
1075** in the LEMON parser generator.
1076*/
1077
1078static struct config *freelist = 0; /* List of free configurations */
1079static struct config *current = 0; /* Top of list of configurations */
1080static struct config **currentend = 0; /* Last on list of configs */
1081static struct config *basis = 0; /* Top of list of basis configs */
1082static struct config **basisend = 0; /* End of list of basis configs */
1083
1084/* Return a pointer to a new configuration */
1085PRIVATE struct config *newconfig(){
1086 struct config *new;
1087 if( freelist==0 ){
1088 int i;
1089 int amt = 3;
drh9892c5d2007-12-21 00:02:11 +00001090 freelist = (struct config *)calloc( amt, sizeof(struct config) );
drh75897232000-05-29 14:26:00 +00001091 if( freelist==0 ){
1092 fprintf(stderr,"Unable to allocate memory for a new configuration.");
1093 exit(1);
1094 }
1095 for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1];
1096 freelist[amt-1].next = 0;
1097 }
1098 new = freelist;
1099 freelist = freelist->next;
1100 return new;
1101}
1102
1103/* The configuration "old" is no longer used */
1104PRIVATE void deleteconfig(old)
1105struct config *old;
1106{
1107 old->next = freelist;
1108 freelist = old;
1109}
1110
1111/* Initialized the configuration list builder */
1112void Configlist_init(){
1113 current = 0;
1114 currentend = &current;
1115 basis = 0;
1116 basisend = &basis;
1117 Configtable_init();
1118 return;
1119}
1120
1121/* Initialized the configuration list builder */
1122void Configlist_reset(){
1123 current = 0;
1124 currentend = &current;
1125 basis = 0;
1126 basisend = &basis;
1127 Configtable_clear(0);
1128 return;
1129}
1130
1131/* Add another configuration to the configuration list */
1132struct config *Configlist_add(rp,dot)
1133struct rule *rp; /* The rule */
1134int dot; /* Index into the RHS of the rule where the dot goes */
1135{
1136 struct config *cfp, model;
1137
1138 assert( currentend!=0 );
1139 model.rp = rp;
1140 model.dot = dot;
1141 cfp = Configtable_find(&model);
1142 if( cfp==0 ){
1143 cfp = newconfig();
1144 cfp->rp = rp;
1145 cfp->dot = dot;
1146 cfp->fws = SetNew();
1147 cfp->stp = 0;
1148 cfp->fplp = cfp->bplp = 0;
1149 cfp->next = 0;
1150 cfp->bp = 0;
1151 *currentend = cfp;
1152 currentend = &cfp->next;
1153 Configtable_insert(cfp);
1154 }
1155 return cfp;
1156}
1157
1158/* Add a basis configuration to the configuration list */
1159struct config *Configlist_addbasis(rp,dot)
1160struct rule *rp;
1161int dot;
1162{
1163 struct config *cfp, model;
1164
1165 assert( basisend!=0 );
1166 assert( currentend!=0 );
1167 model.rp = rp;
1168 model.dot = dot;
1169 cfp = Configtable_find(&model);
1170 if( cfp==0 ){
1171 cfp = newconfig();
1172 cfp->rp = rp;
1173 cfp->dot = dot;
1174 cfp->fws = SetNew();
1175 cfp->stp = 0;
1176 cfp->fplp = cfp->bplp = 0;
1177 cfp->next = 0;
1178 cfp->bp = 0;
1179 *currentend = cfp;
1180 currentend = &cfp->next;
1181 *basisend = cfp;
1182 basisend = &cfp->bp;
1183 Configtable_insert(cfp);
1184 }
1185 return cfp;
1186}
1187
1188/* Compute the closure of the configuration list */
1189void Configlist_closure(lemp)
1190struct lemon *lemp;
1191{
1192 struct config *cfp, *newcfp;
1193 struct rule *rp, *newrp;
1194 struct symbol *sp, *xsp;
1195 int i, dot;
1196
1197 assert( currentend!=0 );
1198 for(cfp=current; cfp; cfp=cfp->next){
1199 rp = cfp->rp;
1200 dot = cfp->dot;
1201 if( dot>=rp->nrhs ) continue;
1202 sp = rp->rhs[dot];
1203 if( sp->type==NONTERMINAL ){
1204 if( sp->rule==0 && sp!=lemp->errsym ){
1205 ErrorMsg(lemp->filename,rp->line,"Nonterminal \"%s\" has no rules.",
1206 sp->name);
1207 lemp->errorcnt++;
1208 }
1209 for(newrp=sp->rule; newrp; newrp=newrp->nextlhs){
1210 newcfp = Configlist_add(newrp,0);
1211 for(i=dot+1; i<rp->nrhs; i++){
1212 xsp = rp->rhs[i];
1213 if( xsp->type==TERMINAL ){
1214 SetAdd(newcfp->fws,xsp->index);
1215 break;
drhfd405312005-11-06 04:06:59 +00001216 }else if( xsp->type==MULTITERMINAL ){
1217 int k;
1218 for(k=0; k<xsp->nsubsym; k++){
1219 SetAdd(newcfp->fws, xsp->subsym[k]->index);
1220 }
1221 break;
drh75897232000-05-29 14:26:00 +00001222 }else{
1223 SetUnion(newcfp->fws,xsp->firstset);
drhaa9f1122007-08-23 02:50:56 +00001224 if( xsp->lambda==LEMON_FALSE ) break;
drh75897232000-05-29 14:26:00 +00001225 }
1226 }
1227 if( i==rp->nrhs ) Plink_add(&cfp->fplp,newcfp);
1228 }
1229 }
1230 }
1231 return;
1232}
1233
1234/* Sort the configuration list */
1235void Configlist_sort(){
drh218dc692004-05-31 23:13:45 +00001236 current = (struct config *)msort((char *)current,(char **)&(current->next),Configcmp);
drh75897232000-05-29 14:26:00 +00001237 currentend = 0;
1238 return;
1239}
1240
1241/* Sort the basis configuration list */
1242void Configlist_sortbasis(){
drh218dc692004-05-31 23:13:45 +00001243 basis = (struct config *)msort((char *)current,(char **)&(current->bp),Configcmp);
drh75897232000-05-29 14:26:00 +00001244 basisend = 0;
1245 return;
1246}
1247
1248/* Return a pointer to the head of the configuration list and
1249** reset the list */
1250struct config *Configlist_return(){
1251 struct config *old;
1252 old = current;
1253 current = 0;
1254 currentend = 0;
1255 return old;
1256}
1257
1258/* Return a pointer to the head of the configuration list and
1259** reset the list */
1260struct config *Configlist_basis(){
1261 struct config *old;
1262 old = basis;
1263 basis = 0;
1264 basisend = 0;
1265 return old;
1266}
1267
1268/* Free all elements of the given configuration list */
1269void Configlist_eat(cfp)
1270struct config *cfp;
1271{
1272 struct config *nextcfp;
1273 for(; cfp; cfp=nextcfp){
1274 nextcfp = cfp->next;
1275 assert( cfp->fplp==0 );
1276 assert( cfp->bplp==0 );
1277 if( cfp->fws ) SetFree(cfp->fws);
1278 deleteconfig(cfp);
1279 }
1280 return;
1281}
1282/***************** From the file "error.c" *********************************/
1283/*
1284** Code for printing error message.
1285*/
1286
1287/* Find a good place to break "msg" so that its length is at least "min"
1288** but no more than "max". Make the point as close to max as possible.
1289*/
1290static int findbreak(msg,min,max)
1291char *msg;
1292int min;
1293int max;
1294{
1295 int i,spot;
1296 char c;
1297 for(i=spot=min; i<=max; i++){
1298 c = msg[i];
1299 if( c=='\t' ) msg[i] = ' ';
1300 if( c=='\n' ){ msg[i] = ' '; spot = i; break; }
1301 if( c==0 ){ spot = i; break; }
1302 if( c=='-' && i<max-1 ) spot = i+1;
1303 if( c==' ' ) spot = i;
1304 }
1305 return spot;
1306}
1307
1308/*
1309** The error message is split across multiple lines if necessary. The
1310** splits occur at a space, if there is a space available near the end
1311** of the line.
1312*/
1313#define ERRMSGSIZE 10000 /* Hope this is big enough. No way to error check */
1314#define LINEWIDTH 79 /* Max width of any output line */
1315#define PREFIXLIMIT 30 /* Max width of the prefix on each line */
drhf9a2e7b2003-04-15 01:49:48 +00001316void ErrorMsg(const char *filename, int lineno, const char *format, ...){
drh75897232000-05-29 14:26:00 +00001317 char errmsg[ERRMSGSIZE];
1318 char prefix[PREFIXLIMIT+10];
1319 int errmsgsize;
1320 int prefixsize;
1321 int availablewidth;
1322 va_list ap;
1323 int end, restart, base;
1324
drhf9a2e7b2003-04-15 01:49:48 +00001325 va_start(ap, format);
drh75897232000-05-29 14:26:00 +00001326 /* Prepare a prefix to be prepended to every output line */
1327 if( lineno>0 ){
1328 sprintf(prefix,"%.*s:%d: ",PREFIXLIMIT-10,filename,lineno);
1329 }else{
1330 sprintf(prefix,"%.*s: ",PREFIXLIMIT-10,filename);
1331 }
drh87cf1372008-08-13 20:09:06 +00001332 prefixsize = lemonStrlen(prefix);
drh75897232000-05-29 14:26:00 +00001333 availablewidth = LINEWIDTH - prefixsize;
1334
1335 /* Generate the error message */
1336 vsprintf(errmsg,format,ap);
1337 va_end(ap);
drh87cf1372008-08-13 20:09:06 +00001338 errmsgsize = lemonStrlen(errmsg);
drh75897232000-05-29 14:26:00 +00001339 /* Remove trailing '\n's from the error message. */
1340 while( errmsgsize>0 && errmsg[errmsgsize-1]=='\n' ){
1341 errmsg[--errmsgsize] = 0;
1342 }
1343
1344 /* Print the error message */
1345 base = 0;
1346 while( errmsg[base]!=0 ){
1347 end = restart = findbreak(&errmsg[base],0,availablewidth);
1348 restart += base;
1349 while( errmsg[restart]==' ' ) restart++;
1350 fprintf(stdout,"%s%.*s\n",prefix,end,&errmsg[base]);
1351 base = restart;
1352 }
1353}
1354/**************** From the file "main.c" ************************************/
1355/*
1356** Main program file for the LEMON parser generator.
1357*/
1358
1359/* Report an out-of-memory condition and abort. This function
1360** is used mostly by the "MemoryCheck" macro in struct.h
1361*/
1362void memory_error(){
1363 fprintf(stderr,"Out of memory. Aborting...\n");
1364 exit(1);
1365}
1366
drh6d08b4d2004-07-20 12:45:22 +00001367static int nDefine = 0; /* Number of -D options on the command line */
1368static char **azDefine = 0; /* Name of the -D macros */
1369
1370/* This routine is called with the argument to each -D command-line option.
1371** Add the macro defined to the azDefine array.
1372*/
1373static void handle_D_option(char *z){
1374 char **paz;
1375 nDefine++;
1376 azDefine = realloc(azDefine, sizeof(azDefine[0])*nDefine);
1377 if( azDefine==0 ){
1378 fprintf(stderr,"out of memory\n");
1379 exit(1);
1380 }
1381 paz = &azDefine[nDefine-1];
drh87cf1372008-08-13 20:09:06 +00001382 *paz = malloc( lemonStrlen(z)+1 );
drh6d08b4d2004-07-20 12:45:22 +00001383 if( *paz==0 ){
1384 fprintf(stderr,"out of memory\n");
1385 exit(1);
1386 }
1387 strcpy(*paz, z);
1388 for(z=*paz; *z && *z!='='; z++){}
1389 *z = 0;
1390}
1391
drh75897232000-05-29 14:26:00 +00001392
1393/* The main program. Parse the command line and do it... */
1394int main(argc,argv)
1395int argc;
1396char **argv;
1397{
1398 static int version = 0;
1399 static int rpflag = 0;
1400 static int basisflag = 0;
1401 static int compress = 0;
1402 static int quiet = 0;
1403 static int statistics = 0;
1404 static int mhflag = 0;
shane58543932008-12-10 20:10:04 +00001405 static int nolinenosflag = 0;
drh75897232000-05-29 14:26:00 +00001406 static struct s_options options[] = {
1407 {OPT_FLAG, "b", (char*)&basisflag, "Print only the basis in report."},
1408 {OPT_FLAG, "c", (char*)&compress, "Don't compress the action table."},
drh6d08b4d2004-07-20 12:45:22 +00001409 {OPT_FSTR, "D", (char*)handle_D_option, "Define an %ifdef macro."},
drh75897232000-05-29 14:26:00 +00001410 {OPT_FLAG, "g", (char*)&rpflag, "Print grammar without actions."},
shane58543932008-12-10 20:10:04 +00001411 {OPT_FLAG, "m", (char*)&mhflag, "Output a makeheaders compatible file."},
1412 {OPT_FLAG, "l", (char*)&nolinenosflag, "Do not print #line statements."},
drh75897232000-05-29 14:26:00 +00001413 {OPT_FLAG, "q", (char*)&quiet, "(Quiet) Don't print the report file."},
drh6d08b4d2004-07-20 12:45:22 +00001414 {OPT_FLAG, "s", (char*)&statistics,
1415 "Print parser stats to standard output."},
drh75897232000-05-29 14:26:00 +00001416 {OPT_FLAG, "x", (char*)&version, "Print the version number."},
1417 {OPT_FLAG,0,0,0}
1418 };
1419 int i;
1420 struct lemon lem;
1421
drhb0c86772000-06-02 23:21:26 +00001422 OptInit(argv,options,stderr);
drh75897232000-05-29 14:26:00 +00001423 if( version ){
drhb19a2bc2001-09-16 00:13:26 +00001424 printf("Lemon version 1.0\n");
drh75897232000-05-29 14:26:00 +00001425 exit(0);
1426 }
drhb0c86772000-06-02 23:21:26 +00001427 if( OptNArgs()!=1 ){
drh75897232000-05-29 14:26:00 +00001428 fprintf(stderr,"Exactly one filename argument is required.\n");
1429 exit(1);
1430 }
drh954f6b42006-06-13 13:27:46 +00001431 memset(&lem, 0, sizeof(lem));
drh75897232000-05-29 14:26:00 +00001432 lem.errorcnt = 0;
1433
1434 /* Initialize the machine */
1435 Strsafe_init();
1436 Symbol_init();
1437 State_init();
1438 lem.argv0 = argv[0];
drhb0c86772000-06-02 23:21:26 +00001439 lem.filename = OptArg(0);
drh75897232000-05-29 14:26:00 +00001440 lem.basisflag = basisflag;
shane58543932008-12-10 20:10:04 +00001441 lem.nolinenosflag = nolinenosflag;
drh75897232000-05-29 14:26:00 +00001442 Symbol_new("$");
1443 lem.errsym = Symbol_new("error");
drhc4dd3fd2008-01-22 01:48:05 +00001444 lem.errsym->useCnt = 0;
drh75897232000-05-29 14:26:00 +00001445
1446 /* Parse the input file */
1447 Parse(&lem);
1448 if( lem.errorcnt ) exit(lem.errorcnt);
drh954f6b42006-06-13 13:27:46 +00001449 if( lem.nrule==0 ){
drh75897232000-05-29 14:26:00 +00001450 fprintf(stderr,"Empty grammar.\n");
1451 exit(1);
1452 }
1453
1454 /* Count and index the symbols of the grammar */
1455 lem.nsymbol = Symbol_count();
1456 Symbol_new("{default}");
1457 lem.symbols = Symbol_arrayof();
drh60d31652004-02-22 00:08:04 +00001458 for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i;
drh75897232000-05-29 14:26:00 +00001459 qsort(lem.symbols,lem.nsymbol+1,sizeof(struct symbol*),
1460 (int(*)())Symbolcmpp);
1461 for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i;
1462 for(i=1; isupper(lem.symbols[i]->name[0]); i++);
1463 lem.nterminal = i;
1464
1465 /* Generate a reprint of the grammar, if requested on the command line */
1466 if( rpflag ){
1467 Reprint(&lem);
1468 }else{
1469 /* Initialize the size for all follow and first sets */
drh9892c5d2007-12-21 00:02:11 +00001470 SetSize(lem.nterminal+1);
drh75897232000-05-29 14:26:00 +00001471
1472 /* Find the precedence for every production rule (that has one) */
1473 FindRulePrecedences(&lem);
1474
1475 /* Compute the lambda-nonterminals and the first-sets for every
1476 ** nonterminal */
1477 FindFirstSets(&lem);
1478
1479 /* Compute all LR(0) states. Also record follow-set propagation
1480 ** links so that the follow-set can be computed later */
1481 lem.nstate = 0;
1482 FindStates(&lem);
1483 lem.sorted = State_arrayof();
1484
1485 /* Tie up loose ends on the propagation links */
1486 FindLinks(&lem);
1487
1488 /* Compute the follow set of every reducible configuration */
1489 FindFollowSets(&lem);
1490
1491 /* Compute the action tables */
1492 FindActions(&lem);
1493
1494 /* Compress the action tables */
1495 if( compress==0 ) CompressTables(&lem);
1496
drhada354d2005-11-05 15:03:59 +00001497 /* Reorder and renumber the states so that states with fewer choices
1498 ** occur at the end. */
1499 ResortStates(&lem);
1500
drh75897232000-05-29 14:26:00 +00001501 /* Generate a report of the parser generated. (the "y.output" file) */
1502 if( !quiet ) ReportOutput(&lem);
1503
1504 /* Generate the source code for the parser */
1505 ReportTable(&lem, mhflag);
1506
1507 /* Produce a header file for use by the scanner. (This step is
1508 ** omitted if the "-m" option is used because makeheaders will
1509 ** generate the file for us.) */
1510 if( !mhflag ) ReportHeader(&lem);
1511 }
1512 if( statistics ){
1513 printf("Parser statistics: %d terminals, %d nonterminals, %d rules\n",
1514 lem.nterminal, lem.nsymbol - lem.nterminal, lem.nrule);
1515 printf(" %d states, %d parser table entries, %d conflicts\n",
1516 lem.nstate, lem.tablesize, lem.nconflict);
1517 }
1518 if( lem.nconflict ){
1519 fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict);
1520 }
1521 exit(lem.errorcnt + lem.nconflict);
drh218dc692004-05-31 23:13:45 +00001522 return (lem.errorcnt + lem.nconflict);
drh75897232000-05-29 14:26:00 +00001523}
1524/******************** From the file "msort.c" *******************************/
1525/*
1526** A generic merge-sort program.
1527**
1528** USAGE:
1529** Let "ptr" be a pointer to some structure which is at the head of
1530** a null-terminated list. Then to sort the list call:
1531**
1532** ptr = msort(ptr,&(ptr->next),cmpfnc);
1533**
1534** In the above, "cmpfnc" is a pointer to a function which compares
1535** two instances of the structure and returns an integer, as in
1536** strcmp. The second argument is a pointer to the pointer to the
1537** second element of the linked list. This address is used to compute
1538** the offset to the "next" field within the structure. The offset to
1539** the "next" field must be constant for all structures in the list.
1540**
1541** The function returns a new pointer which is the head of the list
1542** after sorting.
1543**
1544** ALGORITHM:
1545** Merge-sort.
1546*/
1547
1548/*
1549** Return a pointer to the next structure in the linked list.
1550*/
drhba99af52001-10-25 20:37:16 +00001551#define NEXT(A) (*(char**)(((unsigned long)A)+offset))
drh75897232000-05-29 14:26:00 +00001552
1553/*
1554** Inputs:
1555** a: A sorted, null-terminated linked list. (May be null).
1556** b: A sorted, null-terminated linked list. (May be null).
1557** cmp: A pointer to the comparison function.
1558** offset: Offset in the structure to the "next" field.
1559**
1560** Return Value:
1561** A pointer to the head of a sorted list containing the elements
1562** of both a and b.
1563**
1564** Side effects:
1565** The "next" pointers for elements in the lists a and b are
1566** changed.
1567*/
drhe9278182007-07-18 18:16:29 +00001568static char *merge(
1569 char *a,
1570 char *b,
1571 int (*cmp)(const char*,const char*),
1572 int offset
1573){
drh75897232000-05-29 14:26:00 +00001574 char *ptr, *head;
1575
1576 if( a==0 ){
1577 head = b;
1578 }else if( b==0 ){
1579 head = a;
1580 }else{
1581 if( (*cmp)(a,b)<0 ){
1582 ptr = a;
1583 a = NEXT(a);
1584 }else{
1585 ptr = b;
1586 b = NEXT(b);
1587 }
1588 head = ptr;
1589 while( a && b ){
1590 if( (*cmp)(a,b)<0 ){
1591 NEXT(ptr) = a;
1592 ptr = a;
1593 a = NEXT(a);
1594 }else{
1595 NEXT(ptr) = b;
1596 ptr = b;
1597 b = NEXT(b);
1598 }
1599 }
1600 if( a ) NEXT(ptr) = a;
1601 else NEXT(ptr) = b;
1602 }
1603 return head;
1604}
1605
1606/*
1607** Inputs:
1608** list: Pointer to a singly-linked list of structures.
1609** next: Pointer to pointer to the second element of the list.
1610** cmp: A comparison function.
1611**
1612** Return Value:
1613** A pointer to the head of a sorted list containing the elements
1614** orginally in list.
1615**
1616** Side effects:
1617** The "next" pointers for elements in list are changed.
1618*/
1619#define LISTSIZE 30
drhe9278182007-07-18 18:16:29 +00001620static char *msort(
1621 char *list,
1622 char **next,
1623 int (*cmp)(const char*,const char*)
1624){
drhba99af52001-10-25 20:37:16 +00001625 unsigned long offset;
drh75897232000-05-29 14:26:00 +00001626 char *ep;
1627 char *set[LISTSIZE];
1628 int i;
drhba99af52001-10-25 20:37:16 +00001629 offset = (unsigned long)next - (unsigned long)list;
drh75897232000-05-29 14:26:00 +00001630 for(i=0; i<LISTSIZE; i++) set[i] = 0;
1631 while( list ){
1632 ep = list;
1633 list = NEXT(list);
1634 NEXT(ep) = 0;
1635 for(i=0; i<LISTSIZE-1 && set[i]!=0; i++){
1636 ep = merge(ep,set[i],cmp,offset);
1637 set[i] = 0;
1638 }
1639 set[i] = ep;
1640 }
1641 ep = 0;
1642 for(i=0; i<LISTSIZE; i++) if( set[i] ) ep = merge(ep,set[i],cmp,offset);
1643 return ep;
1644}
1645/************************ From the file "option.c" **************************/
1646static char **argv;
1647static struct s_options *op;
1648static FILE *errstream;
1649
1650#define ISOPT(X) ((X)[0]=='-'||(X)[0]=='+'||strchr((X),'=')!=0)
1651
1652/*
1653** Print the command line with a carrot pointing to the k-th character
1654** of the n-th field.
1655*/
1656static void errline(n,k,err)
1657int n;
1658int k;
1659FILE *err;
1660{
1661 int spcnt, i;
drh75897232000-05-29 14:26:00 +00001662 if( argv[0] ) fprintf(err,"%s",argv[0]);
drh87cf1372008-08-13 20:09:06 +00001663 spcnt = lemonStrlen(argv[0]) + 1;
drh75897232000-05-29 14:26:00 +00001664 for(i=1; i<n && argv[i]; i++){
1665 fprintf(err," %s",argv[i]);
drh87cf1372008-08-13 20:09:06 +00001666 spcnt += lemonStrlen(argv[i])+1;
drh75897232000-05-29 14:26:00 +00001667 }
1668 spcnt += k;
1669 for(; argv[i]; i++) fprintf(err," %s",argv[i]);
1670 if( spcnt<20 ){
1671 fprintf(err,"\n%*s^-- here\n",spcnt,"");
1672 }else{
1673 fprintf(err,"\n%*shere --^\n",spcnt-7,"");
1674 }
1675}
1676
1677/*
1678** Return the index of the N-th non-switch argument. Return -1
1679** if N is out of range.
1680*/
1681static int argindex(n)
1682int n;
1683{
1684 int i;
1685 int dashdash = 0;
1686 if( argv!=0 && *argv!=0 ){
1687 for(i=1; argv[i]; i++){
1688 if( dashdash || !ISOPT(argv[i]) ){
1689 if( n==0 ) return i;
1690 n--;
1691 }
1692 if( strcmp(argv[i],"--")==0 ) dashdash = 1;
1693 }
1694 }
1695 return -1;
1696}
1697
1698static char emsg[] = "Command line syntax error: ";
1699
1700/*
1701** Process a flag command line argument.
1702*/
1703static int handleflags(i,err)
1704int i;
1705FILE *err;
1706{
1707 int v;
1708 int errcnt = 0;
1709 int j;
1710 for(j=0; op[j].label; j++){
drh87cf1372008-08-13 20:09:06 +00001711 if( strncmp(&argv[i][1],op[j].label,lemonStrlen(op[j].label))==0 ) break;
drh75897232000-05-29 14:26:00 +00001712 }
1713 v = argv[i][0]=='-' ? 1 : 0;
1714 if( op[j].label==0 ){
1715 if( err ){
1716 fprintf(err,"%sundefined option.\n",emsg);
1717 errline(i,1,err);
1718 }
1719 errcnt++;
1720 }else if( op[j].type==OPT_FLAG ){
1721 *((int*)op[j].arg) = v;
1722 }else if( op[j].type==OPT_FFLAG ){
1723 (*(void(*)())(op[j].arg))(v);
drh6d08b4d2004-07-20 12:45:22 +00001724 }else if( op[j].type==OPT_FSTR ){
1725 (*(void(*)())(op[j].arg))(&argv[i][2]);
drh75897232000-05-29 14:26:00 +00001726 }else{
1727 if( err ){
1728 fprintf(err,"%smissing argument on switch.\n",emsg);
1729 errline(i,1,err);
1730 }
1731 errcnt++;
1732 }
1733 return errcnt;
1734}
1735
1736/*
1737** Process a command line switch which has an argument.
1738*/
1739static int handleswitch(i,err)
1740int i;
1741FILE *err;
1742{
1743 int lv = 0;
1744 double dv = 0.0;
1745 char *sv = 0, *end;
1746 char *cp;
1747 int j;
1748 int errcnt = 0;
1749 cp = strchr(argv[i],'=');
drh43617e92006-03-06 20:55:46 +00001750 assert( cp!=0 );
drh75897232000-05-29 14:26:00 +00001751 *cp = 0;
1752 for(j=0; op[j].label; j++){
1753 if( strcmp(argv[i],op[j].label)==0 ) break;
1754 }
1755 *cp = '=';
1756 if( op[j].label==0 ){
1757 if( err ){
1758 fprintf(err,"%sundefined option.\n",emsg);
1759 errline(i,0,err);
1760 }
1761 errcnt++;
1762 }else{
1763 cp++;
1764 switch( op[j].type ){
1765 case OPT_FLAG:
1766 case OPT_FFLAG:
1767 if( err ){
1768 fprintf(err,"%soption requires an argument.\n",emsg);
1769 errline(i,0,err);
1770 }
1771 errcnt++;
1772 break;
1773 case OPT_DBL:
1774 case OPT_FDBL:
1775 dv = strtod(cp,&end);
1776 if( *end ){
1777 if( err ){
1778 fprintf(err,"%sillegal character in floating-point argument.\n",emsg);
drhba99af52001-10-25 20:37:16 +00001779 errline(i,((unsigned long)end)-(unsigned long)argv[i],err);
drh75897232000-05-29 14:26:00 +00001780 }
1781 errcnt++;
1782 }
1783 break;
1784 case OPT_INT:
1785 case OPT_FINT:
1786 lv = strtol(cp,&end,0);
1787 if( *end ){
1788 if( err ){
1789 fprintf(err,"%sillegal character in integer argument.\n",emsg);
drhba99af52001-10-25 20:37:16 +00001790 errline(i,((unsigned long)end)-(unsigned long)argv[i],err);
drh75897232000-05-29 14:26:00 +00001791 }
1792 errcnt++;
1793 }
1794 break;
1795 case OPT_STR:
1796 case OPT_FSTR:
1797 sv = cp;
1798 break;
1799 }
1800 switch( op[j].type ){
1801 case OPT_FLAG:
1802 case OPT_FFLAG:
1803 break;
1804 case OPT_DBL:
1805 *(double*)(op[j].arg) = dv;
1806 break;
1807 case OPT_FDBL:
1808 (*(void(*)())(op[j].arg))(dv);
1809 break;
1810 case OPT_INT:
1811 *(int*)(op[j].arg) = lv;
1812 break;
1813 case OPT_FINT:
1814 (*(void(*)())(op[j].arg))((int)lv);
1815 break;
1816 case OPT_STR:
1817 *(char**)(op[j].arg) = sv;
1818 break;
1819 case OPT_FSTR:
1820 (*(void(*)())(op[j].arg))(sv);
1821 break;
1822 }
1823 }
1824 return errcnt;
1825}
1826
drhb0c86772000-06-02 23:21:26 +00001827int OptInit(a,o,err)
drh75897232000-05-29 14:26:00 +00001828char **a;
1829struct s_options *o;
1830FILE *err;
1831{
1832 int errcnt = 0;
1833 argv = a;
1834 op = o;
1835 errstream = err;
1836 if( argv && *argv && op ){
1837 int i;
1838 for(i=1; argv[i]; i++){
1839 if( argv[i][0]=='+' || argv[i][0]=='-' ){
1840 errcnt += handleflags(i,err);
1841 }else if( strchr(argv[i],'=') ){
1842 errcnt += handleswitch(i,err);
1843 }
1844 }
1845 }
1846 if( errcnt>0 ){
1847 fprintf(err,"Valid command line options for \"%s\" are:\n",*a);
drhb0c86772000-06-02 23:21:26 +00001848 OptPrint();
drh75897232000-05-29 14:26:00 +00001849 exit(1);
1850 }
1851 return 0;
1852}
1853
drhb0c86772000-06-02 23:21:26 +00001854int OptNArgs(){
drh75897232000-05-29 14:26:00 +00001855 int cnt = 0;
1856 int dashdash = 0;
1857 int i;
1858 if( argv!=0 && argv[0]!=0 ){
1859 for(i=1; argv[i]; i++){
1860 if( dashdash || !ISOPT(argv[i]) ) cnt++;
1861 if( strcmp(argv[i],"--")==0 ) dashdash = 1;
1862 }
1863 }
1864 return cnt;
1865}
1866
drhb0c86772000-06-02 23:21:26 +00001867char *OptArg(n)
drh75897232000-05-29 14:26:00 +00001868int n;
1869{
1870 int i;
1871 i = argindex(n);
1872 return i>=0 ? argv[i] : 0;
1873}
1874
drhb0c86772000-06-02 23:21:26 +00001875void OptErr(n)
drh75897232000-05-29 14:26:00 +00001876int n;
1877{
1878 int i;
1879 i = argindex(n);
1880 if( i>=0 ) errline(i,0,errstream);
1881}
1882
drhb0c86772000-06-02 23:21:26 +00001883void OptPrint(){
drh75897232000-05-29 14:26:00 +00001884 int i;
1885 int max, len;
1886 max = 0;
1887 for(i=0; op[i].label; i++){
drh87cf1372008-08-13 20:09:06 +00001888 len = lemonStrlen(op[i].label) + 1;
drh75897232000-05-29 14:26:00 +00001889 switch( op[i].type ){
1890 case OPT_FLAG:
1891 case OPT_FFLAG:
1892 break;
1893 case OPT_INT:
1894 case OPT_FINT:
1895 len += 9; /* length of "<integer>" */
1896 break;
1897 case OPT_DBL:
1898 case OPT_FDBL:
1899 len += 6; /* length of "<real>" */
1900 break;
1901 case OPT_STR:
1902 case OPT_FSTR:
1903 len += 8; /* length of "<string>" */
1904 break;
1905 }
1906 if( len>max ) max = len;
1907 }
1908 for(i=0; op[i].label; i++){
1909 switch( op[i].type ){
1910 case OPT_FLAG:
1911 case OPT_FFLAG:
1912 fprintf(errstream," -%-*s %s\n",max,op[i].label,op[i].message);
1913 break;
1914 case OPT_INT:
1915 case OPT_FINT:
1916 fprintf(errstream," %s=<integer>%*s %s\n",op[i].label,
drh87cf1372008-08-13 20:09:06 +00001917 (int)(max-lemonStrlen(op[i].label)-9),"",op[i].message);
drh75897232000-05-29 14:26:00 +00001918 break;
1919 case OPT_DBL:
1920 case OPT_FDBL:
1921 fprintf(errstream," %s=<real>%*s %s\n",op[i].label,
drh87cf1372008-08-13 20:09:06 +00001922 (int)(max-lemonStrlen(op[i].label)-6),"",op[i].message);
drh75897232000-05-29 14:26:00 +00001923 break;
1924 case OPT_STR:
1925 case OPT_FSTR:
1926 fprintf(errstream," %s=<string>%*s %s\n",op[i].label,
drh87cf1372008-08-13 20:09:06 +00001927 (int)(max-lemonStrlen(op[i].label)-8),"",op[i].message);
drh75897232000-05-29 14:26:00 +00001928 break;
1929 }
1930 }
1931}
1932/*********************** From the file "parse.c" ****************************/
1933/*
1934** Input file parser for the LEMON parser generator.
1935*/
1936
1937/* The state of the parser */
1938struct pstate {
1939 char *filename; /* Name of the input file */
1940 int tokenlineno; /* Linenumber at which current token starts */
1941 int errorcnt; /* Number of errors so far */
1942 char *tokenstart; /* Text of current token */
1943 struct lemon *gp; /* Global state vector */
1944 enum e_state {
1945 INITIALIZE,
1946 WAITING_FOR_DECL_OR_RULE,
1947 WAITING_FOR_DECL_KEYWORD,
1948 WAITING_FOR_DECL_ARG,
1949 WAITING_FOR_PRECEDENCE_SYMBOL,
1950 WAITING_FOR_ARROW,
1951 IN_RHS,
1952 LHS_ALIAS_1,
1953 LHS_ALIAS_2,
1954 LHS_ALIAS_3,
1955 RHS_ALIAS_1,
1956 RHS_ALIAS_2,
1957 PRECEDENCE_MARK_1,
1958 PRECEDENCE_MARK_2,
1959 RESYNC_AFTER_RULE_ERROR,
1960 RESYNC_AFTER_DECL_ERROR,
1961 WAITING_FOR_DESTRUCTOR_SYMBOL,
drh0bd1f4e2002-06-06 18:54:39 +00001962 WAITING_FOR_DATATYPE_SYMBOL,
drhe09daa92006-06-10 13:29:31 +00001963 WAITING_FOR_FALLBACK_ID,
1964 WAITING_FOR_WILDCARD_ID
drh75897232000-05-29 14:26:00 +00001965 } state; /* The state of the parser */
drh0bd1f4e2002-06-06 18:54:39 +00001966 struct symbol *fallback; /* The fallback token */
drh75897232000-05-29 14:26:00 +00001967 struct symbol *lhs; /* Left-hand side of current rule */
1968 char *lhsalias; /* Alias for the LHS */
1969 int nrhs; /* Number of right-hand side symbols seen */
1970 struct symbol *rhs[MAXRHS]; /* RHS symbols */
1971 char *alias[MAXRHS]; /* Aliases for each RHS symbol (or NULL) */
1972 struct rule *prevrule; /* Previous rule parsed */
1973 char *declkeyword; /* Keyword of a declaration */
1974 char **declargslot; /* Where the declaration argument should be put */
drha5808f32008-04-27 22:19:44 +00001975 int insertLineMacro; /* Add #line before declaration insert */
drh4dc8ef52008-07-01 17:13:57 +00001976 int *decllinenoslot; /* Where to write declaration line number */
drh75897232000-05-29 14:26:00 +00001977 enum e_assoc declassoc; /* Assign this association to decl arguments */
1978 int preccounter; /* Assign this precedence to decl arguments */
1979 struct rule *firstrule; /* Pointer to first rule in the grammar */
1980 struct rule *lastrule; /* Pointer to the most recently parsed rule */
1981};
1982
1983/* Parse a single token */
1984static void parseonetoken(psp)
1985struct pstate *psp;
1986{
1987 char *x;
1988 x = Strsafe(psp->tokenstart); /* Save the token permanently */
1989#if 0
1990 printf("%s:%d: Token=[%s] state=%d\n",psp->filename,psp->tokenlineno,
1991 x,psp->state);
1992#endif
1993 switch( psp->state ){
1994 case INITIALIZE:
1995 psp->prevrule = 0;
1996 psp->preccounter = 0;
1997 psp->firstrule = psp->lastrule = 0;
1998 psp->gp->nrule = 0;
1999 /* Fall thru to next case */
2000 case WAITING_FOR_DECL_OR_RULE:
2001 if( x[0]=='%' ){
2002 psp->state = WAITING_FOR_DECL_KEYWORD;
2003 }else if( islower(x[0]) ){
2004 psp->lhs = Symbol_new(x);
2005 psp->nrhs = 0;
2006 psp->lhsalias = 0;
2007 psp->state = WAITING_FOR_ARROW;
2008 }else if( x[0]=='{' ){
2009 if( psp->prevrule==0 ){
2010 ErrorMsg(psp->filename,psp->tokenlineno,
drh34ff57b2008-07-14 12:27:51 +00002011"There is no prior rule opon which to attach the code \
drh75897232000-05-29 14:26:00 +00002012fragment which begins on this line.");
2013 psp->errorcnt++;
2014 }else if( psp->prevrule->code!=0 ){
2015 ErrorMsg(psp->filename,psp->tokenlineno,
2016"Code fragment beginning on this line is not the first \
2017to follow the previous rule.");
2018 psp->errorcnt++;
2019 }else{
2020 psp->prevrule->line = psp->tokenlineno;
2021 psp->prevrule->code = &x[1];
2022 }
2023 }else if( x[0]=='[' ){
2024 psp->state = PRECEDENCE_MARK_1;
2025 }else{
2026 ErrorMsg(psp->filename,psp->tokenlineno,
2027 "Token \"%s\" should be either \"%%\" or a nonterminal name.",
2028 x);
2029 psp->errorcnt++;
2030 }
2031 break;
2032 case PRECEDENCE_MARK_1:
2033 if( !isupper(x[0]) ){
2034 ErrorMsg(psp->filename,psp->tokenlineno,
2035 "The precedence symbol must be a terminal.");
2036 psp->errorcnt++;
2037 }else if( psp->prevrule==0 ){
2038 ErrorMsg(psp->filename,psp->tokenlineno,
2039 "There is no prior rule to assign precedence \"[%s]\".",x);
2040 psp->errorcnt++;
2041 }else if( psp->prevrule->precsym!=0 ){
2042 ErrorMsg(psp->filename,psp->tokenlineno,
2043"Precedence mark on this line is not the first \
2044to follow the previous rule.");
2045 psp->errorcnt++;
2046 }else{
2047 psp->prevrule->precsym = Symbol_new(x);
2048 }
2049 psp->state = PRECEDENCE_MARK_2;
2050 break;
2051 case PRECEDENCE_MARK_2:
2052 if( x[0]!=']' ){
2053 ErrorMsg(psp->filename,psp->tokenlineno,
2054 "Missing \"]\" on precedence mark.");
2055 psp->errorcnt++;
2056 }
2057 psp->state = WAITING_FOR_DECL_OR_RULE;
2058 break;
2059 case WAITING_FOR_ARROW:
2060 if( x[0]==':' && x[1]==':' && x[2]=='=' ){
2061 psp->state = IN_RHS;
2062 }else if( x[0]=='(' ){
2063 psp->state = LHS_ALIAS_1;
2064 }else{
2065 ErrorMsg(psp->filename,psp->tokenlineno,
2066 "Expected to see a \":\" following the LHS symbol \"%s\".",
2067 psp->lhs->name);
2068 psp->errorcnt++;
2069 psp->state = RESYNC_AFTER_RULE_ERROR;
2070 }
2071 break;
2072 case LHS_ALIAS_1:
2073 if( isalpha(x[0]) ){
2074 psp->lhsalias = x;
2075 psp->state = LHS_ALIAS_2;
2076 }else{
2077 ErrorMsg(psp->filename,psp->tokenlineno,
2078 "\"%s\" is not a valid alias for the LHS \"%s\"\n",
2079 x,psp->lhs->name);
2080 psp->errorcnt++;
2081 psp->state = RESYNC_AFTER_RULE_ERROR;
2082 }
2083 break;
2084 case LHS_ALIAS_2:
2085 if( x[0]==')' ){
2086 psp->state = LHS_ALIAS_3;
2087 }else{
2088 ErrorMsg(psp->filename,psp->tokenlineno,
2089 "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias);
2090 psp->errorcnt++;
2091 psp->state = RESYNC_AFTER_RULE_ERROR;
2092 }
2093 break;
2094 case LHS_ALIAS_3:
2095 if( x[0]==':' && x[1]==':' && x[2]=='=' ){
2096 psp->state = IN_RHS;
2097 }else{
2098 ErrorMsg(psp->filename,psp->tokenlineno,
2099 "Missing \"->\" following: \"%s(%s)\".",
2100 psp->lhs->name,psp->lhsalias);
2101 psp->errorcnt++;
2102 psp->state = RESYNC_AFTER_RULE_ERROR;
2103 }
2104 break;
2105 case IN_RHS:
2106 if( x[0]=='.' ){
2107 struct rule *rp;
drh9892c5d2007-12-21 00:02:11 +00002108 rp = (struct rule *)calloc( sizeof(struct rule) +
2109 sizeof(struct symbol*)*psp->nrhs + sizeof(char*)*psp->nrhs, 1);
drh75897232000-05-29 14:26:00 +00002110 if( rp==0 ){
2111 ErrorMsg(psp->filename,psp->tokenlineno,
2112 "Can't allocate enough memory for this rule.");
2113 psp->errorcnt++;
2114 psp->prevrule = 0;
2115 }else{
2116 int i;
2117 rp->ruleline = psp->tokenlineno;
2118 rp->rhs = (struct symbol**)&rp[1];
2119 rp->rhsalias = (char**)&(rp->rhs[psp->nrhs]);
2120 for(i=0; i<psp->nrhs; i++){
2121 rp->rhs[i] = psp->rhs[i];
2122 rp->rhsalias[i] = psp->alias[i];
2123 }
2124 rp->lhs = psp->lhs;
2125 rp->lhsalias = psp->lhsalias;
2126 rp->nrhs = psp->nrhs;
2127 rp->code = 0;
2128 rp->precsym = 0;
2129 rp->index = psp->gp->nrule++;
2130 rp->nextlhs = rp->lhs->rule;
2131 rp->lhs->rule = rp;
2132 rp->next = 0;
2133 if( psp->firstrule==0 ){
2134 psp->firstrule = psp->lastrule = rp;
2135 }else{
2136 psp->lastrule->next = rp;
2137 psp->lastrule = rp;
2138 }
2139 psp->prevrule = rp;
2140 }
2141 psp->state = WAITING_FOR_DECL_OR_RULE;
2142 }else if( isalpha(x[0]) ){
2143 if( psp->nrhs>=MAXRHS ){
2144 ErrorMsg(psp->filename,psp->tokenlineno,
drhc4dd3fd2008-01-22 01:48:05 +00002145 "Too many symbols on RHS of rule beginning at \"%s\".",
drh75897232000-05-29 14:26:00 +00002146 x);
2147 psp->errorcnt++;
2148 psp->state = RESYNC_AFTER_RULE_ERROR;
2149 }else{
2150 psp->rhs[psp->nrhs] = Symbol_new(x);
2151 psp->alias[psp->nrhs] = 0;
2152 psp->nrhs++;
2153 }
drhfd405312005-11-06 04:06:59 +00002154 }else if( (x[0]=='|' || x[0]=='/') && psp->nrhs>0 ){
2155 struct symbol *msp = psp->rhs[psp->nrhs-1];
2156 if( msp->type!=MULTITERMINAL ){
2157 struct symbol *origsp = msp;
drh9892c5d2007-12-21 00:02:11 +00002158 msp = calloc(1,sizeof(*msp));
drhfd405312005-11-06 04:06:59 +00002159 memset(msp, 0, sizeof(*msp));
2160 msp->type = MULTITERMINAL;
2161 msp->nsubsym = 1;
drh9892c5d2007-12-21 00:02:11 +00002162 msp->subsym = calloc(1,sizeof(struct symbol*));
drhfd405312005-11-06 04:06:59 +00002163 msp->subsym[0] = origsp;
2164 msp->name = origsp->name;
2165 psp->rhs[psp->nrhs-1] = msp;
2166 }
2167 msp->nsubsym++;
2168 msp->subsym = realloc(msp->subsym, sizeof(struct symbol*)*msp->nsubsym);
2169 msp->subsym[msp->nsubsym-1] = Symbol_new(&x[1]);
2170 if( islower(x[1]) || islower(msp->subsym[0]->name[0]) ){
2171 ErrorMsg(psp->filename,psp->tokenlineno,
2172 "Cannot form a compound containing a non-terminal");
2173 psp->errorcnt++;
2174 }
drh75897232000-05-29 14:26:00 +00002175 }else if( x[0]=='(' && psp->nrhs>0 ){
2176 psp->state = RHS_ALIAS_1;
2177 }else{
2178 ErrorMsg(psp->filename,psp->tokenlineno,
2179 "Illegal character on RHS of rule: \"%s\".",x);
2180 psp->errorcnt++;
2181 psp->state = RESYNC_AFTER_RULE_ERROR;
2182 }
2183 break;
2184 case RHS_ALIAS_1:
2185 if( isalpha(x[0]) ){
2186 psp->alias[psp->nrhs-1] = x;
2187 psp->state = RHS_ALIAS_2;
2188 }else{
2189 ErrorMsg(psp->filename,psp->tokenlineno,
2190 "\"%s\" is not a valid alias for the RHS symbol \"%s\"\n",
2191 x,psp->rhs[psp->nrhs-1]->name);
2192 psp->errorcnt++;
2193 psp->state = RESYNC_AFTER_RULE_ERROR;
2194 }
2195 break;
2196 case RHS_ALIAS_2:
2197 if( x[0]==')' ){
2198 psp->state = IN_RHS;
2199 }else{
2200 ErrorMsg(psp->filename,psp->tokenlineno,
2201 "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias);
2202 psp->errorcnt++;
2203 psp->state = RESYNC_AFTER_RULE_ERROR;
2204 }
2205 break;
2206 case WAITING_FOR_DECL_KEYWORD:
2207 if( isalpha(x[0]) ){
2208 psp->declkeyword = x;
2209 psp->declargslot = 0;
drh4dc8ef52008-07-01 17:13:57 +00002210 psp->decllinenoslot = 0;
drha5808f32008-04-27 22:19:44 +00002211 psp->insertLineMacro = 1;
drh75897232000-05-29 14:26:00 +00002212 psp->state = WAITING_FOR_DECL_ARG;
2213 if( strcmp(x,"name")==0 ){
2214 psp->declargslot = &(psp->gp->name);
drha5808f32008-04-27 22:19:44 +00002215 psp->insertLineMacro = 0;
drh75897232000-05-29 14:26:00 +00002216 }else if( strcmp(x,"include")==0 ){
2217 psp->declargslot = &(psp->gp->include);
drh75897232000-05-29 14:26:00 +00002218 }else if( strcmp(x,"code")==0 ){
2219 psp->declargslot = &(psp->gp->extracode);
drh75897232000-05-29 14:26:00 +00002220 }else if( strcmp(x,"token_destructor")==0 ){
2221 psp->declargslot = &psp->gp->tokendest;
drh960e8c62001-04-03 16:53:21 +00002222 }else if( strcmp(x,"default_destructor")==0 ){
2223 psp->declargslot = &psp->gp->vardest;
drh75897232000-05-29 14:26:00 +00002224 }else if( strcmp(x,"token_prefix")==0 ){
2225 psp->declargslot = &psp->gp->tokenprefix;
drha5808f32008-04-27 22:19:44 +00002226 psp->insertLineMacro = 0;
drh75897232000-05-29 14:26:00 +00002227 }else if( strcmp(x,"syntax_error")==0 ){
2228 psp->declargslot = &(psp->gp->error);
drh75897232000-05-29 14:26:00 +00002229 }else if( strcmp(x,"parse_accept")==0 ){
2230 psp->declargslot = &(psp->gp->accept);
drh75897232000-05-29 14:26:00 +00002231 }else if( strcmp(x,"parse_failure")==0 ){
2232 psp->declargslot = &(psp->gp->failure);
drh75897232000-05-29 14:26:00 +00002233 }else if( strcmp(x,"stack_overflow")==0 ){
2234 psp->declargslot = &(psp->gp->overflow);
drh75897232000-05-29 14:26:00 +00002235 }else if( strcmp(x,"extra_argument")==0 ){
2236 psp->declargslot = &(psp->gp->arg);
drha5808f32008-04-27 22:19:44 +00002237 psp->insertLineMacro = 0;
drh75897232000-05-29 14:26:00 +00002238 }else if( strcmp(x,"token_type")==0 ){
2239 psp->declargslot = &(psp->gp->tokentype);
drha5808f32008-04-27 22:19:44 +00002240 psp->insertLineMacro = 0;
drh960e8c62001-04-03 16:53:21 +00002241 }else if( strcmp(x,"default_type")==0 ){
2242 psp->declargslot = &(psp->gp->vartype);
drha5808f32008-04-27 22:19:44 +00002243 psp->insertLineMacro = 0;
drh75897232000-05-29 14:26:00 +00002244 }else if( strcmp(x,"stack_size")==0 ){
2245 psp->declargslot = &(psp->gp->stacksize);
drha5808f32008-04-27 22:19:44 +00002246 psp->insertLineMacro = 0;
drh75897232000-05-29 14:26:00 +00002247 }else if( strcmp(x,"start_symbol")==0 ){
2248 psp->declargslot = &(psp->gp->start);
drha5808f32008-04-27 22:19:44 +00002249 psp->insertLineMacro = 0;
drh75897232000-05-29 14:26:00 +00002250 }else if( strcmp(x,"left")==0 ){
2251 psp->preccounter++;
2252 psp->declassoc = LEFT;
2253 psp->state = WAITING_FOR_PRECEDENCE_SYMBOL;
2254 }else if( strcmp(x,"right")==0 ){
2255 psp->preccounter++;
2256 psp->declassoc = RIGHT;
2257 psp->state = WAITING_FOR_PRECEDENCE_SYMBOL;
2258 }else if( strcmp(x,"nonassoc")==0 ){
2259 psp->preccounter++;
2260 psp->declassoc = NONE;
2261 psp->state = WAITING_FOR_PRECEDENCE_SYMBOL;
2262 }else if( strcmp(x,"destructor")==0 ){
2263 psp->state = WAITING_FOR_DESTRUCTOR_SYMBOL;
2264 }else if( strcmp(x,"type")==0 ){
2265 psp->state = WAITING_FOR_DATATYPE_SYMBOL;
drh0bd1f4e2002-06-06 18:54:39 +00002266 }else if( strcmp(x,"fallback")==0 ){
2267 psp->fallback = 0;
2268 psp->state = WAITING_FOR_FALLBACK_ID;
drhe09daa92006-06-10 13:29:31 +00002269 }else if( strcmp(x,"wildcard")==0 ){
2270 psp->state = WAITING_FOR_WILDCARD_ID;
drh75897232000-05-29 14:26:00 +00002271 }else{
2272 ErrorMsg(psp->filename,psp->tokenlineno,
2273 "Unknown declaration keyword: \"%%%s\".",x);
2274 psp->errorcnt++;
2275 psp->state = RESYNC_AFTER_DECL_ERROR;
2276 }
2277 }else{
2278 ErrorMsg(psp->filename,psp->tokenlineno,
2279 "Illegal declaration keyword: \"%s\".",x);
2280 psp->errorcnt++;
2281 psp->state = RESYNC_AFTER_DECL_ERROR;
2282 }
2283 break;
2284 case WAITING_FOR_DESTRUCTOR_SYMBOL:
2285 if( !isalpha(x[0]) ){
2286 ErrorMsg(psp->filename,psp->tokenlineno,
2287 "Symbol name missing after %destructor keyword");
2288 psp->errorcnt++;
2289 psp->state = RESYNC_AFTER_DECL_ERROR;
2290 }else{
2291 struct symbol *sp = Symbol_new(x);
2292 psp->declargslot = &sp->destructor;
drh4dc8ef52008-07-01 17:13:57 +00002293 psp->decllinenoslot = &sp->destLineno;
drha5808f32008-04-27 22:19:44 +00002294 psp->insertLineMacro = 1;
drh75897232000-05-29 14:26:00 +00002295 psp->state = WAITING_FOR_DECL_ARG;
2296 }
2297 break;
2298 case WAITING_FOR_DATATYPE_SYMBOL:
2299 if( !isalpha(x[0]) ){
2300 ErrorMsg(psp->filename,psp->tokenlineno,
2301 "Symbol name missing after %destructor keyword");
2302 psp->errorcnt++;
2303 psp->state = RESYNC_AFTER_DECL_ERROR;
2304 }else{
2305 struct symbol *sp = Symbol_new(x);
2306 psp->declargslot = &sp->datatype;
drha5808f32008-04-27 22:19:44 +00002307 psp->insertLineMacro = 0;
drh75897232000-05-29 14:26:00 +00002308 psp->state = WAITING_FOR_DECL_ARG;
2309 }
2310 break;
2311 case WAITING_FOR_PRECEDENCE_SYMBOL:
2312 if( x[0]=='.' ){
2313 psp->state = WAITING_FOR_DECL_OR_RULE;
2314 }else if( isupper(x[0]) ){
2315 struct symbol *sp;
2316 sp = Symbol_new(x);
2317 if( sp->prec>=0 ){
2318 ErrorMsg(psp->filename,psp->tokenlineno,
2319 "Symbol \"%s\" has already be given a precedence.",x);
2320 psp->errorcnt++;
2321 }else{
2322 sp->prec = psp->preccounter;
2323 sp->assoc = psp->declassoc;
2324 }
2325 }else{
2326 ErrorMsg(psp->filename,psp->tokenlineno,
2327 "Can't assign a precedence to \"%s\".",x);
2328 psp->errorcnt++;
2329 }
2330 break;
2331 case WAITING_FOR_DECL_ARG:
drha5808f32008-04-27 22:19:44 +00002332 if( x[0]=='{' || x[0]=='\"' || isalnum(x[0]) ){
2333 char *zOld, *zNew, *zBuf, *z;
2334 int nOld, n, nLine, nNew, nBack;
drhb5bd49e2008-07-14 12:21:08 +00002335 int addLineMacro;
drha5808f32008-04-27 22:19:44 +00002336 char zLine[50];
2337 zNew = x;
2338 if( zNew[0]=='"' || zNew[0]=='{' ) zNew++;
drh87cf1372008-08-13 20:09:06 +00002339 nNew = lemonStrlen(zNew);
drha5808f32008-04-27 22:19:44 +00002340 if( *psp->declargslot ){
2341 zOld = *psp->declargslot;
2342 }else{
2343 zOld = "";
2344 }
drh87cf1372008-08-13 20:09:06 +00002345 nOld = lemonStrlen(zOld);
drha5808f32008-04-27 22:19:44 +00002346 n = nOld + nNew + 20;
shane58543932008-12-10 20:10:04 +00002347 addLineMacro = !psp->gp->nolinenosflag && psp->insertLineMacro &&
drhb5bd49e2008-07-14 12:21:08 +00002348 (psp->decllinenoslot==0 || psp->decllinenoslot[0]!=0);
2349 if( addLineMacro ){
drha5808f32008-04-27 22:19:44 +00002350 for(z=psp->filename, nBack=0; *z; z++){
2351 if( *z=='\\' ) nBack++;
2352 }
2353 sprintf(zLine, "#line %d ", psp->tokenlineno);
drh87cf1372008-08-13 20:09:06 +00002354 nLine = lemonStrlen(zLine);
2355 n += nLine + lemonStrlen(psp->filename) + nBack;
drha5808f32008-04-27 22:19:44 +00002356 }
2357 *psp->declargslot = zBuf = realloc(*psp->declargslot, n);
2358 zBuf += nOld;
drhb5bd49e2008-07-14 12:21:08 +00002359 if( addLineMacro ){
drha5808f32008-04-27 22:19:44 +00002360 if( nOld && zBuf[-1]!='\n' ){
2361 *(zBuf++) = '\n';
2362 }
2363 memcpy(zBuf, zLine, nLine);
2364 zBuf += nLine;
2365 *(zBuf++) = '"';
2366 for(z=psp->filename; *z; z++){
2367 if( *z=='\\' ){
2368 *(zBuf++) = '\\';
2369 }
2370 *(zBuf++) = *z;
2371 }
2372 *(zBuf++) = '"';
2373 *(zBuf++) = '\n';
2374 }
drh4dc8ef52008-07-01 17:13:57 +00002375 if( psp->decllinenoslot && psp->decllinenoslot[0]==0 ){
2376 psp->decllinenoslot[0] = psp->tokenlineno;
2377 }
drha5808f32008-04-27 22:19:44 +00002378 memcpy(zBuf, zNew, nNew);
2379 zBuf += nNew;
2380 *zBuf = 0;
2381 psp->state = WAITING_FOR_DECL_OR_RULE;
drh75897232000-05-29 14:26:00 +00002382 }else{
2383 ErrorMsg(psp->filename,psp->tokenlineno,
2384 "Illegal argument to %%%s: %s",psp->declkeyword,x);
2385 psp->errorcnt++;
2386 psp->state = RESYNC_AFTER_DECL_ERROR;
2387 }
2388 break;
drh0bd1f4e2002-06-06 18:54:39 +00002389 case WAITING_FOR_FALLBACK_ID:
2390 if( x[0]=='.' ){
2391 psp->state = WAITING_FOR_DECL_OR_RULE;
2392 }else if( !isupper(x[0]) ){
2393 ErrorMsg(psp->filename, psp->tokenlineno,
2394 "%%fallback argument \"%s\" should be a token", x);
2395 psp->errorcnt++;
2396 }else{
2397 struct symbol *sp = Symbol_new(x);
2398 if( psp->fallback==0 ){
2399 psp->fallback = sp;
2400 }else if( sp->fallback ){
2401 ErrorMsg(psp->filename, psp->tokenlineno,
2402 "More than one fallback assigned to token %s", x);
2403 psp->errorcnt++;
2404 }else{
2405 sp->fallback = psp->fallback;
2406 psp->gp->has_fallback = 1;
2407 }
2408 }
2409 break;
drhe09daa92006-06-10 13:29:31 +00002410 case WAITING_FOR_WILDCARD_ID:
2411 if( x[0]=='.' ){
2412 psp->state = WAITING_FOR_DECL_OR_RULE;
2413 }else if( !isupper(x[0]) ){
2414 ErrorMsg(psp->filename, psp->tokenlineno,
2415 "%%wildcard argument \"%s\" should be a token", x);
2416 psp->errorcnt++;
2417 }else{
2418 struct symbol *sp = Symbol_new(x);
2419 if( psp->gp->wildcard==0 ){
2420 psp->gp->wildcard = sp;
2421 }else{
2422 ErrorMsg(psp->filename, psp->tokenlineno,
2423 "Extra wildcard to token: %s", x);
2424 psp->errorcnt++;
2425 }
2426 }
2427 break;
drh75897232000-05-29 14:26:00 +00002428 case RESYNC_AFTER_RULE_ERROR:
2429/* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE;
2430** break; */
2431 case RESYNC_AFTER_DECL_ERROR:
2432 if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE;
2433 if( x[0]=='%' ) psp->state = WAITING_FOR_DECL_KEYWORD;
2434 break;
2435 }
2436}
2437
drh34ff57b2008-07-14 12:27:51 +00002438/* Run the preprocessor over the input file text. The global variables
drh6d08b4d2004-07-20 12:45:22 +00002439** azDefine[0] through azDefine[nDefine-1] contains the names of all defined
2440** macros. This routine looks for "%ifdef" and "%ifndef" and "%endif" and
2441** comments them out. Text in between is also commented out as appropriate.
2442*/
danielk1977940fac92005-01-23 22:41:37 +00002443static void preprocess_input(char *z){
drh6d08b4d2004-07-20 12:45:22 +00002444 int i, j, k, n;
2445 int exclude = 0;
rse38514a92007-09-20 11:34:17 +00002446 int start = 0;
drh6d08b4d2004-07-20 12:45:22 +00002447 int lineno = 1;
rse38514a92007-09-20 11:34:17 +00002448 int start_lineno = 1;
drh6d08b4d2004-07-20 12:45:22 +00002449 for(i=0; z[i]; i++){
2450 if( z[i]=='\n' ) lineno++;
2451 if( z[i]!='%' || (i>0 && z[i-1]!='\n') ) continue;
2452 if( strncmp(&z[i],"%endif",6)==0 && isspace(z[i+6]) ){
2453 if( exclude ){
2454 exclude--;
2455 if( exclude==0 ){
2456 for(j=start; j<i; j++) if( z[j]!='\n' ) z[j] = ' ';
2457 }
2458 }
2459 for(j=i; z[j] && z[j]!='\n'; j++) z[j] = ' ';
2460 }else if( (strncmp(&z[i],"%ifdef",6)==0 && isspace(z[i+6]))
2461 || (strncmp(&z[i],"%ifndef",7)==0 && isspace(z[i+7])) ){
2462 if( exclude ){
2463 exclude++;
2464 }else{
2465 for(j=i+7; isspace(z[j]); j++){}
2466 for(n=0; z[j+n] && !isspace(z[j+n]); n++){}
2467 exclude = 1;
2468 for(k=0; k<nDefine; k++){
drh87cf1372008-08-13 20:09:06 +00002469 if( strncmp(azDefine[k],&z[j],n)==0 && lemonStrlen(azDefine[k])==n ){
drh6d08b4d2004-07-20 12:45:22 +00002470 exclude = 0;
2471 break;
2472 }
2473 }
2474 if( z[i+3]=='n' ) exclude = !exclude;
2475 if( exclude ){
2476 start = i;
2477 start_lineno = lineno;
2478 }
2479 }
2480 for(j=i; z[j] && z[j]!='\n'; j++) z[j] = ' ';
2481 }
2482 }
2483 if( exclude ){
2484 fprintf(stderr,"unterminated %%ifdef starting on line %d\n", start_lineno);
2485 exit(1);
2486 }
2487}
2488
drh75897232000-05-29 14:26:00 +00002489/* In spite of its name, this function is really a scanner. It read
2490** in the entire input file (all at once) then tokenizes it. Each
2491** token is passed to the function "parseonetoken" which builds all
2492** the appropriate data structures in the global state vector "gp".
2493*/
2494void Parse(gp)
2495struct lemon *gp;
2496{
2497 struct pstate ps;
2498 FILE *fp;
2499 char *filebuf;
2500 int filesize;
2501 int lineno;
2502 int c;
2503 char *cp, *nextcp;
2504 int startline = 0;
2505
rse38514a92007-09-20 11:34:17 +00002506 memset(&ps, '\0', sizeof(ps));
drh75897232000-05-29 14:26:00 +00002507 ps.gp = gp;
2508 ps.filename = gp->filename;
2509 ps.errorcnt = 0;
2510 ps.state = INITIALIZE;
2511
2512 /* Begin by reading the input file */
2513 fp = fopen(ps.filename,"rb");
2514 if( fp==0 ){
2515 ErrorMsg(ps.filename,0,"Can't open this file for reading.");
2516 gp->errorcnt++;
2517 return;
2518 }
2519 fseek(fp,0,2);
2520 filesize = ftell(fp);
2521 rewind(fp);
2522 filebuf = (char *)malloc( filesize+1 );
2523 if( filebuf==0 ){
2524 ErrorMsg(ps.filename,0,"Can't allocate %d of memory to hold this file.",
2525 filesize+1);
2526 gp->errorcnt++;
2527 return;
2528 }
2529 if( fread(filebuf,1,filesize,fp)!=filesize ){
2530 ErrorMsg(ps.filename,0,"Can't read in all %d bytes of this file.",
2531 filesize);
2532 free(filebuf);
2533 gp->errorcnt++;
2534 return;
2535 }
2536 fclose(fp);
2537 filebuf[filesize] = 0;
2538
drh6d08b4d2004-07-20 12:45:22 +00002539 /* Make an initial pass through the file to handle %ifdef and %ifndef */
2540 preprocess_input(filebuf);
2541
drh75897232000-05-29 14:26:00 +00002542 /* Now scan the text of the input file */
2543 lineno = 1;
2544 for(cp=filebuf; (c= *cp)!=0; ){
2545 if( c=='\n' ) lineno++; /* Keep track of the line number */
2546 if( isspace(c) ){ cp++; continue; } /* Skip all white space */
2547 if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments */
2548 cp+=2;
2549 while( (c= *cp)!=0 && c!='\n' ) cp++;
2550 continue;
2551 }
2552 if( c=='/' && cp[1]=='*' ){ /* Skip C style comments */
2553 cp+=2;
2554 while( (c= *cp)!=0 && (c!='/' || cp[-1]!='*') ){
2555 if( c=='\n' ) lineno++;
2556 cp++;
2557 }
2558 if( c ) cp++;
2559 continue;
2560 }
2561 ps.tokenstart = cp; /* Mark the beginning of the token */
2562 ps.tokenlineno = lineno; /* Linenumber on which token begins */
2563 if( c=='\"' ){ /* String literals */
2564 cp++;
2565 while( (c= *cp)!=0 && c!='\"' ){
2566 if( c=='\n' ) lineno++;
2567 cp++;
2568 }
2569 if( c==0 ){
2570 ErrorMsg(ps.filename,startline,
2571"String starting on this line is not terminated before the end of the file.");
2572 ps.errorcnt++;
2573 nextcp = cp;
2574 }else{
2575 nextcp = cp+1;
2576 }
2577 }else if( c=='{' ){ /* A block of C code */
2578 int level;
2579 cp++;
2580 for(level=1; (c= *cp)!=0 && (level>1 || c!='}'); cp++){
2581 if( c=='\n' ) lineno++;
2582 else if( c=='{' ) level++;
2583 else if( c=='}' ) level--;
2584 else if( c=='/' && cp[1]=='*' ){ /* Skip comments */
2585 int prevc;
2586 cp = &cp[2];
2587 prevc = 0;
2588 while( (c= *cp)!=0 && (c!='/' || prevc!='*') ){
2589 if( c=='\n' ) lineno++;
2590 prevc = c;
2591 cp++;
2592 }
2593 }else if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments too */
2594 cp = &cp[2];
2595 while( (c= *cp)!=0 && c!='\n' ) cp++;
2596 if( c ) lineno++;
2597 }else if( c=='\'' || c=='\"' ){ /* String a character literals */
2598 int startchar, prevc;
2599 startchar = c;
2600 prevc = 0;
2601 for(cp++; (c= *cp)!=0 && (c!=startchar || prevc=='\\'); cp++){
2602 if( c=='\n' ) lineno++;
2603 if( prevc=='\\' ) prevc = 0;
2604 else prevc = c;
2605 }
2606 }
2607 }
2608 if( c==0 ){
drh960e8c62001-04-03 16:53:21 +00002609 ErrorMsg(ps.filename,ps.tokenlineno,
drh75897232000-05-29 14:26:00 +00002610"C code starting on this line is not terminated before the end of the file.");
2611 ps.errorcnt++;
2612 nextcp = cp;
2613 }else{
2614 nextcp = cp+1;
2615 }
2616 }else if( isalnum(c) ){ /* Identifiers */
2617 while( (c= *cp)!=0 && (isalnum(c) || c=='_') ) cp++;
2618 nextcp = cp;
2619 }else if( c==':' && cp[1]==':' && cp[2]=='=' ){ /* The operator "::=" */
2620 cp += 3;
2621 nextcp = cp;
drhfd405312005-11-06 04:06:59 +00002622 }else if( (c=='/' || c=='|') && isalpha(cp[1]) ){
2623 cp += 2;
2624 while( (c = *cp)!=0 && (isalnum(c) || c=='_') ) cp++;
2625 nextcp = cp;
drh75897232000-05-29 14:26:00 +00002626 }else{ /* All other (one character) operators */
2627 cp++;
2628 nextcp = cp;
2629 }
2630 c = *cp;
2631 *cp = 0; /* Null terminate the token */
2632 parseonetoken(&ps); /* Parse the token */
2633 *cp = c; /* Restore the buffer */
2634 cp = nextcp;
2635 }
2636 free(filebuf); /* Release the buffer after parsing */
2637 gp->rule = ps.firstrule;
2638 gp->errorcnt = ps.errorcnt;
2639}
2640/*************************** From the file "plink.c" *********************/
2641/*
2642** Routines processing configuration follow-set propagation links
2643** in the LEMON parser generator.
2644*/
2645static struct plink *plink_freelist = 0;
2646
2647/* Allocate a new plink */
2648struct plink *Plink_new(){
2649 struct plink *new;
2650
2651 if( plink_freelist==0 ){
2652 int i;
2653 int amt = 100;
drh9892c5d2007-12-21 00:02:11 +00002654 plink_freelist = (struct plink *)calloc( amt, sizeof(struct plink) );
drh75897232000-05-29 14:26:00 +00002655 if( plink_freelist==0 ){
2656 fprintf(stderr,
2657 "Unable to allocate memory for a new follow-set propagation link.\n");
2658 exit(1);
2659 }
2660 for(i=0; i<amt-1; i++) plink_freelist[i].next = &plink_freelist[i+1];
2661 plink_freelist[amt-1].next = 0;
2662 }
2663 new = plink_freelist;
2664 plink_freelist = plink_freelist->next;
2665 return new;
2666}
2667
2668/* Add a plink to a plink list */
2669void Plink_add(plpp,cfp)
2670struct plink **plpp;
2671struct config *cfp;
2672{
2673 struct plink *new;
2674 new = Plink_new();
2675 new->next = *plpp;
2676 *plpp = new;
2677 new->cfp = cfp;
2678}
2679
2680/* Transfer every plink on the list "from" to the list "to" */
2681void Plink_copy(to,from)
2682struct plink **to;
2683struct plink *from;
2684{
2685 struct plink *nextpl;
2686 while( from ){
2687 nextpl = from->next;
2688 from->next = *to;
2689 *to = from;
2690 from = nextpl;
2691 }
2692}
2693
2694/* Delete every plink on the list */
2695void Plink_delete(plp)
2696struct plink *plp;
2697{
2698 struct plink *nextpl;
2699
2700 while( plp ){
2701 nextpl = plp->next;
2702 plp->next = plink_freelist;
2703 plink_freelist = plp;
2704 plp = nextpl;
2705 }
2706}
2707/*********************** From the file "report.c" **************************/
2708/*
2709** Procedures for generating reports and tables in the LEMON parser generator.
2710*/
2711
2712/* Generate a filename with the given suffix. Space to hold the
2713** name comes from malloc() and must be freed by the calling
2714** function.
2715*/
2716PRIVATE char *file_makename(lemp,suffix)
2717struct lemon *lemp;
2718char *suffix;
2719{
2720 char *name;
2721 char *cp;
2722
drh87cf1372008-08-13 20:09:06 +00002723 name = malloc( lemonStrlen(lemp->filename) + lemonStrlen(suffix) + 5 );
drh75897232000-05-29 14:26:00 +00002724 if( name==0 ){
2725 fprintf(stderr,"Can't allocate space for a filename.\n");
2726 exit(1);
2727 }
2728 strcpy(name,lemp->filename);
2729 cp = strrchr(name,'.');
2730 if( cp ) *cp = 0;
2731 strcat(name,suffix);
2732 return name;
2733}
2734
2735/* Open a file with a name based on the name of the input file,
2736** but with a different (specified) suffix, and return a pointer
2737** to the stream */
2738PRIVATE FILE *file_open(lemp,suffix,mode)
2739struct lemon *lemp;
2740char *suffix;
2741char *mode;
2742{
2743 FILE *fp;
2744
2745 if( lemp->outname ) free(lemp->outname);
2746 lemp->outname = file_makename(lemp, suffix);
2747 fp = fopen(lemp->outname,mode);
2748 if( fp==0 && *mode=='w' ){
2749 fprintf(stderr,"Can't open file \"%s\".\n",lemp->outname);
2750 lemp->errorcnt++;
2751 return 0;
2752 }
2753 return fp;
2754}
2755
2756/* Duplicate the input file without comments and without actions
2757** on rules */
2758void Reprint(lemp)
2759struct lemon *lemp;
2760{
2761 struct rule *rp;
2762 struct symbol *sp;
2763 int i, j, maxlen, len, ncolumns, skip;
2764 printf("// Reprint of input file \"%s\".\n// Symbols:\n",lemp->filename);
2765 maxlen = 10;
2766 for(i=0; i<lemp->nsymbol; i++){
2767 sp = lemp->symbols[i];
drh87cf1372008-08-13 20:09:06 +00002768 len = lemonStrlen(sp->name);
drh75897232000-05-29 14:26:00 +00002769 if( len>maxlen ) maxlen = len;
2770 }
2771 ncolumns = 76/(maxlen+5);
2772 if( ncolumns<1 ) ncolumns = 1;
2773 skip = (lemp->nsymbol + ncolumns - 1)/ncolumns;
2774 for(i=0; i<skip; i++){
2775 printf("//");
2776 for(j=i; j<lemp->nsymbol; j+=skip){
2777 sp = lemp->symbols[j];
2778 assert( sp->index==j );
2779 printf(" %3d %-*.*s",j,maxlen,maxlen,sp->name);
2780 }
2781 printf("\n");
2782 }
2783 for(rp=lemp->rule; rp; rp=rp->next){
2784 printf("%s",rp->lhs->name);
drhfd405312005-11-06 04:06:59 +00002785 /* if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */
drh75897232000-05-29 14:26:00 +00002786 printf(" ::=");
2787 for(i=0; i<rp->nrhs; i++){
drhfd405312005-11-06 04:06:59 +00002788 sp = rp->rhs[i];
2789 printf(" %s", sp->name);
2790 if( sp->type==MULTITERMINAL ){
2791 for(j=1; j<sp->nsubsym; j++){
2792 printf("|%s", sp->subsym[j]->name);
2793 }
2794 }
2795 /* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */
drh75897232000-05-29 14:26:00 +00002796 }
2797 printf(".");
2798 if( rp->precsym ) printf(" [%s]",rp->precsym->name);
drhfd405312005-11-06 04:06:59 +00002799 /* if( rp->code ) printf("\n %s",rp->code); */
drh75897232000-05-29 14:26:00 +00002800 printf("\n");
2801 }
2802}
2803
2804void ConfigPrint(fp,cfp)
2805FILE *fp;
2806struct config *cfp;
2807{
2808 struct rule *rp;
drhfd405312005-11-06 04:06:59 +00002809 struct symbol *sp;
2810 int i, j;
drh75897232000-05-29 14:26:00 +00002811 rp = cfp->rp;
2812 fprintf(fp,"%s ::=",rp->lhs->name);
2813 for(i=0; i<=rp->nrhs; i++){
2814 if( i==cfp->dot ) fprintf(fp," *");
2815 if( i==rp->nrhs ) break;
drhfd405312005-11-06 04:06:59 +00002816 sp = rp->rhs[i];
2817 fprintf(fp," %s", sp->name);
2818 if( sp->type==MULTITERMINAL ){
2819 for(j=1; j<sp->nsubsym; j++){
2820 fprintf(fp,"|%s",sp->subsym[j]->name);
2821 }
2822 }
drh75897232000-05-29 14:26:00 +00002823 }
2824}
2825
2826/* #define TEST */
drhfd405312005-11-06 04:06:59 +00002827#if 0
drh75897232000-05-29 14:26:00 +00002828/* Print a set */
2829PRIVATE void SetPrint(out,set,lemp)
2830FILE *out;
2831char *set;
2832struct lemon *lemp;
2833{
2834 int i;
2835 char *spacer;
2836 spacer = "";
2837 fprintf(out,"%12s[","");
2838 for(i=0; i<lemp->nterminal; i++){
2839 if( SetFind(set,i) ){
2840 fprintf(out,"%s%s",spacer,lemp->symbols[i]->name);
2841 spacer = " ";
2842 }
2843 }
2844 fprintf(out,"]\n");
2845}
2846
2847/* Print a plink chain */
2848PRIVATE void PlinkPrint(out,plp,tag)
2849FILE *out;
2850struct plink *plp;
2851char *tag;
2852{
2853 while( plp ){
drhada354d2005-11-05 15:03:59 +00002854 fprintf(out,"%12s%s (state %2d) ","",tag,plp->cfp->stp->statenum);
drh75897232000-05-29 14:26:00 +00002855 ConfigPrint(out,plp->cfp);
2856 fprintf(out,"\n");
2857 plp = plp->next;
2858 }
2859}
2860#endif
2861
2862/* Print an action to the given file descriptor. Return FALSE if
2863** nothing was actually printed.
2864*/
2865int PrintAction(struct action *ap, FILE *fp, int indent){
2866 int result = 1;
2867 switch( ap->type ){
2868 case SHIFT:
drhada354d2005-11-05 15:03:59 +00002869 fprintf(fp,"%*s shift %d",indent,ap->sp->name,ap->x.stp->statenum);
drh75897232000-05-29 14:26:00 +00002870 break;
2871 case REDUCE:
2872 fprintf(fp,"%*s reduce %d",indent,ap->sp->name,ap->x.rp->index);
2873 break;
2874 case ACCEPT:
2875 fprintf(fp,"%*s accept",indent,ap->sp->name);
2876 break;
2877 case ERROR:
2878 fprintf(fp,"%*s error",indent,ap->sp->name);
2879 break;
drh9892c5d2007-12-21 00:02:11 +00002880 case SRCONFLICT:
2881 case RRCONFLICT:
drh75897232000-05-29 14:26:00 +00002882 fprintf(fp,"%*s reduce %-3d ** Parsing conflict **",
2883 indent,ap->sp->name,ap->x.rp->index);
2884 break;
drh9892c5d2007-12-21 00:02:11 +00002885 case SSCONFLICT:
2886 fprintf(fp,"%*s shift %d ** Parsing conflict **",
2887 indent,ap->sp->name,ap->x.stp->statenum);
2888 break;
drh75897232000-05-29 14:26:00 +00002889 case SH_RESOLVED:
2890 case RD_RESOLVED:
2891 case NOT_USED:
2892 result = 0;
2893 break;
2894 }
2895 return result;
2896}
2897
2898/* Generate the "y.output" log file */
2899void ReportOutput(lemp)
2900struct lemon *lemp;
2901{
2902 int i;
2903 struct state *stp;
2904 struct config *cfp;
2905 struct action *ap;
2906 FILE *fp;
2907
drh2aa6ca42004-09-10 00:14:04 +00002908 fp = file_open(lemp,".out","wb");
drh75897232000-05-29 14:26:00 +00002909 if( fp==0 ) return;
drh75897232000-05-29 14:26:00 +00002910 for(i=0; i<lemp->nstate; i++){
2911 stp = lemp->sorted[i];
drhada354d2005-11-05 15:03:59 +00002912 fprintf(fp,"State %d:\n",stp->statenum);
drh75897232000-05-29 14:26:00 +00002913 if( lemp->basisflag ) cfp=stp->bp;
2914 else cfp=stp->cfp;
2915 while( cfp ){
2916 char buf[20];
2917 if( cfp->dot==cfp->rp->nrhs ){
2918 sprintf(buf,"(%d)",cfp->rp->index);
2919 fprintf(fp," %5s ",buf);
2920 }else{
2921 fprintf(fp," ");
2922 }
2923 ConfigPrint(fp,cfp);
2924 fprintf(fp,"\n");
drhfd405312005-11-06 04:06:59 +00002925#if 0
drh75897232000-05-29 14:26:00 +00002926 SetPrint(fp,cfp->fws,lemp);
2927 PlinkPrint(fp,cfp->fplp,"To ");
2928 PlinkPrint(fp,cfp->bplp,"From");
2929#endif
2930 if( lemp->basisflag ) cfp=cfp->bp;
2931 else cfp=cfp->next;
2932 }
2933 fprintf(fp,"\n");
2934 for(ap=stp->ap; ap; ap=ap->next){
2935 if( PrintAction(ap,fp,30) ) fprintf(fp,"\n");
2936 }
2937 fprintf(fp,"\n");
2938 }
drhe9278182007-07-18 18:16:29 +00002939 fprintf(fp, "----------------------------------------------------\n");
2940 fprintf(fp, "Symbols:\n");
2941 for(i=0; i<lemp->nsymbol; i++){
2942 int j;
2943 struct symbol *sp;
2944
2945 sp = lemp->symbols[i];
2946 fprintf(fp, " %3d: %s", i, sp->name);
2947 if( sp->type==NONTERMINAL ){
2948 fprintf(fp, ":");
2949 if( sp->lambda ){
2950 fprintf(fp, " <lambda>");
2951 }
2952 for(j=0; j<lemp->nterminal; j++){
2953 if( sp->firstset && SetFind(sp->firstset, j) ){
2954 fprintf(fp, " %s", lemp->symbols[j]->name);
2955 }
2956 }
2957 }
2958 fprintf(fp, "\n");
2959 }
drh75897232000-05-29 14:26:00 +00002960 fclose(fp);
2961 return;
2962}
2963
2964/* Search for the file "name" which is in the same directory as
2965** the exacutable */
2966PRIVATE char *pathsearch(argv0,name,modemask)
2967char *argv0;
2968char *name;
2969int modemask;
2970{
2971 char *pathlist;
2972 char *path,*cp;
2973 char c;
drh75897232000-05-29 14:26:00 +00002974
2975#ifdef __WIN32__
2976 cp = strrchr(argv0,'\\');
2977#else
2978 cp = strrchr(argv0,'/');
2979#endif
2980 if( cp ){
2981 c = *cp;
2982 *cp = 0;
drh87cf1372008-08-13 20:09:06 +00002983 path = (char *)malloc( lemonStrlen(argv0) + lemonStrlen(name) + 2 );
drh75897232000-05-29 14:26:00 +00002984 if( path ) sprintf(path,"%s/%s",argv0,name);
2985 *cp = c;
2986 }else{
2987 extern char *getenv();
2988 pathlist = getenv("PATH");
2989 if( pathlist==0 ) pathlist = ".:/bin:/usr/bin";
drh87cf1372008-08-13 20:09:06 +00002990 path = (char *)malloc( lemonStrlen(pathlist)+lemonStrlen(name)+2 );
drh75897232000-05-29 14:26:00 +00002991 if( path!=0 ){
2992 while( *pathlist ){
2993 cp = strchr(pathlist,':');
drh87cf1372008-08-13 20:09:06 +00002994 if( cp==0 ) cp = &pathlist[lemonStrlen(pathlist)];
drh75897232000-05-29 14:26:00 +00002995 c = *cp;
2996 *cp = 0;
2997 sprintf(path,"%s/%s",pathlist,name);
2998 *cp = c;
2999 if( c==0 ) pathlist = "";
3000 else pathlist = &cp[1];
3001 if( access(path,modemask)==0 ) break;
3002 }
3003 }
3004 }
3005 return path;
3006}
3007
3008/* Given an action, compute the integer value for that action
3009** which is to be put in the action table of the generated machine.
3010** Return negative if no action should be generated.
3011*/
3012PRIVATE int compute_action(lemp,ap)
3013struct lemon *lemp;
3014struct action *ap;
3015{
3016 int act;
3017 switch( ap->type ){
drhada354d2005-11-05 15:03:59 +00003018 case SHIFT: act = ap->x.stp->statenum; break;
drh75897232000-05-29 14:26:00 +00003019 case REDUCE: act = ap->x.rp->index + lemp->nstate; break;
3020 case ERROR: act = lemp->nstate + lemp->nrule; break;
3021 case ACCEPT: act = lemp->nstate + lemp->nrule + 1; break;
3022 default: act = -1; break;
3023 }
3024 return act;
3025}
3026
3027#define LINESIZE 1000
3028/* The next cluster of routines are for reading the template file
3029** and writing the results to the generated parser */
3030/* The first function transfers data from "in" to "out" until
3031** a line is seen which begins with "%%". The line number is
3032** tracked.
3033**
3034** if name!=0, then any word that begin with "Parse" is changed to
3035** begin with *name instead.
3036*/
3037PRIVATE void tplt_xfer(name,in,out,lineno)
3038char *name;
3039FILE *in;
3040FILE *out;
3041int *lineno;
3042{
3043 int i, iStart;
3044 char line[LINESIZE];
3045 while( fgets(line,LINESIZE,in) && (line[0]!='%' || line[1]!='%') ){
3046 (*lineno)++;
3047 iStart = 0;
3048 if( name ){
3049 for(i=0; line[i]; i++){
3050 if( line[i]=='P' && strncmp(&line[i],"Parse",5)==0
3051 && (i==0 || !isalpha(line[i-1]))
3052 ){
3053 if( i>iStart ) fprintf(out,"%.*s",i-iStart,&line[iStart]);
3054 fprintf(out,"%s",name);
3055 i += 4;
3056 iStart = i+1;
3057 }
3058 }
3059 }
3060 fprintf(out,"%s",&line[iStart]);
3061 }
3062}
3063
3064/* The next function finds the template file and opens it, returning
3065** a pointer to the opened file. */
3066PRIVATE FILE *tplt_open(lemp)
3067struct lemon *lemp;
3068{
3069 static char templatename[] = "lempar.c";
3070 char buf[1000];
3071 FILE *in;
3072 char *tpltname;
3073 char *cp;
3074
3075 cp = strrchr(lemp->filename,'.');
3076 if( cp ){
drh8b582012003-10-21 13:16:03 +00003077 sprintf(buf,"%.*s.lt",(int)(cp-lemp->filename),lemp->filename);
drh75897232000-05-29 14:26:00 +00003078 }else{
3079 sprintf(buf,"%s.lt",lemp->filename);
3080 }
3081 if( access(buf,004)==0 ){
3082 tpltname = buf;
drh960e8c62001-04-03 16:53:21 +00003083 }else if( access(templatename,004)==0 ){
3084 tpltname = templatename;
drh75897232000-05-29 14:26:00 +00003085 }else{
3086 tpltname = pathsearch(lemp->argv0,templatename,0);
3087 }
3088 if( tpltname==0 ){
3089 fprintf(stderr,"Can't find the parser driver template file \"%s\".\n",
3090 templatename);
3091 lemp->errorcnt++;
3092 return 0;
3093 }
drh2aa6ca42004-09-10 00:14:04 +00003094 in = fopen(tpltname,"rb");
drh75897232000-05-29 14:26:00 +00003095 if( in==0 ){
3096 fprintf(stderr,"Can't open the template file \"%s\".\n",templatename);
3097 lemp->errorcnt++;
3098 return 0;
3099 }
3100 return in;
3101}
3102
drhaf805ca2004-09-07 11:28:25 +00003103/* Print a #line directive line to the output file. */
3104PRIVATE void tplt_linedir(out,lineno,filename)
3105FILE *out;
3106int lineno;
3107char *filename;
3108{
3109 fprintf(out,"#line %d \"",lineno);
3110 while( *filename ){
3111 if( *filename == '\\' ) putc('\\',out);
3112 putc(*filename,out);
3113 filename++;
3114 }
3115 fprintf(out,"\"\n");
3116}
3117
drh75897232000-05-29 14:26:00 +00003118/* Print a string to the file and keep the linenumber up to date */
drha5808f32008-04-27 22:19:44 +00003119PRIVATE void tplt_print(out,lemp,str,lineno)
drh75897232000-05-29 14:26:00 +00003120FILE *out;
3121struct lemon *lemp;
3122char *str;
drh75897232000-05-29 14:26:00 +00003123int *lineno;
3124{
3125 if( str==0 ) return;
drh75897232000-05-29 14:26:00 +00003126 while( *str ){
drh75897232000-05-29 14:26:00 +00003127 putc(*str,out);
shane58543932008-12-10 20:10:04 +00003128 if( *str=='\n' ) (*lineno)++;
drh75897232000-05-29 14:26:00 +00003129 str++;
3130 }
drh9db55df2004-09-09 14:01:21 +00003131 if( str[-1]!='\n' ){
3132 putc('\n',out);
3133 (*lineno)++;
3134 }
shane58543932008-12-10 20:10:04 +00003135 if (!lemp->nolinenosflag) {
3136 (*lineno)++; tplt_linedir(out,*lineno,lemp->outname);
3137 }
drh75897232000-05-29 14:26:00 +00003138 return;
3139}
3140
3141/*
3142** The following routine emits code for the destructor for the
3143** symbol sp
3144*/
3145void emit_destructor_code(out,sp,lemp,lineno)
3146FILE *out;
3147struct symbol *sp;
3148struct lemon *lemp;
3149int *lineno;
3150{
drhcc83b6e2004-04-23 23:38:42 +00003151 char *cp = 0;
drh75897232000-05-29 14:26:00 +00003152
drh75897232000-05-29 14:26:00 +00003153 if( sp->type==TERMINAL ){
3154 cp = lemp->tokendest;
3155 if( cp==0 ) return;
drha5808f32008-04-27 22:19:44 +00003156 fprintf(out,"{\n"); (*lineno)++;
drh960e8c62001-04-03 16:53:21 +00003157 }else if( sp->destructor ){
drh75897232000-05-29 14:26:00 +00003158 cp = sp->destructor;
drha5808f32008-04-27 22:19:44 +00003159 fprintf(out,"{\n"); (*lineno)++;
shane58543932008-12-10 20:10:04 +00003160 if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,sp->destLineno,lemp->filename); }
drh960e8c62001-04-03 16:53:21 +00003161 }else if( lemp->vardest ){
3162 cp = lemp->vardest;
3163 if( cp==0 ) return;
drha5808f32008-04-27 22:19:44 +00003164 fprintf(out,"{\n"); (*lineno)++;
drhcc83b6e2004-04-23 23:38:42 +00003165 }else{
3166 assert( 0 ); /* Cannot happen */
drh75897232000-05-29 14:26:00 +00003167 }
3168 for(; *cp; cp++){
3169 if( *cp=='$' && cp[1]=='$' ){
3170 fprintf(out,"(yypminor->yy%d)",sp->dtnum);
3171 cp++;
3172 continue;
3173 }
shane58543932008-12-10 20:10:04 +00003174 if( *cp=='\n' ) (*lineno)++;
drh75897232000-05-29 14:26:00 +00003175 fputc(*cp,out);
3176 }
shane58543932008-12-10 20:10:04 +00003177 fprintf(out,"\n"); (*lineno)++;
3178 if (!lemp->nolinenosflag) {
3179 (*lineno)++; tplt_linedir(out,*lineno,lemp->outname);
3180 }
3181 fprintf(out,"}\n"); (*lineno)++;
drh75897232000-05-29 14:26:00 +00003182 return;
3183}
3184
3185/*
drh960e8c62001-04-03 16:53:21 +00003186** Return TRUE (non-zero) if the given symbol has a destructor.
drh75897232000-05-29 14:26:00 +00003187*/
3188int has_destructor(sp, lemp)
3189struct symbol *sp;
3190struct lemon *lemp;
3191{
3192 int ret;
3193 if( sp->type==TERMINAL ){
3194 ret = lemp->tokendest!=0;
3195 }else{
drh960e8c62001-04-03 16:53:21 +00003196 ret = lemp->vardest!=0 || sp->destructor!=0;
drh75897232000-05-29 14:26:00 +00003197 }
3198 return ret;
3199}
3200
drh0bb132b2004-07-20 14:06:51 +00003201/*
3202** Append text to a dynamically allocated string. If zText is 0 then
3203** reset the string to be empty again. Always return the complete text
3204** of the string (which is overwritten with each call).
drh7ac25c72004-08-19 15:12:26 +00003205**
3206** n bytes of zText are stored. If n==0 then all of zText up to the first
3207** \000 terminator is stored. zText can contain up to two instances of
3208** %d. The values of p1 and p2 are written into the first and second
3209** %d.
3210**
3211** If n==-1, then the previous character is overwritten.
drh0bb132b2004-07-20 14:06:51 +00003212*/
3213PRIVATE char *append_str(char *zText, int n, int p1, int p2){
3214 static char *z = 0;
3215 static int alloced = 0;
3216 static int used = 0;
drhaf805ca2004-09-07 11:28:25 +00003217 int c;
drh0bb132b2004-07-20 14:06:51 +00003218 char zInt[40];
3219
3220 if( zText==0 ){
3221 used = 0;
3222 return z;
3223 }
drh7ac25c72004-08-19 15:12:26 +00003224 if( n<=0 ){
3225 if( n<0 ){
3226 used += n;
3227 assert( used>=0 );
3228 }
drh87cf1372008-08-13 20:09:06 +00003229 n = lemonStrlen(zText);
drh7ac25c72004-08-19 15:12:26 +00003230 }
drh0bb132b2004-07-20 14:06:51 +00003231 if( n+sizeof(zInt)*2+used >= alloced ){
3232 alloced = n + sizeof(zInt)*2 + used + 200;
3233 z = realloc(z, alloced);
3234 }
3235 if( z==0 ) return "";
3236 while( n-- > 0 ){
3237 c = *(zText++);
drh50489622006-10-13 12:25:29 +00003238 if( c=='%' && n>0 && zText[0]=='d' ){
drh0bb132b2004-07-20 14:06:51 +00003239 sprintf(zInt, "%d", p1);
3240 p1 = p2;
3241 strcpy(&z[used], zInt);
drh87cf1372008-08-13 20:09:06 +00003242 used += lemonStrlen(&z[used]);
drh0bb132b2004-07-20 14:06:51 +00003243 zText++;
3244 n--;
3245 }else{
3246 z[used++] = c;
3247 }
3248 }
3249 z[used] = 0;
3250 return z;
3251}
3252
3253/*
3254** zCode is a string that is the action associated with a rule. Expand
3255** the symbols in this string so that the refer to elements of the parser
drhaf805ca2004-09-07 11:28:25 +00003256** stack.
drh0bb132b2004-07-20 14:06:51 +00003257*/
drhaf805ca2004-09-07 11:28:25 +00003258PRIVATE void translate_code(struct lemon *lemp, struct rule *rp){
drh0bb132b2004-07-20 14:06:51 +00003259 char *cp, *xp;
3260 int i;
3261 char lhsused = 0; /* True if the LHS element has been used */
3262 char used[MAXRHS]; /* True for each RHS element which is used */
3263
3264 for(i=0; i<rp->nrhs; i++) used[i] = 0;
3265 lhsused = 0;
3266
drh19c9e562007-03-29 20:13:53 +00003267 if( rp->code==0 ){
3268 rp->code = "\n";
3269 rp->line = rp->ruleline;
3270 }
3271
drh0bb132b2004-07-20 14:06:51 +00003272 append_str(0,0,0,0);
drh19c9e562007-03-29 20:13:53 +00003273 for(cp=rp->code; *cp; cp++){
drh0bb132b2004-07-20 14:06:51 +00003274 if( isalpha(*cp) && (cp==rp->code || (!isalnum(cp[-1]) && cp[-1]!='_')) ){
3275 char saved;
3276 for(xp= &cp[1]; isalnum(*xp) || *xp=='_'; xp++);
3277 saved = *xp;
3278 *xp = 0;
3279 if( rp->lhsalias && strcmp(cp,rp->lhsalias)==0 ){
drh7ac25c72004-08-19 15:12:26 +00003280 append_str("yygotominor.yy%d",0,rp->lhs->dtnum,0);
drh0bb132b2004-07-20 14:06:51 +00003281 cp = xp;
3282 lhsused = 1;
3283 }else{
3284 for(i=0; i<rp->nrhs; i++){
3285 if( rp->rhsalias[i] && strcmp(cp,rp->rhsalias[i])==0 ){
drh7ac25c72004-08-19 15:12:26 +00003286 if( cp!=rp->code && cp[-1]=='@' ){
3287 /* If the argument is of the form @X then substituted
3288 ** the token number of X, not the value of X */
3289 append_str("yymsp[%d].major",-1,i-rp->nrhs+1,0);
3290 }else{
drhfd405312005-11-06 04:06:59 +00003291 struct symbol *sp = rp->rhs[i];
3292 int dtnum;
3293 if( sp->type==MULTITERMINAL ){
3294 dtnum = sp->subsym[0]->dtnum;
3295 }else{
3296 dtnum = sp->dtnum;
3297 }
3298 append_str("yymsp[%d].minor.yy%d",0,i-rp->nrhs+1, dtnum);
drh7ac25c72004-08-19 15:12:26 +00003299 }
drh0bb132b2004-07-20 14:06:51 +00003300 cp = xp;
3301 used[i] = 1;
3302 break;
3303 }
3304 }
3305 }
3306 *xp = saved;
3307 }
3308 append_str(cp, 1, 0, 0);
3309 } /* End loop */
3310
3311 /* Check to make sure the LHS has been used */
3312 if( rp->lhsalias && !lhsused ){
3313 ErrorMsg(lemp->filename,rp->ruleline,
3314 "Label \"%s\" for \"%s(%s)\" is never used.",
3315 rp->lhsalias,rp->lhs->name,rp->lhsalias);
3316 lemp->errorcnt++;
3317 }
3318
3319 /* Generate destructor code for RHS symbols which are not used in the
3320 ** reduce code */
3321 for(i=0; i<rp->nrhs; i++){
3322 if( rp->rhsalias[i] && !used[i] ){
3323 ErrorMsg(lemp->filename,rp->ruleline,
3324 "Label %s for \"%s(%s)\" is never used.",
3325 rp->rhsalias[i],rp->rhs[i]->name,rp->rhsalias[i]);
3326 lemp->errorcnt++;
3327 }else if( rp->rhsalias[i]==0 ){
3328 if( has_destructor(rp->rhs[i],lemp) ){
drh639efd02008-08-13 20:04:29 +00003329 append_str(" yy_destructor(yypParser,%d,&yymsp[%d].minor);\n", 0,
drh0bb132b2004-07-20 14:06:51 +00003330 rp->rhs[i]->index,i-rp->nrhs+1);
3331 }else{
3332 /* No destructor defined for this term */
3333 }
3334 }
3335 }
drh61e339a2007-01-16 03:09:02 +00003336 if( rp->code ){
3337 cp = append_str(0,0,0,0);
3338 rp->code = Strsafe(cp?cp:"");
3339 }
drh0bb132b2004-07-20 14:06:51 +00003340}
3341
drh75897232000-05-29 14:26:00 +00003342/*
3343** Generate code which executes when the rule "rp" is reduced. Write
3344** the code to "out". Make sure lineno stays up-to-date.
3345*/
3346PRIVATE void emit_code(out,rp,lemp,lineno)
3347FILE *out;
3348struct rule *rp;
3349struct lemon *lemp;
3350int *lineno;
3351{
drh0bb132b2004-07-20 14:06:51 +00003352 char *cp;
drh75897232000-05-29 14:26:00 +00003353
3354 /* Generate code to do the reduce action */
3355 if( rp->code ){
shane58543932008-12-10 20:10:04 +00003356 if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,rp->line,lemp->filename); }
drhaf805ca2004-09-07 11:28:25 +00003357 fprintf(out,"{%s",rp->code);
drh75897232000-05-29 14:26:00 +00003358 for(cp=rp->code; *cp; cp++){
shane58543932008-12-10 20:10:04 +00003359 if( *cp=='\n' ) (*lineno)++;
drh75897232000-05-29 14:26:00 +00003360 } /* End loop */
shane58543932008-12-10 20:10:04 +00003361 fprintf(out,"}\n"); (*lineno)++;
3362 if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); }
drh75897232000-05-29 14:26:00 +00003363 } /* End if( rp->code ) */
3364
drh75897232000-05-29 14:26:00 +00003365 return;
3366}
3367
3368/*
3369** Print the definition of the union used for the parser's data stack.
3370** This union contains fields for every possible data type for tokens
3371** and nonterminals. In the process of computing and printing this
3372** union, also set the ".dtnum" field of every terminal and nonterminal
3373** symbol.
3374*/
3375void print_stack_union(out,lemp,plineno,mhflag)
3376FILE *out; /* The output stream */
3377struct lemon *lemp; /* The main info structure for this parser */
3378int *plineno; /* Pointer to the line number */
3379int mhflag; /* True if generating makeheaders output */
3380{
3381 int lineno = *plineno; /* The line number of the output */
3382 char **types; /* A hash table of datatypes */
3383 int arraysize; /* Size of the "types" array */
3384 int maxdtlength; /* Maximum length of any ".datatype" field. */
3385 char *stddt; /* Standardized name for a datatype */
3386 int i,j; /* Loop counters */
3387 int hash; /* For hashing the name of a type */
3388 char *name; /* Name of the parser */
3389
3390 /* Allocate and initialize types[] and allocate stddt[] */
3391 arraysize = lemp->nsymbol * 2;
drh9892c5d2007-12-21 00:02:11 +00003392 types = (char**)calloc( arraysize, sizeof(char*) );
drh75897232000-05-29 14:26:00 +00003393 for(i=0; i<arraysize; i++) types[i] = 0;
3394 maxdtlength = 0;
drh960e8c62001-04-03 16:53:21 +00003395 if( lemp->vartype ){
drh87cf1372008-08-13 20:09:06 +00003396 maxdtlength = lemonStrlen(lemp->vartype);
drh960e8c62001-04-03 16:53:21 +00003397 }
drh75897232000-05-29 14:26:00 +00003398 for(i=0; i<lemp->nsymbol; i++){
3399 int len;
3400 struct symbol *sp = lemp->symbols[i];
3401 if( sp->datatype==0 ) continue;
drh87cf1372008-08-13 20:09:06 +00003402 len = lemonStrlen(sp->datatype);
drh75897232000-05-29 14:26:00 +00003403 if( len>maxdtlength ) maxdtlength = len;
3404 }
3405 stddt = (char*)malloc( maxdtlength*2 + 1 );
3406 if( types==0 || stddt==0 ){
3407 fprintf(stderr,"Out of memory.\n");
3408 exit(1);
3409 }
3410
3411 /* Build a hash table of datatypes. The ".dtnum" field of each symbol
3412 ** is filled in with the hash index plus 1. A ".dtnum" value of 0 is
drh960e8c62001-04-03 16:53:21 +00003413 ** used for terminal symbols. If there is no %default_type defined then
3414 ** 0 is also used as the .dtnum value for nonterminals which do not specify
3415 ** a datatype using the %type directive.
3416 */
drh75897232000-05-29 14:26:00 +00003417 for(i=0; i<lemp->nsymbol; i++){
3418 struct symbol *sp = lemp->symbols[i];
3419 char *cp;
3420 if( sp==lemp->errsym ){
3421 sp->dtnum = arraysize+1;
3422 continue;
3423 }
drh960e8c62001-04-03 16:53:21 +00003424 if( sp->type!=NONTERMINAL || (sp->datatype==0 && lemp->vartype==0) ){
drh75897232000-05-29 14:26:00 +00003425 sp->dtnum = 0;
3426 continue;
3427 }
3428 cp = sp->datatype;
drh960e8c62001-04-03 16:53:21 +00003429 if( cp==0 ) cp = lemp->vartype;
drh75897232000-05-29 14:26:00 +00003430 j = 0;
3431 while( isspace(*cp) ) cp++;
3432 while( *cp ) stddt[j++] = *cp++;
3433 while( j>0 && isspace(stddt[j-1]) ) j--;
3434 stddt[j] = 0;
drh32c4d742008-07-01 16:34:49 +00003435 if( strcmp(stddt, lemp->tokentype)==0 ){
3436 sp->dtnum = 0;
3437 continue;
3438 }
drh75897232000-05-29 14:26:00 +00003439 hash = 0;
3440 for(j=0; stddt[j]; j++){
3441 hash = hash*53 + stddt[j];
3442 }
drh3b2129c2003-05-13 00:34:21 +00003443 hash = (hash & 0x7fffffff)%arraysize;
drh75897232000-05-29 14:26:00 +00003444 while( types[hash] ){
3445 if( strcmp(types[hash],stddt)==0 ){
3446 sp->dtnum = hash + 1;
3447 break;
3448 }
3449 hash++;
3450 if( hash>=arraysize ) hash = 0;
3451 }
3452 if( types[hash]==0 ){
3453 sp->dtnum = hash + 1;
drh87cf1372008-08-13 20:09:06 +00003454 types[hash] = (char*)malloc( lemonStrlen(stddt)+1 );
drh75897232000-05-29 14:26:00 +00003455 if( types[hash]==0 ){
3456 fprintf(stderr,"Out of memory.\n");
3457 exit(1);
3458 }
3459 strcpy(types[hash],stddt);
3460 }
3461 }
3462
3463 /* Print out the definition of YYTOKENTYPE and YYMINORTYPE */
3464 name = lemp->name ? lemp->name : "Parse";
3465 lineno = *plineno;
3466 if( mhflag ){ fprintf(out,"#if INTERFACE\n"); lineno++; }
3467 fprintf(out,"#define %sTOKENTYPE %s\n",name,
3468 lemp->tokentype?lemp->tokentype:"void*"); lineno++;
3469 if( mhflag ){ fprintf(out,"#endif\n"); lineno++; }
3470 fprintf(out,"typedef union {\n"); lineno++;
3471 fprintf(out," %sTOKENTYPE yy0;\n",name); lineno++;
3472 for(i=0; i<arraysize; i++){
3473 if( types[i]==0 ) continue;
3474 fprintf(out," %s yy%d;\n",types[i],i+1); lineno++;
3475 free(types[i]);
3476 }
drhc4dd3fd2008-01-22 01:48:05 +00003477 if( lemp->errsym->useCnt ){
3478 fprintf(out," int yy%d;\n",lemp->errsym->dtnum); lineno++;
3479 }
drh75897232000-05-29 14:26:00 +00003480 free(stddt);
3481 free(types);
3482 fprintf(out,"} YYMINORTYPE;\n"); lineno++;
3483 *plineno = lineno;
3484}
3485
drhb29b0a52002-02-23 19:39:46 +00003486/*
3487** Return the name of a C datatype able to represent values between
drh8b582012003-10-21 13:16:03 +00003488** lwr and upr, inclusive.
drhb29b0a52002-02-23 19:39:46 +00003489*/
drh8b582012003-10-21 13:16:03 +00003490static const char *minimum_size_type(int lwr, int upr){
3491 if( lwr>=0 ){
3492 if( upr<=255 ){
3493 return "unsigned char";
3494 }else if( upr<65535 ){
3495 return "unsigned short int";
3496 }else{
3497 return "unsigned int";
3498 }
3499 }else if( lwr>=-127 && upr<=127 ){
3500 return "signed char";
3501 }else if( lwr>=-32767 && upr<32767 ){
3502 return "short";
drhb29b0a52002-02-23 19:39:46 +00003503 }else{
drh8b582012003-10-21 13:16:03 +00003504 return "int";
drhb29b0a52002-02-23 19:39:46 +00003505 }
3506}
3507
drhfdbf9282003-10-21 16:34:41 +00003508/*
3509** Each state contains a set of token transaction and a set of
3510** nonterminal transactions. Each of these sets makes an instance
3511** of the following structure. An array of these structures is used
3512** to order the creation of entries in the yy_action[] table.
3513*/
3514struct axset {
3515 struct state *stp; /* A pointer to a state */
3516 int isTkn; /* True to use tokens. False for non-terminals */
3517 int nAction; /* Number of actions */
3518};
3519
3520/*
3521** Compare to axset structures for sorting purposes
3522*/
3523static int axset_compare(const void *a, const void *b){
3524 struct axset *p1 = (struct axset*)a;
3525 struct axset *p2 = (struct axset*)b;
3526 return p2->nAction - p1->nAction;
3527}
3528
drhc4dd3fd2008-01-22 01:48:05 +00003529/*
3530** Write text on "out" that describes the rule "rp".
3531*/
3532static void writeRuleText(FILE *out, struct rule *rp){
3533 int j;
3534 fprintf(out,"%s ::=", rp->lhs->name);
3535 for(j=0; j<rp->nrhs; j++){
3536 struct symbol *sp = rp->rhs[j];
3537 fprintf(out," %s", sp->name);
3538 if( sp->type==MULTITERMINAL ){
3539 int k;
3540 for(k=1; k<sp->nsubsym; k++){
3541 fprintf(out,"|%s",sp->subsym[k]->name);
3542 }
3543 }
3544 }
3545}
3546
3547
drh75897232000-05-29 14:26:00 +00003548/* Generate C source code for the parser */
3549void ReportTable(lemp, mhflag)
3550struct lemon *lemp;
3551int mhflag; /* Output in makeheaders format if true */
3552{
3553 FILE *out, *in;
3554 char line[LINESIZE];
3555 int lineno;
3556 struct state *stp;
3557 struct action *ap;
3558 struct rule *rp;
drh8b582012003-10-21 13:16:03 +00003559 struct acttab *pActtab;
3560 int i, j, n;
drh75897232000-05-29 14:26:00 +00003561 char *name;
drh8b582012003-10-21 13:16:03 +00003562 int mnTknOfst, mxTknOfst;
3563 int mnNtOfst, mxNtOfst;
drhfdbf9282003-10-21 16:34:41 +00003564 struct axset *ax;
drh75897232000-05-29 14:26:00 +00003565
3566 in = tplt_open(lemp);
3567 if( in==0 ) return;
drh2aa6ca42004-09-10 00:14:04 +00003568 out = file_open(lemp,".c","wb");
drh75897232000-05-29 14:26:00 +00003569 if( out==0 ){
3570 fclose(in);
3571 return;
3572 }
3573 lineno = 1;
3574 tplt_xfer(lemp->name,in,out,&lineno);
3575
3576 /* Generate the include code, if any */
drha5808f32008-04-27 22:19:44 +00003577 tplt_print(out,lemp,lemp->include,&lineno);
drh75897232000-05-29 14:26:00 +00003578 if( mhflag ){
3579 char *name = file_makename(lemp, ".h");
3580 fprintf(out,"#include \"%s\"\n", name); lineno++;
3581 free(name);
3582 }
3583 tplt_xfer(lemp->name,in,out,&lineno);
3584
3585 /* Generate #defines for all tokens */
3586 if( mhflag ){
3587 char *prefix;
3588 fprintf(out,"#if INTERFACE\n"); lineno++;
3589 if( lemp->tokenprefix ) prefix = lemp->tokenprefix;
3590 else prefix = "";
3591 for(i=1; i<lemp->nterminal; i++){
3592 fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
3593 lineno++;
3594 }
3595 fprintf(out,"#endif\n"); lineno++;
3596 }
3597 tplt_xfer(lemp->name,in,out,&lineno);
3598
3599 /* Generate the defines */
drh75897232000-05-29 14:26:00 +00003600 fprintf(out,"#define YYCODETYPE %s\n",
drh8b582012003-10-21 13:16:03 +00003601 minimum_size_type(0, lemp->nsymbol+5)); lineno++;
drh75897232000-05-29 14:26:00 +00003602 fprintf(out,"#define YYNOCODE %d\n",lemp->nsymbol+1); lineno++;
3603 fprintf(out,"#define YYACTIONTYPE %s\n",
drh8b582012003-10-21 13:16:03 +00003604 minimum_size_type(0, lemp->nstate+lemp->nrule+5)); lineno++;
drhe09daa92006-06-10 13:29:31 +00003605 if( lemp->wildcard ){
3606 fprintf(out,"#define YYWILDCARD %d\n",
3607 lemp->wildcard->index); lineno++;
3608 }
drh75897232000-05-29 14:26:00 +00003609 print_stack_union(out,lemp,&lineno,mhflag);
drhca44b5a2007-02-22 23:06:58 +00003610 fprintf(out, "#ifndef YYSTACKDEPTH\n"); lineno++;
drh75897232000-05-29 14:26:00 +00003611 if( lemp->stacksize ){
drh75897232000-05-29 14:26:00 +00003612 fprintf(out,"#define YYSTACKDEPTH %s\n",lemp->stacksize); lineno++;
3613 }else{
3614 fprintf(out,"#define YYSTACKDEPTH 100\n"); lineno++;
3615 }
drhca44b5a2007-02-22 23:06:58 +00003616 fprintf(out, "#endif\n"); lineno++;
drh75897232000-05-29 14:26:00 +00003617 if( mhflag ){
3618 fprintf(out,"#if INTERFACE\n"); lineno++;
3619 }
3620 name = lemp->name ? lemp->name : "Parse";
3621 if( lemp->arg && lemp->arg[0] ){
3622 int i;
drh87cf1372008-08-13 20:09:06 +00003623 i = lemonStrlen(lemp->arg);
drhb1edd012000-06-02 18:52:12 +00003624 while( i>=1 && isspace(lemp->arg[i-1]) ) i--;
3625 while( i>=1 && (isalnum(lemp->arg[i-1]) || lemp->arg[i-1]=='_') ) i--;
drh1f245e42002-03-11 13:55:50 +00003626 fprintf(out,"#define %sARG_SDECL %s;\n",name,lemp->arg); lineno++;
3627 fprintf(out,"#define %sARG_PDECL ,%s\n",name,lemp->arg); lineno++;
3628 fprintf(out,"#define %sARG_FETCH %s = yypParser->%s\n",
3629 name,lemp->arg,&lemp->arg[i]); lineno++;
3630 fprintf(out,"#define %sARG_STORE yypParser->%s = %s\n",
3631 name,&lemp->arg[i],&lemp->arg[i]); lineno++;
drh75897232000-05-29 14:26:00 +00003632 }else{
drh1f245e42002-03-11 13:55:50 +00003633 fprintf(out,"#define %sARG_SDECL\n",name); lineno++;
3634 fprintf(out,"#define %sARG_PDECL\n",name); lineno++;
3635 fprintf(out,"#define %sARG_FETCH\n",name); lineno++;
3636 fprintf(out,"#define %sARG_STORE\n",name); lineno++;
drh75897232000-05-29 14:26:00 +00003637 }
3638 if( mhflag ){
3639 fprintf(out,"#endif\n"); lineno++;
3640 }
3641 fprintf(out,"#define YYNSTATE %d\n",lemp->nstate); lineno++;
3642 fprintf(out,"#define YYNRULE %d\n",lemp->nrule); lineno++;
drhc4dd3fd2008-01-22 01:48:05 +00003643 if( lemp->errsym->useCnt ){
3644 fprintf(out,"#define YYERRORSYMBOL %d\n",lemp->errsym->index); lineno++;
3645 fprintf(out,"#define YYERRSYMDT yy%d\n",lemp->errsym->dtnum); lineno++;
3646 }
drh0bd1f4e2002-06-06 18:54:39 +00003647 if( lemp->has_fallback ){
3648 fprintf(out,"#define YYFALLBACK 1\n"); lineno++;
3649 }
drh75897232000-05-29 14:26:00 +00003650 tplt_xfer(lemp->name,in,out,&lineno);
3651
drh8b582012003-10-21 13:16:03 +00003652 /* Generate the action table and its associates:
drh75897232000-05-29 14:26:00 +00003653 **
drh8b582012003-10-21 13:16:03 +00003654 ** yy_action[] A single table containing all actions.
3655 ** yy_lookahead[] A table containing the lookahead for each entry in
3656 ** yy_action. Used to detect hash collisions.
3657 ** yy_shift_ofst[] For each state, the offset into yy_action for
3658 ** shifting terminals.
3659 ** yy_reduce_ofst[] For each state, the offset into yy_action for
3660 ** shifting non-terminals after a reduce.
3661 ** yy_default[] Default action for each state.
drh75897232000-05-29 14:26:00 +00003662 */
drh75897232000-05-29 14:26:00 +00003663
drh8b582012003-10-21 13:16:03 +00003664 /* Compute the actions on all states and count them up */
drh9892c5d2007-12-21 00:02:11 +00003665 ax = calloc(lemp->nstate*2, sizeof(ax[0]));
drhfdbf9282003-10-21 16:34:41 +00003666 if( ax==0 ){
3667 fprintf(stderr,"malloc failed\n");
3668 exit(1);
3669 }
drh75897232000-05-29 14:26:00 +00003670 for(i=0; i<lemp->nstate; i++){
drh75897232000-05-29 14:26:00 +00003671 stp = lemp->sorted[i];
drhfdbf9282003-10-21 16:34:41 +00003672 ax[i*2].stp = stp;
3673 ax[i*2].isTkn = 1;
3674 ax[i*2].nAction = stp->nTknAct;
3675 ax[i*2+1].stp = stp;
3676 ax[i*2+1].isTkn = 0;
3677 ax[i*2+1].nAction = stp->nNtAct;
drh75897232000-05-29 14:26:00 +00003678 }
drh8b582012003-10-21 13:16:03 +00003679 mxTknOfst = mnTknOfst = 0;
3680 mxNtOfst = mnNtOfst = 0;
3681
drhfdbf9282003-10-21 16:34:41 +00003682 /* Compute the action table. In order to try to keep the size of the
3683 ** action table to a minimum, the heuristic of placing the largest action
3684 ** sets first is used.
drh8b582012003-10-21 13:16:03 +00003685 */
drhfdbf9282003-10-21 16:34:41 +00003686 qsort(ax, lemp->nstate*2, sizeof(ax[0]), axset_compare);
drh8b582012003-10-21 13:16:03 +00003687 pActtab = acttab_alloc();
drhfdbf9282003-10-21 16:34:41 +00003688 for(i=0; i<lemp->nstate*2 && ax[i].nAction>0; i++){
3689 stp = ax[i].stp;
3690 if( ax[i].isTkn ){
3691 for(ap=stp->ap; ap; ap=ap->next){
3692 int action;
3693 if( ap->sp->index>=lemp->nterminal ) continue;
3694 action = compute_action(lemp, ap);
3695 if( action<0 ) continue;
3696 acttab_action(pActtab, ap->sp->index, action);
drh8b582012003-10-21 13:16:03 +00003697 }
drhfdbf9282003-10-21 16:34:41 +00003698 stp->iTknOfst = acttab_insert(pActtab);
3699 if( stp->iTknOfst<mnTknOfst ) mnTknOfst = stp->iTknOfst;
3700 if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst;
3701 }else{
3702 for(ap=stp->ap; ap; ap=ap->next){
3703 int action;
3704 if( ap->sp->index<lemp->nterminal ) continue;
3705 if( ap->sp->index==lemp->nsymbol ) continue;
3706 action = compute_action(lemp, ap);
3707 if( action<0 ) continue;
3708 acttab_action(pActtab, ap->sp->index, action);
drh8b582012003-10-21 13:16:03 +00003709 }
drhfdbf9282003-10-21 16:34:41 +00003710 stp->iNtOfst = acttab_insert(pActtab);
3711 if( stp->iNtOfst<mnNtOfst ) mnNtOfst = stp->iNtOfst;
3712 if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst;
drh8b582012003-10-21 13:16:03 +00003713 }
3714 }
drhfdbf9282003-10-21 16:34:41 +00003715 free(ax);
drh8b582012003-10-21 13:16:03 +00003716
3717 /* Output the yy_action table */
drh57196282004-10-06 15:41:16 +00003718 fprintf(out,"static const YYACTIONTYPE yy_action[] = {\n"); lineno++;
drh8b582012003-10-21 13:16:03 +00003719 n = acttab_size(pActtab);
3720 for(i=j=0; i<n; i++){
3721 int action = acttab_yyaction(pActtab, i);
drhe0479212007-01-12 23:09:23 +00003722 if( action<0 ) action = lemp->nstate + lemp->nrule + 2;
drhfdbf9282003-10-21 16:34:41 +00003723 if( j==0 ) fprintf(out," /* %5d */ ", i);
drh8b582012003-10-21 13:16:03 +00003724 fprintf(out, " %4d,", action);
3725 if( j==9 || i==n-1 ){
3726 fprintf(out, "\n"); lineno++;
3727 j = 0;
3728 }else{
3729 j++;
3730 }
3731 }
3732 fprintf(out, "};\n"); lineno++;
3733
3734 /* Output the yy_lookahead table */
drh57196282004-10-06 15:41:16 +00003735 fprintf(out,"static const YYCODETYPE yy_lookahead[] = {\n"); lineno++;
drh8b582012003-10-21 13:16:03 +00003736 for(i=j=0; i<n; i++){
3737 int la = acttab_yylookahead(pActtab, i);
3738 if( la<0 ) la = lemp->nsymbol;
drhfdbf9282003-10-21 16:34:41 +00003739 if( j==0 ) fprintf(out," /* %5d */ ", i);
drh8b582012003-10-21 13:16:03 +00003740 fprintf(out, " %4d,", la);
3741 if( j==9 || i==n-1 ){
3742 fprintf(out, "\n"); lineno++;
3743 j = 0;
3744 }else{
3745 j++;
3746 }
3747 }
3748 fprintf(out, "};\n"); lineno++;
3749
3750 /* Output the yy_shift_ofst[] table */
3751 fprintf(out, "#define YY_SHIFT_USE_DFLT (%d)\n", mnTknOfst-1); lineno++;
drhada354d2005-11-05 15:03:59 +00003752 n = lemp->nstate;
3753 while( n>0 && lemp->sorted[n-1]->iTknOfst==NO_OFFSET ) n--;
3754 fprintf(out, "#define YY_SHIFT_MAX %d\n", n-1); lineno++;
drh57196282004-10-06 15:41:16 +00003755 fprintf(out, "static const %s yy_shift_ofst[] = {\n",
drh8b582012003-10-21 13:16:03 +00003756 minimum_size_type(mnTknOfst-1, mxTknOfst)); lineno++;
drh8b582012003-10-21 13:16:03 +00003757 for(i=j=0; i<n; i++){
3758 int ofst;
3759 stp = lemp->sorted[i];
3760 ofst = stp->iTknOfst;
3761 if( ofst==NO_OFFSET ) ofst = mnTknOfst - 1;
drhfdbf9282003-10-21 16:34:41 +00003762 if( j==0 ) fprintf(out," /* %5d */ ", i);
drh8b582012003-10-21 13:16:03 +00003763 fprintf(out, " %4d,", ofst);
3764 if( j==9 || i==n-1 ){
3765 fprintf(out, "\n"); lineno++;
3766 j = 0;
3767 }else{
3768 j++;
3769 }
3770 }
3771 fprintf(out, "};\n"); lineno++;
3772
3773 /* Output the yy_reduce_ofst[] table */
3774 fprintf(out, "#define YY_REDUCE_USE_DFLT (%d)\n", mnNtOfst-1); lineno++;
drhada354d2005-11-05 15:03:59 +00003775 n = lemp->nstate;
3776 while( n>0 && lemp->sorted[n-1]->iNtOfst==NO_OFFSET ) n--;
3777 fprintf(out, "#define YY_REDUCE_MAX %d\n", n-1); lineno++;
drh57196282004-10-06 15:41:16 +00003778 fprintf(out, "static const %s yy_reduce_ofst[] = {\n",
drh8b582012003-10-21 13:16:03 +00003779 minimum_size_type(mnNtOfst-1, mxNtOfst)); lineno++;
drh8b582012003-10-21 13:16:03 +00003780 for(i=j=0; i<n; i++){
3781 int ofst;
3782 stp = lemp->sorted[i];
3783 ofst = stp->iNtOfst;
3784 if( ofst==NO_OFFSET ) ofst = mnNtOfst - 1;
drhfdbf9282003-10-21 16:34:41 +00003785 if( j==0 ) fprintf(out," /* %5d */ ", i);
drh8b582012003-10-21 13:16:03 +00003786 fprintf(out, " %4d,", ofst);
3787 if( j==9 || i==n-1 ){
3788 fprintf(out, "\n"); lineno++;
3789 j = 0;
3790 }else{
3791 j++;
3792 }
3793 }
3794 fprintf(out, "};\n"); lineno++;
3795
3796 /* Output the default action table */
drh57196282004-10-06 15:41:16 +00003797 fprintf(out, "static const YYACTIONTYPE yy_default[] = {\n"); lineno++;
drh8b582012003-10-21 13:16:03 +00003798 n = lemp->nstate;
3799 for(i=j=0; i<n; i++){
3800 stp = lemp->sorted[i];
drhfdbf9282003-10-21 16:34:41 +00003801 if( j==0 ) fprintf(out," /* %5d */ ", i);
drh8b582012003-10-21 13:16:03 +00003802 fprintf(out, " %4d,", stp->iDflt);
3803 if( j==9 || i==n-1 ){
3804 fprintf(out, "\n"); lineno++;
3805 j = 0;
3806 }else{
3807 j++;
3808 }
3809 }
3810 fprintf(out, "};\n"); lineno++;
drh75897232000-05-29 14:26:00 +00003811 tplt_xfer(lemp->name,in,out,&lineno);
3812
drh0bd1f4e2002-06-06 18:54:39 +00003813 /* Generate the table of fallback tokens.
3814 */
3815 if( lemp->has_fallback ){
3816 for(i=0; i<lemp->nterminal; i++){
3817 struct symbol *p = lemp->symbols[i];
3818 if( p->fallback==0 ){
3819 fprintf(out, " 0, /* %10s => nothing */\n", p->name);
3820 }else{
3821 fprintf(out, " %3d, /* %10s => %s */\n", p->fallback->index,
3822 p->name, p->fallback->name);
3823 }
3824 lineno++;
3825 }
3826 }
3827 tplt_xfer(lemp->name, in, out, &lineno);
3828
3829 /* Generate a table containing the symbolic name of every symbol
3830 */
drh75897232000-05-29 14:26:00 +00003831 for(i=0; i<lemp->nsymbol; i++){
3832 sprintf(line,"\"%s\",",lemp->symbols[i]->name);
3833 fprintf(out," %-15s",line);
3834 if( (i&3)==3 ){ fprintf(out,"\n"); lineno++; }
3835 }
3836 if( (i&3)!=0 ){ fprintf(out,"\n"); lineno++; }
3837 tplt_xfer(lemp->name,in,out,&lineno);
3838
drh0bd1f4e2002-06-06 18:54:39 +00003839 /* Generate a table containing a text string that describes every
drh34ff57b2008-07-14 12:27:51 +00003840 ** rule in the rule set of the grammar. This information is used
drh0bd1f4e2002-06-06 18:54:39 +00003841 ** when tracing REDUCE actions.
3842 */
3843 for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){
3844 assert( rp->index==i );
drhc4dd3fd2008-01-22 01:48:05 +00003845 fprintf(out," /* %3d */ \"", i);
3846 writeRuleText(out, rp);
drh0bd1f4e2002-06-06 18:54:39 +00003847 fprintf(out,"\",\n"); lineno++;
3848 }
3849 tplt_xfer(lemp->name,in,out,&lineno);
3850
drh75897232000-05-29 14:26:00 +00003851 /* Generate code which executes every time a symbol is popped from
3852 ** the stack while processing errors or while destroying the parser.
drh0bd1f4e2002-06-06 18:54:39 +00003853 ** (In other words, generate the %destructor actions)
3854 */
drh75897232000-05-29 14:26:00 +00003855 if( lemp->tokendest ){
drh4dc8ef52008-07-01 17:13:57 +00003856 int once = 1;
drh75897232000-05-29 14:26:00 +00003857 for(i=0; i<lemp->nsymbol; i++){
3858 struct symbol *sp = lemp->symbols[i];
3859 if( sp==0 || sp->type!=TERMINAL ) continue;
drh4dc8ef52008-07-01 17:13:57 +00003860 if( once ){
3861 fprintf(out, " /* TERMINAL Destructor */\n"); lineno++;
3862 once = 0;
3863 }
drhc4dd3fd2008-01-22 01:48:05 +00003864 fprintf(out," case %d: /* %s */\n",
3865 sp->index, sp->name); lineno++;
drh75897232000-05-29 14:26:00 +00003866 }
3867 for(i=0; i<lemp->nsymbol && lemp->symbols[i]->type!=TERMINAL; i++);
3868 if( i<lemp->nsymbol ){
3869 emit_destructor_code(out,lemp->symbols[i],lemp,&lineno);
3870 fprintf(out," break;\n"); lineno++;
3871 }
3872 }
drh8d659732005-01-13 23:54:06 +00003873 if( lemp->vardest ){
3874 struct symbol *dflt_sp = 0;
drh4dc8ef52008-07-01 17:13:57 +00003875 int once = 1;
drh8d659732005-01-13 23:54:06 +00003876 for(i=0; i<lemp->nsymbol; i++){
3877 struct symbol *sp = lemp->symbols[i];
3878 if( sp==0 || sp->type==TERMINAL ||
3879 sp->index<=0 || sp->destructor!=0 ) continue;
drh4dc8ef52008-07-01 17:13:57 +00003880 if( once ){
3881 fprintf(out, " /* Default NON-TERMINAL Destructor */\n"); lineno++;
3882 once = 0;
3883 }
drhc4dd3fd2008-01-22 01:48:05 +00003884 fprintf(out," case %d: /* %s */\n",
3885 sp->index, sp->name); lineno++;
drh8d659732005-01-13 23:54:06 +00003886 dflt_sp = sp;
3887 }
3888 if( dflt_sp!=0 ){
3889 emit_destructor_code(out,dflt_sp,lemp,&lineno);
drh8d659732005-01-13 23:54:06 +00003890 }
drh4dc8ef52008-07-01 17:13:57 +00003891 fprintf(out," break;\n"); lineno++;
drh8d659732005-01-13 23:54:06 +00003892 }
drh75897232000-05-29 14:26:00 +00003893 for(i=0; i<lemp->nsymbol; i++){
3894 struct symbol *sp = lemp->symbols[i];
3895 if( sp==0 || sp->type==TERMINAL || sp->destructor==0 ) continue;
drhc4dd3fd2008-01-22 01:48:05 +00003896 fprintf(out," case %d: /* %s */\n",
3897 sp->index, sp->name); lineno++;
drh0bb132b2004-07-20 14:06:51 +00003898
3899 /* Combine duplicate destructors into a single case */
3900 for(j=i+1; j<lemp->nsymbol; j++){
3901 struct symbol *sp2 = lemp->symbols[j];
3902 if( sp2 && sp2->type!=TERMINAL && sp2->destructor
3903 && sp2->dtnum==sp->dtnum
3904 && strcmp(sp->destructor,sp2->destructor)==0 ){
drhc4dd3fd2008-01-22 01:48:05 +00003905 fprintf(out," case %d: /* %s */\n",
3906 sp2->index, sp2->name); lineno++;
drh0bb132b2004-07-20 14:06:51 +00003907 sp2->destructor = 0;
3908 }
3909 }
3910
drh75897232000-05-29 14:26:00 +00003911 emit_destructor_code(out,lemp->symbols[i],lemp,&lineno);
3912 fprintf(out," break;\n"); lineno++;
3913 }
drh75897232000-05-29 14:26:00 +00003914 tplt_xfer(lemp->name,in,out,&lineno);
3915
3916 /* Generate code which executes whenever the parser stack overflows */
drha5808f32008-04-27 22:19:44 +00003917 tplt_print(out,lemp,lemp->overflow,&lineno);
drh75897232000-05-29 14:26:00 +00003918 tplt_xfer(lemp->name,in,out,&lineno);
3919
3920 /* Generate the table of rule information
3921 **
3922 ** Note: This code depends on the fact that rules are number
3923 ** sequentually beginning with 0.
3924 */
3925 for(rp=lemp->rule; rp; rp=rp->next){
3926 fprintf(out," { %d, %d },\n",rp->lhs->index,rp->nrhs); lineno++;
3927 }
3928 tplt_xfer(lemp->name,in,out,&lineno);
3929
3930 /* Generate code which execution during each REDUCE action */
3931 for(rp=lemp->rule; rp; rp=rp->next){
drh61e339a2007-01-16 03:09:02 +00003932 translate_code(lemp, rp);
drh0bb132b2004-07-20 14:06:51 +00003933 }
3934 for(rp=lemp->rule; rp; rp=rp->next){
3935 struct rule *rp2;
3936 if( rp->code==0 ) continue;
drhc4dd3fd2008-01-22 01:48:05 +00003937 fprintf(out," case %d: /* ", rp->index);
3938 writeRuleText(out, rp);
3939 fprintf(out, " */\n"); lineno++;
drh0bb132b2004-07-20 14:06:51 +00003940 for(rp2=rp->next; rp2; rp2=rp2->next){
3941 if( rp2->code==rp->code ){
drhc4dd3fd2008-01-22 01:48:05 +00003942 fprintf(out," case %d: /* ", rp2->index);
3943 writeRuleText(out, rp2);
3944 fprintf(out," */\n"); lineno++;
drh0bb132b2004-07-20 14:06:51 +00003945 rp2->code = 0;
3946 }
3947 }
drh75897232000-05-29 14:26:00 +00003948 emit_code(out,rp,lemp,&lineno);
3949 fprintf(out," break;\n"); lineno++;
3950 }
3951 tplt_xfer(lemp->name,in,out,&lineno);
3952
3953 /* Generate code which executes if a parse fails */
drha5808f32008-04-27 22:19:44 +00003954 tplt_print(out,lemp,lemp->failure,&lineno);
drh75897232000-05-29 14:26:00 +00003955 tplt_xfer(lemp->name,in,out,&lineno);
3956
3957 /* Generate code which executes when a syntax error occurs */
drha5808f32008-04-27 22:19:44 +00003958 tplt_print(out,lemp,lemp->error,&lineno);
drh75897232000-05-29 14:26:00 +00003959 tplt_xfer(lemp->name,in,out,&lineno);
3960
3961 /* Generate code which executes when the parser accepts its input */
drha5808f32008-04-27 22:19:44 +00003962 tplt_print(out,lemp,lemp->accept,&lineno);
drh75897232000-05-29 14:26:00 +00003963 tplt_xfer(lemp->name,in,out,&lineno);
3964
3965 /* Append any addition code the user desires */
drha5808f32008-04-27 22:19:44 +00003966 tplt_print(out,lemp,lemp->extracode,&lineno);
drh75897232000-05-29 14:26:00 +00003967
3968 fclose(in);
3969 fclose(out);
3970 return;
3971}
3972
3973/* Generate a header file for the parser */
3974void ReportHeader(lemp)
3975struct lemon *lemp;
3976{
3977 FILE *out, *in;
3978 char *prefix;
3979 char line[LINESIZE];
3980 char pattern[LINESIZE];
3981 int i;
3982
3983 if( lemp->tokenprefix ) prefix = lemp->tokenprefix;
3984 else prefix = "";
drh2aa6ca42004-09-10 00:14:04 +00003985 in = file_open(lemp,".h","rb");
drh75897232000-05-29 14:26:00 +00003986 if( in ){
3987 for(i=1; i<lemp->nterminal && fgets(line,LINESIZE,in); i++){
3988 sprintf(pattern,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
3989 if( strcmp(line,pattern) ) break;
3990 }
3991 fclose(in);
3992 if( i==lemp->nterminal ){
3993 /* No change in the file. Don't rewrite it. */
3994 return;
3995 }
3996 }
drh2aa6ca42004-09-10 00:14:04 +00003997 out = file_open(lemp,".h","wb");
drh75897232000-05-29 14:26:00 +00003998 if( out ){
3999 for(i=1; i<lemp->nterminal; i++){
4000 fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
4001 }
4002 fclose(out);
4003 }
4004 return;
4005}
4006
4007/* Reduce the size of the action tables, if possible, by making use
4008** of defaults.
4009**
drhb59499c2002-02-23 18:45:13 +00004010** In this version, we take the most frequent REDUCE action and make
drhe09daa92006-06-10 13:29:31 +00004011** it the default. Except, there is no default if the wildcard token
4012** is a possible look-ahead.
drh75897232000-05-29 14:26:00 +00004013*/
4014void CompressTables(lemp)
4015struct lemon *lemp;
4016{
4017 struct state *stp;
drhb59499c2002-02-23 18:45:13 +00004018 struct action *ap, *ap2;
4019 struct rule *rp, *rp2, *rbest;
4020 int nbest, n;
drh75897232000-05-29 14:26:00 +00004021 int i;
drhe09daa92006-06-10 13:29:31 +00004022 int usesWildcard;
drh75897232000-05-29 14:26:00 +00004023
4024 for(i=0; i<lemp->nstate; i++){
4025 stp = lemp->sorted[i];
drhb59499c2002-02-23 18:45:13 +00004026 nbest = 0;
4027 rbest = 0;
drhe09daa92006-06-10 13:29:31 +00004028 usesWildcard = 0;
drh75897232000-05-29 14:26:00 +00004029
drhb59499c2002-02-23 18:45:13 +00004030 for(ap=stp->ap; ap; ap=ap->next){
drhe09daa92006-06-10 13:29:31 +00004031 if( ap->type==SHIFT && ap->sp==lemp->wildcard ){
4032 usesWildcard = 1;
4033 }
drhb59499c2002-02-23 18:45:13 +00004034 if( ap->type!=REDUCE ) continue;
4035 rp = ap->x.rp;
drhb4960992007-10-05 16:16:36 +00004036 if( rp->lhsStart ) continue;
drhb59499c2002-02-23 18:45:13 +00004037 if( rp==rbest ) continue;
4038 n = 1;
4039 for(ap2=ap->next; ap2; ap2=ap2->next){
4040 if( ap2->type!=REDUCE ) continue;
4041 rp2 = ap2->x.rp;
4042 if( rp2==rbest ) continue;
4043 if( rp2==rp ) n++;
4044 }
4045 if( n>nbest ){
4046 nbest = n;
4047 rbest = rp;
drh75897232000-05-29 14:26:00 +00004048 }
4049 }
drhb59499c2002-02-23 18:45:13 +00004050
4051 /* Do not make a default if the number of rules to default
drhe09daa92006-06-10 13:29:31 +00004052 ** is not at least 1 or if the wildcard token is a possible
4053 ** lookahead.
4054 */
4055 if( nbest<1 || usesWildcard ) continue;
drh75897232000-05-29 14:26:00 +00004056
drhb59499c2002-02-23 18:45:13 +00004057
4058 /* Combine matching REDUCE actions into a single default */
4059 for(ap=stp->ap; ap; ap=ap->next){
4060 if( ap->type==REDUCE && ap->x.rp==rbest ) break;
4061 }
drh75897232000-05-29 14:26:00 +00004062 assert( ap );
4063 ap->sp = Symbol_new("{default}");
4064 for(ap=ap->next; ap; ap=ap->next){
drhb59499c2002-02-23 18:45:13 +00004065 if( ap->type==REDUCE && ap->x.rp==rbest ) ap->type = NOT_USED;
drh75897232000-05-29 14:26:00 +00004066 }
4067 stp->ap = Action_sort(stp->ap);
4068 }
4069}
drhb59499c2002-02-23 18:45:13 +00004070
drhada354d2005-11-05 15:03:59 +00004071
4072/*
4073** Compare two states for sorting purposes. The smaller state is the
4074** one with the most non-terminal actions. If they have the same number
4075** of non-terminal actions, then the smaller is the one with the most
4076** token actions.
4077*/
4078static int stateResortCompare(const void *a, const void *b){
4079 const struct state *pA = *(const struct state**)a;
4080 const struct state *pB = *(const struct state**)b;
4081 int n;
4082
4083 n = pB->nNtAct - pA->nNtAct;
4084 if( n==0 ){
4085 n = pB->nTknAct - pA->nTknAct;
4086 }
4087 return n;
4088}
4089
4090
4091/*
4092** Renumber and resort states so that states with fewer choices
4093** occur at the end. Except, keep state 0 as the first state.
4094*/
4095void ResortStates(lemp)
4096struct lemon *lemp;
4097{
4098 int i;
4099 struct state *stp;
4100 struct action *ap;
4101
4102 for(i=0; i<lemp->nstate; i++){
4103 stp = lemp->sorted[i];
4104 stp->nTknAct = stp->nNtAct = 0;
4105 stp->iDflt = lemp->nstate + lemp->nrule;
4106 stp->iTknOfst = NO_OFFSET;
4107 stp->iNtOfst = NO_OFFSET;
4108 for(ap=stp->ap; ap; ap=ap->next){
4109 if( compute_action(lemp,ap)>=0 ){
4110 if( ap->sp->index<lemp->nterminal ){
4111 stp->nTknAct++;
4112 }else if( ap->sp->index<lemp->nsymbol ){
4113 stp->nNtAct++;
4114 }else{
4115 stp->iDflt = compute_action(lemp, ap);
4116 }
4117 }
4118 }
4119 }
4120 qsort(&lemp->sorted[1], lemp->nstate-1, sizeof(lemp->sorted[0]),
4121 stateResortCompare);
4122 for(i=0; i<lemp->nstate; i++){
4123 lemp->sorted[i]->statenum = i;
4124 }
4125}
4126
4127
drh75897232000-05-29 14:26:00 +00004128/***************** From the file "set.c" ************************************/
4129/*
4130** Set manipulation routines for the LEMON parser generator.
4131*/
4132
4133static int size = 0;
4134
4135/* Set the set size */
4136void SetSize(n)
4137int n;
4138{
4139 size = n+1;
4140}
4141
4142/* Allocate a new set */
4143char *SetNew(){
4144 char *s;
drh9892c5d2007-12-21 00:02:11 +00004145 s = (char*)calloc( size, 1);
drh75897232000-05-29 14:26:00 +00004146 if( s==0 ){
4147 extern void memory_error();
4148 memory_error();
4149 }
drh75897232000-05-29 14:26:00 +00004150 return s;
4151}
4152
4153/* Deallocate a set */
4154void SetFree(s)
4155char *s;
4156{
4157 free(s);
4158}
4159
4160/* Add a new element to the set. Return TRUE if the element was added
4161** and FALSE if it was already there. */
4162int SetAdd(s,e)
4163char *s;
4164int e;
4165{
4166 int rv;
drh9892c5d2007-12-21 00:02:11 +00004167 assert( e>=0 && e<size );
drh75897232000-05-29 14:26:00 +00004168 rv = s[e];
4169 s[e] = 1;
4170 return !rv;
4171}
4172
4173/* Add every element of s2 to s1. Return TRUE if s1 changes. */
4174int SetUnion(s1,s2)
4175char *s1;
4176char *s2;
4177{
4178 int i, progress;
4179 progress = 0;
4180 for(i=0; i<size; i++){
4181 if( s2[i]==0 ) continue;
4182 if( s1[i]==0 ){
4183 progress = 1;
4184 s1[i] = 1;
4185 }
4186 }
4187 return progress;
4188}
4189/********************** From the file "table.c" ****************************/
4190/*
4191** All code in this file has been automatically generated
4192** from a specification in the file
4193** "table.q"
4194** by the associative array code building program "aagen".
4195** Do not edit this file! Instead, edit the specification
4196** file, then rerun aagen.
4197*/
4198/*
4199** Code for processing tables in the LEMON parser generator.
4200*/
4201
4202PRIVATE int strhash(x)
4203char *x;
4204{
4205 int h = 0;
4206 while( *x) h = h*13 + *(x++);
4207 return h;
4208}
4209
4210/* Works like strdup, sort of. Save a string in malloced memory, but
4211** keep strings in a table so that the same string is not in more
4212** than one place.
4213*/
4214char *Strsafe(y)
4215char *y;
4216{
4217 char *z;
4218
drh916f75f2006-07-17 00:19:39 +00004219 if( y==0 ) return 0;
drh75897232000-05-29 14:26:00 +00004220 z = Strsafe_find(y);
drh87cf1372008-08-13 20:09:06 +00004221 if( z==0 && (z=malloc( lemonStrlen(y)+1 ))!=0 ){
drh75897232000-05-29 14:26:00 +00004222 strcpy(z,y);
4223 Strsafe_insert(z);
4224 }
4225 MemoryCheck(z);
4226 return z;
4227}
4228
4229/* There is one instance of the following structure for each
4230** associative array of type "x1".
4231*/
4232struct s_x1 {
4233 int size; /* The number of available slots. */
4234 /* Must be a power of 2 greater than or */
4235 /* equal to 1 */
4236 int count; /* Number of currently slots filled */
4237 struct s_x1node *tbl; /* The data stored here */
4238 struct s_x1node **ht; /* Hash table for lookups */
4239};
4240
4241/* There is one instance of this structure for every data element
4242** in an associative array of type "x1".
4243*/
4244typedef struct s_x1node {
4245 char *data; /* The data */
4246 struct s_x1node *next; /* Next entry with the same hash */
4247 struct s_x1node **from; /* Previous link */
4248} x1node;
4249
4250/* There is only one instance of the array, which is the following */
4251static struct s_x1 *x1a;
4252
4253/* Allocate a new associative array */
4254void Strsafe_init(){
4255 if( x1a ) return;
4256 x1a = (struct s_x1*)malloc( sizeof(struct s_x1) );
4257 if( x1a ){
4258 x1a->size = 1024;
4259 x1a->count = 0;
4260 x1a->tbl = (x1node*)malloc(
4261 (sizeof(x1node) + sizeof(x1node*))*1024 );
4262 if( x1a->tbl==0 ){
4263 free(x1a);
4264 x1a = 0;
4265 }else{
4266 int i;
4267 x1a->ht = (x1node**)&(x1a->tbl[1024]);
4268 for(i=0; i<1024; i++) x1a->ht[i] = 0;
4269 }
4270 }
4271}
4272/* Insert a new record into the array. Return TRUE if successful.
4273** Prior data with the same key is NOT overwritten */
4274int Strsafe_insert(data)
4275char *data;
4276{
4277 x1node *np;
4278 int h;
4279 int ph;
4280
4281 if( x1a==0 ) return 0;
4282 ph = strhash(data);
4283 h = ph & (x1a->size-1);
4284 np = x1a->ht[h];
4285 while( np ){
4286 if( strcmp(np->data,data)==0 ){
4287 /* An existing entry with the same key is found. */
4288 /* Fail because overwrite is not allows. */
4289 return 0;
4290 }
4291 np = np->next;
4292 }
4293 if( x1a->count>=x1a->size ){
4294 /* Need to make the hash table bigger */
4295 int i,size;
4296 struct s_x1 array;
4297 array.size = size = x1a->size*2;
4298 array.count = x1a->count;
4299 array.tbl = (x1node*)malloc(
4300 (sizeof(x1node) + sizeof(x1node*))*size );
4301 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */
4302 array.ht = (x1node**)&(array.tbl[size]);
4303 for(i=0; i<size; i++) array.ht[i] = 0;
4304 for(i=0; i<x1a->count; i++){
4305 x1node *oldnp, *newnp;
4306 oldnp = &(x1a->tbl[i]);
4307 h = strhash(oldnp->data) & (size-1);
4308 newnp = &(array.tbl[i]);
4309 if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
4310 newnp->next = array.ht[h];
4311 newnp->data = oldnp->data;
4312 newnp->from = &(array.ht[h]);
4313 array.ht[h] = newnp;
4314 }
4315 free(x1a->tbl);
4316 *x1a = array;
4317 }
4318 /* Insert the new data */
4319 h = ph & (x1a->size-1);
4320 np = &(x1a->tbl[x1a->count++]);
4321 np->data = data;
4322 if( x1a->ht[h] ) x1a->ht[h]->from = &(np->next);
4323 np->next = x1a->ht[h];
4324 x1a->ht[h] = np;
4325 np->from = &(x1a->ht[h]);
4326 return 1;
4327}
4328
4329/* Return a pointer to data assigned to the given key. Return NULL
4330** if no such key. */
4331char *Strsafe_find(key)
4332char *key;
4333{
4334 int h;
4335 x1node *np;
4336
4337 if( x1a==0 ) return 0;
4338 h = strhash(key) & (x1a->size-1);
4339 np = x1a->ht[h];
4340 while( np ){
4341 if( strcmp(np->data,key)==0 ) break;
4342 np = np->next;
4343 }
4344 return np ? np->data : 0;
4345}
4346
4347/* Return a pointer to the (terminal or nonterminal) symbol "x".
4348** Create a new symbol if this is the first time "x" has been seen.
4349*/
4350struct symbol *Symbol_new(x)
4351char *x;
4352{
4353 struct symbol *sp;
4354
4355 sp = Symbol_find(x);
4356 if( sp==0 ){
drh9892c5d2007-12-21 00:02:11 +00004357 sp = (struct symbol *)calloc(1, sizeof(struct symbol) );
drh75897232000-05-29 14:26:00 +00004358 MemoryCheck(sp);
4359 sp->name = Strsafe(x);
4360 sp->type = isupper(*x) ? TERMINAL : NONTERMINAL;
4361 sp->rule = 0;
drh0bd1f4e2002-06-06 18:54:39 +00004362 sp->fallback = 0;
drh75897232000-05-29 14:26:00 +00004363 sp->prec = -1;
4364 sp->assoc = UNK;
4365 sp->firstset = 0;
drhaa9f1122007-08-23 02:50:56 +00004366 sp->lambda = LEMON_FALSE;
drh75897232000-05-29 14:26:00 +00004367 sp->destructor = 0;
drh4dc8ef52008-07-01 17:13:57 +00004368 sp->destLineno = 0;
drh75897232000-05-29 14:26:00 +00004369 sp->datatype = 0;
drhc4dd3fd2008-01-22 01:48:05 +00004370 sp->useCnt = 0;
drh75897232000-05-29 14:26:00 +00004371 Symbol_insert(sp,sp->name);
4372 }
drhc4dd3fd2008-01-22 01:48:05 +00004373 sp->useCnt++;
drh75897232000-05-29 14:26:00 +00004374 return sp;
4375}
4376
drh60d31652004-02-22 00:08:04 +00004377/* Compare two symbols for working purposes
4378**
4379** Symbols that begin with upper case letters (terminals or tokens)
4380** must sort before symbols that begin with lower case letters
4381** (non-terminals). Other than that, the order does not matter.
4382**
4383** We find experimentally that leaving the symbols in their original
4384** order (the order they appeared in the grammar file) gives the
4385** smallest parser tables in SQLite.
4386*/
4387int Symbolcmpp(struct symbol **a, struct symbol **b){
4388 int i1 = (**a).index + 10000000*((**a).name[0]>'Z');
4389 int i2 = (**b).index + 10000000*((**b).name[0]>'Z');
4390 return i1-i2;
drh75897232000-05-29 14:26:00 +00004391}
4392
4393/* There is one instance of the following structure for each
4394** associative array of type "x2".
4395*/
4396struct s_x2 {
4397 int size; /* The number of available slots. */
4398 /* Must be a power of 2 greater than or */
4399 /* equal to 1 */
4400 int count; /* Number of currently slots filled */
4401 struct s_x2node *tbl; /* The data stored here */
4402 struct s_x2node **ht; /* Hash table for lookups */
4403};
4404
4405/* There is one instance of this structure for every data element
4406** in an associative array of type "x2".
4407*/
4408typedef struct s_x2node {
4409 struct symbol *data; /* The data */
4410 char *key; /* The key */
4411 struct s_x2node *next; /* Next entry with the same hash */
4412 struct s_x2node **from; /* Previous link */
4413} x2node;
4414
4415/* There is only one instance of the array, which is the following */
4416static struct s_x2 *x2a;
4417
4418/* Allocate a new associative array */
4419void Symbol_init(){
4420 if( x2a ) return;
4421 x2a = (struct s_x2*)malloc( sizeof(struct s_x2) );
4422 if( x2a ){
4423 x2a->size = 128;
4424 x2a->count = 0;
4425 x2a->tbl = (x2node*)malloc(
4426 (sizeof(x2node) + sizeof(x2node*))*128 );
4427 if( x2a->tbl==0 ){
4428 free(x2a);
4429 x2a = 0;
4430 }else{
4431 int i;
4432 x2a->ht = (x2node**)&(x2a->tbl[128]);
4433 for(i=0; i<128; i++) x2a->ht[i] = 0;
4434 }
4435 }
4436}
4437/* Insert a new record into the array. Return TRUE if successful.
4438** Prior data with the same key is NOT overwritten */
4439int Symbol_insert(data,key)
4440struct symbol *data;
4441char *key;
4442{
4443 x2node *np;
4444 int h;
4445 int ph;
4446
4447 if( x2a==0 ) return 0;
4448 ph = strhash(key);
4449 h = ph & (x2a->size-1);
4450 np = x2a->ht[h];
4451 while( np ){
4452 if( strcmp(np->key,key)==0 ){
4453 /* An existing entry with the same key is found. */
4454 /* Fail because overwrite is not allows. */
4455 return 0;
4456 }
4457 np = np->next;
4458 }
4459 if( x2a->count>=x2a->size ){
4460 /* Need to make the hash table bigger */
4461 int i,size;
4462 struct s_x2 array;
4463 array.size = size = x2a->size*2;
4464 array.count = x2a->count;
4465 array.tbl = (x2node*)malloc(
4466 (sizeof(x2node) + sizeof(x2node*))*size );
4467 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */
4468 array.ht = (x2node**)&(array.tbl[size]);
4469 for(i=0; i<size; i++) array.ht[i] = 0;
4470 for(i=0; i<x2a->count; i++){
4471 x2node *oldnp, *newnp;
4472 oldnp = &(x2a->tbl[i]);
4473 h = strhash(oldnp->key) & (size-1);
4474 newnp = &(array.tbl[i]);
4475 if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
4476 newnp->next = array.ht[h];
4477 newnp->key = oldnp->key;
4478 newnp->data = oldnp->data;
4479 newnp->from = &(array.ht[h]);
4480 array.ht[h] = newnp;
4481 }
4482 free(x2a->tbl);
4483 *x2a = array;
4484 }
4485 /* Insert the new data */
4486 h = ph & (x2a->size-1);
4487 np = &(x2a->tbl[x2a->count++]);
4488 np->key = key;
4489 np->data = data;
4490 if( x2a->ht[h] ) x2a->ht[h]->from = &(np->next);
4491 np->next = x2a->ht[h];
4492 x2a->ht[h] = np;
4493 np->from = &(x2a->ht[h]);
4494 return 1;
4495}
4496
4497/* Return a pointer to data assigned to the given key. Return NULL
4498** if no such key. */
4499struct symbol *Symbol_find(key)
4500char *key;
4501{
4502 int h;
4503 x2node *np;
4504
4505 if( x2a==0 ) return 0;
4506 h = strhash(key) & (x2a->size-1);
4507 np = x2a->ht[h];
4508 while( np ){
4509 if( strcmp(np->key,key)==0 ) break;
4510 np = np->next;
4511 }
4512 return np ? np->data : 0;
4513}
4514
4515/* Return the n-th data. Return NULL if n is out of range. */
4516struct symbol *Symbol_Nth(n)
4517int n;
4518{
4519 struct symbol *data;
4520 if( x2a && n>0 && n<=x2a->count ){
4521 data = x2a->tbl[n-1].data;
4522 }else{
4523 data = 0;
4524 }
4525 return data;
4526}
4527
4528/* Return the size of the array */
4529int Symbol_count()
4530{
4531 return x2a ? x2a->count : 0;
4532}
4533
4534/* Return an array of pointers to all data in the table.
4535** The array is obtained from malloc. Return NULL if memory allocation
4536** problems, or if the array is empty. */
4537struct symbol **Symbol_arrayof()
4538{
4539 struct symbol **array;
4540 int i,size;
4541 if( x2a==0 ) return 0;
4542 size = x2a->count;
drh9892c5d2007-12-21 00:02:11 +00004543 array = (struct symbol **)calloc(size, sizeof(struct symbol *));
drh75897232000-05-29 14:26:00 +00004544 if( array ){
4545 for(i=0; i<size; i++) array[i] = x2a->tbl[i].data;
4546 }
4547 return array;
4548}
4549
4550/* Compare two configurations */
4551int Configcmp(a,b)
4552struct config *a;
4553struct config *b;
4554{
4555 int x;
4556 x = a->rp->index - b->rp->index;
4557 if( x==0 ) x = a->dot - b->dot;
4558 return x;
4559}
4560
4561/* Compare two states */
4562PRIVATE int statecmp(a,b)
4563struct config *a;
4564struct config *b;
4565{
4566 int rc;
4567 for(rc=0; rc==0 && a && b; a=a->bp, b=b->bp){
4568 rc = a->rp->index - b->rp->index;
4569 if( rc==0 ) rc = a->dot - b->dot;
4570 }
4571 if( rc==0 ){
4572 if( a ) rc = 1;
4573 if( b ) rc = -1;
4574 }
4575 return rc;
4576}
4577
4578/* Hash a state */
4579PRIVATE int statehash(a)
4580struct config *a;
4581{
4582 int h=0;
4583 while( a ){
4584 h = h*571 + a->rp->index*37 + a->dot;
4585 a = a->bp;
4586 }
4587 return h;
4588}
4589
4590/* Allocate a new state structure */
4591struct state *State_new()
4592{
4593 struct state *new;
drh9892c5d2007-12-21 00:02:11 +00004594 new = (struct state *)calloc(1, sizeof(struct state) );
drh75897232000-05-29 14:26:00 +00004595 MemoryCheck(new);
4596 return new;
4597}
4598
4599/* There is one instance of the following structure for each
4600** associative array of type "x3".
4601*/
4602struct s_x3 {
4603 int size; /* The number of available slots. */
4604 /* Must be a power of 2 greater than or */
4605 /* equal to 1 */
4606 int count; /* Number of currently slots filled */
4607 struct s_x3node *tbl; /* The data stored here */
4608 struct s_x3node **ht; /* Hash table for lookups */
4609};
4610
4611/* There is one instance of this structure for every data element
4612** in an associative array of type "x3".
4613*/
4614typedef struct s_x3node {
4615 struct state *data; /* The data */
4616 struct config *key; /* The key */
4617 struct s_x3node *next; /* Next entry with the same hash */
4618 struct s_x3node **from; /* Previous link */
4619} x3node;
4620
4621/* There is only one instance of the array, which is the following */
4622static struct s_x3 *x3a;
4623
4624/* Allocate a new associative array */
4625void State_init(){
4626 if( x3a ) return;
4627 x3a = (struct s_x3*)malloc( sizeof(struct s_x3) );
4628 if( x3a ){
4629 x3a->size = 128;
4630 x3a->count = 0;
4631 x3a->tbl = (x3node*)malloc(
4632 (sizeof(x3node) + sizeof(x3node*))*128 );
4633 if( x3a->tbl==0 ){
4634 free(x3a);
4635 x3a = 0;
4636 }else{
4637 int i;
4638 x3a->ht = (x3node**)&(x3a->tbl[128]);
4639 for(i=0; i<128; i++) x3a->ht[i] = 0;
4640 }
4641 }
4642}
4643/* Insert a new record into the array. Return TRUE if successful.
4644** Prior data with the same key is NOT overwritten */
4645int State_insert(data,key)
4646struct state *data;
4647struct config *key;
4648{
4649 x3node *np;
4650 int h;
4651 int ph;
4652
4653 if( x3a==0 ) return 0;
4654 ph = statehash(key);
4655 h = ph & (x3a->size-1);
4656 np = x3a->ht[h];
4657 while( np ){
4658 if( statecmp(np->key,key)==0 ){
4659 /* An existing entry with the same key is found. */
4660 /* Fail because overwrite is not allows. */
4661 return 0;
4662 }
4663 np = np->next;
4664 }
4665 if( x3a->count>=x3a->size ){
4666 /* Need to make the hash table bigger */
4667 int i,size;
4668 struct s_x3 array;
4669 array.size = size = x3a->size*2;
4670 array.count = x3a->count;
4671 array.tbl = (x3node*)malloc(
4672 (sizeof(x3node) + sizeof(x3node*))*size );
4673 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */
4674 array.ht = (x3node**)&(array.tbl[size]);
4675 for(i=0; i<size; i++) array.ht[i] = 0;
4676 for(i=0; i<x3a->count; i++){
4677 x3node *oldnp, *newnp;
4678 oldnp = &(x3a->tbl[i]);
4679 h = statehash(oldnp->key) & (size-1);
4680 newnp = &(array.tbl[i]);
4681 if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
4682 newnp->next = array.ht[h];
4683 newnp->key = oldnp->key;
4684 newnp->data = oldnp->data;
4685 newnp->from = &(array.ht[h]);
4686 array.ht[h] = newnp;
4687 }
4688 free(x3a->tbl);
4689 *x3a = array;
4690 }
4691 /* Insert the new data */
4692 h = ph & (x3a->size-1);
4693 np = &(x3a->tbl[x3a->count++]);
4694 np->key = key;
4695 np->data = data;
4696 if( x3a->ht[h] ) x3a->ht[h]->from = &(np->next);
4697 np->next = x3a->ht[h];
4698 x3a->ht[h] = np;
4699 np->from = &(x3a->ht[h]);
4700 return 1;
4701}
4702
4703/* Return a pointer to data assigned to the given key. Return NULL
4704** if no such key. */
4705struct state *State_find(key)
4706struct config *key;
4707{
4708 int h;
4709 x3node *np;
4710
4711 if( x3a==0 ) return 0;
4712 h = statehash(key) & (x3a->size-1);
4713 np = x3a->ht[h];
4714 while( np ){
4715 if( statecmp(np->key,key)==0 ) break;
4716 np = np->next;
4717 }
4718 return np ? np->data : 0;
4719}
4720
4721/* Return an array of pointers to all data in the table.
4722** The array is obtained from malloc. Return NULL if memory allocation
4723** problems, or if the array is empty. */
4724struct state **State_arrayof()
4725{
4726 struct state **array;
4727 int i,size;
4728 if( x3a==0 ) return 0;
4729 size = x3a->count;
4730 array = (struct state **)malloc( sizeof(struct state *)*size );
4731 if( array ){
4732 for(i=0; i<size; i++) array[i] = x3a->tbl[i].data;
4733 }
4734 return array;
4735}
4736
4737/* Hash a configuration */
4738PRIVATE int confighash(a)
4739struct config *a;
4740{
4741 int h=0;
4742 h = h*571 + a->rp->index*37 + a->dot;
4743 return h;
4744}
4745
4746/* There is one instance of the following structure for each
4747** associative array of type "x4".
4748*/
4749struct s_x4 {
4750 int size; /* The number of available slots. */
4751 /* Must be a power of 2 greater than or */
4752 /* equal to 1 */
4753 int count; /* Number of currently slots filled */
4754 struct s_x4node *tbl; /* The data stored here */
4755 struct s_x4node **ht; /* Hash table for lookups */
4756};
4757
4758/* There is one instance of this structure for every data element
4759** in an associative array of type "x4".
4760*/
4761typedef struct s_x4node {
4762 struct config *data; /* The data */
4763 struct s_x4node *next; /* Next entry with the same hash */
4764 struct s_x4node **from; /* Previous link */
4765} x4node;
4766
4767/* There is only one instance of the array, which is the following */
4768static struct s_x4 *x4a;
4769
4770/* Allocate a new associative array */
4771void Configtable_init(){
4772 if( x4a ) return;
4773 x4a = (struct s_x4*)malloc( sizeof(struct s_x4) );
4774 if( x4a ){
4775 x4a->size = 64;
4776 x4a->count = 0;
4777 x4a->tbl = (x4node*)malloc(
4778 (sizeof(x4node) + sizeof(x4node*))*64 );
4779 if( x4a->tbl==0 ){
4780 free(x4a);
4781 x4a = 0;
4782 }else{
4783 int i;
4784 x4a->ht = (x4node**)&(x4a->tbl[64]);
4785 for(i=0; i<64; i++) x4a->ht[i] = 0;
4786 }
4787 }
4788}
4789/* Insert a new record into the array. Return TRUE if successful.
4790** Prior data with the same key is NOT overwritten */
4791int Configtable_insert(data)
4792struct config *data;
4793{
4794 x4node *np;
4795 int h;
4796 int ph;
4797
4798 if( x4a==0 ) return 0;
4799 ph = confighash(data);
4800 h = ph & (x4a->size-1);
4801 np = x4a->ht[h];
4802 while( np ){
4803 if( Configcmp(np->data,data)==0 ){
4804 /* An existing entry with the same key is found. */
4805 /* Fail because overwrite is not allows. */
4806 return 0;
4807 }
4808 np = np->next;
4809 }
4810 if( x4a->count>=x4a->size ){
4811 /* Need to make the hash table bigger */
4812 int i,size;
4813 struct s_x4 array;
4814 array.size = size = x4a->size*2;
4815 array.count = x4a->count;
4816 array.tbl = (x4node*)malloc(
4817 (sizeof(x4node) + sizeof(x4node*))*size );
4818 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */
4819 array.ht = (x4node**)&(array.tbl[size]);
4820 for(i=0; i<size; i++) array.ht[i] = 0;
4821 for(i=0; i<x4a->count; i++){
4822 x4node *oldnp, *newnp;
4823 oldnp = &(x4a->tbl[i]);
4824 h = confighash(oldnp->data) & (size-1);
4825 newnp = &(array.tbl[i]);
4826 if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
4827 newnp->next = array.ht[h];
4828 newnp->data = oldnp->data;
4829 newnp->from = &(array.ht[h]);
4830 array.ht[h] = newnp;
4831 }
4832 free(x4a->tbl);
4833 *x4a = array;
4834 }
4835 /* Insert the new data */
4836 h = ph & (x4a->size-1);
4837 np = &(x4a->tbl[x4a->count++]);
4838 np->data = data;
4839 if( x4a->ht[h] ) x4a->ht[h]->from = &(np->next);
4840 np->next = x4a->ht[h];
4841 x4a->ht[h] = np;
4842 np->from = &(x4a->ht[h]);
4843 return 1;
4844}
4845
4846/* Return a pointer to data assigned to the given key. Return NULL
4847** if no such key. */
4848struct config *Configtable_find(key)
4849struct config *key;
4850{
4851 int h;
4852 x4node *np;
4853
4854 if( x4a==0 ) return 0;
4855 h = confighash(key) & (x4a->size-1);
4856 np = x4a->ht[h];
4857 while( np ){
4858 if( Configcmp(np->data,key)==0 ) break;
4859 np = np->next;
4860 }
4861 return np ? np->data : 0;
4862}
4863
4864/* Remove all data from the table. Pass each data to the function "f"
4865** as it is removed. ("f" may be null to avoid this step.) */
4866void Configtable_clear(f)
4867int(*f)(/* struct config * */);
4868{
4869 int i;
4870 if( x4a==0 || x4a->count==0 ) return;
4871 if( f ) for(i=0; i<x4a->count; i++) (*f)(x4a->tbl[i].data);
4872 for(i=0; i<x4a->size; i++) x4a->ht[i] = 0;
4873 x4a->count = 0;
4874 return;
4875}