blob: ab7bcca23d47de084293557c9fca255d4e60fd6e [file] [log] [blame]
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001/*
2*******************************************************************************
3*
4* Copyright (C) 1998-2013, International Business Machines
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8*
9* File parse.cpp
10*
11* Modification History:
12*
13* Date Name Description
14* 05/26/99 stephen Creation.
15* 02/25/00 weiv Overhaul to write udata
16* 5/10/01 Ram removed ustdio dependency
17* 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
18*******************************************************************************
19*/
20
21#include "ucol_imp.h"
22#include "parse.h"
23#include "errmsg.h"
24#include "uhash.h"
25#include "cmemory.h"
26#include "cstring.h"
27#include "uinvchar.h"
28#include "read.h"
29#include "ustr.h"
30#include "reslist.h"
31#include "rbt_pars.h"
32#include "genrb.h"
33#include "unicode/ustring.h"
34#include "unicode/uscript.h"
35#include "unicode/putil.h"
36#include <stdio.h>
37
/* Number of tokens to read ahead of the current stream position */
#define MAX_LOOKAHEAD 3

/* Code points that the rule-file reader in this file compares against. */
#define CR 0x000D             /* carriage return */
#define LF 0x000A             /* line feed */
#define SPACE 0x0020          /* ' ' */
#define TAB 0x0009            /* horizontal tab */
#define ESCAPE 0x005C         /* '\\' - starts an escape sequence */
#define HASH 0x0023           /* '#' - starts a comment when not quoted */
#define QUOTE 0x0027          /* '\'' - toggles the quoted state */
#define ZERO 0x0030           /* '0' */
#define STARTCOMMAND 0x005B   /* '[' - same value as OPENSQBRACKET */
#define ENDCOMMAND 0x005D     /* ']' - same value as CLOSESQBRACKET */
#define OPENSQBRACKET 0x005B  /* '[' */
#define CLOSESQBRACKET 0x005D /* ']' */
53
/* One slot of the parser's circular token buffer; every field is filled by getNextToken(). */
struct Lookahead
{
    enum ETokenType type;   /* kind of token stored in this slot */
    struct UString value;   /* text of the token */
    struct UString comment; /* comment text delivered together with the token */
    uint32_t line;          /* line number reported for the token */
};
61
/* keep in sync with token defines in read.h */
/* Printable token names, indexed by enum ETokenType; used when reporting parse errors. */
const char *tokenNames[TOK_TOKEN_COUNT] =
{
    "string", /* A string token, such as "MonthNames" */
    "'{'", /* An opening brace character */
    "'}'", /* A closing brace character */
    "','", /* A comma */
    "':'", /* A colon */

    "<end of file>", /* End of the file has been reached successfully */
    "<end of line>" /* NOTE(review): confirm against read.h which token value this last entry describes */
};
74
75/* Just to store "TRUE" */
76//static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
77
/* All state needed to parse one resource bundle source file. */
typedef struct {
    struct Lookahead lookahead[MAX_LOOKAHEAD + 1]; /* circular token buffer */
    uint32_t lookaheadPosition;                    /* index of the slot holding the current token */
    UCHARBUF *buffer;                              /* input the tokens are read from */
    struct SRBRoot *bundle;                        /* bundle that owns every resource created while parsing */
    const char *inputdir;                          /* prefix for files referenced from the source, may be NULL */
    uint32_t inputdirLength;                       /* used to index the last character of inputdir */
    const char *outputdir;                         /* prefix used when checking dependency files, may be NULL */
    uint32_t outputdirLength;                      /* used to index the last character of outputdir */
    UBool makeBinaryCollation;                     /* when TRUE, addCollation() builds %%CollationBin */
} ParseState;
89
/*
 * When TRUE, collation rule text is left out of the output: parseUCARules()
 * returns res_none() and addCollation() discards the "Sequence" string.
 */
static UBool gOmitCollationRules = FALSE;

/* Signature shared by the type-specific parse functions in this file (parseString, parseAlias, ...). */
typedef struct SResource *
ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);

/* Forward declaration: the collation parsers below call parseResource() before it is defined. */
static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
96
/* The nature of the lookahead buffer:
   There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
   MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
   When getToken is called, the current pointer is moved to the next slot and the
   old slot is refilled with the next token from the reader by calling getNextToken.
   The token values are stored in the slots, which means that a token value does not
   survive a later call to getToken, i.e.

       struct UString *value;

       getToken(state, &value, NULL, NULL, status);
       getToken(state, NULL, NULL, NULL, status);    bad - value is now a different string
*/
110static void
111initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
112{
113 static uint32_t initTypeStrings = 0;
114 uint32_t i;
115
116 if (!initTypeStrings)
117 {
118 initTypeStrings = 1;
119 }
120
121 state->lookaheadPosition = 0;
122 state->buffer = buf;
123
124 resetLineNumber();
125
126 for (i = 0; i < MAX_LOOKAHEAD; i++)
127 {
128 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
129 if (U_FAILURE(*status))
130 {
131 return;
132 }
133 }
134
135 *status = U_ZERO_ERROR;
136}
137
138static void
139cleanupLookahead(ParseState* state)
140{
141 uint32_t i;
142 for (i = 0; i <= MAX_LOOKAHEAD; i++)
143 {
144 ustr_deinit(&state->lookahead[i].value);
145 ustr_deinit(&state->lookahead[i].comment);
146 }
147
148}
149
150static enum ETokenType
151getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
152{
153 enum ETokenType result;
154 uint32_t i;
155
156 result = state->lookahead[state->lookaheadPosition].type;
157
158 if (tokenValue != NULL)
159 {
160 *tokenValue = &state->lookahead[state->lookaheadPosition].value;
161 }
162
163 if (linenumber != NULL)
164 {
165 *linenumber = state->lookahead[state->lookaheadPosition].line;
166 }
167
168 if (comment != NULL)
169 {
170 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
171 }
172
173 i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
174 state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
175 ustr_setlen(&state->lookahead[i].comment, 0, status);
176 ustr_setlen(&state->lookahead[i].value, 0, status);
177 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
178
179 /* printf("getToken, returning %s\n", tokenNames[result]); */
180
181 return result;
182}
183
184static enum ETokenType
185peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
186{
187 uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
188
189 if (U_FAILURE(*status))
190 {
191 return TOK_ERROR;
192 }
193
194 if (lookaheadCount >= MAX_LOOKAHEAD)
195 {
196 *status = U_INTERNAL_PROGRAM_ERROR;
197 return TOK_ERROR;
198 }
199
200 if (tokenValue != NULL)
201 {
202 *tokenValue = &state->lookahead[i].value;
203 }
204
205 if (linenumber != NULL)
206 {
207 *linenumber = state->lookahead[i].line;
208 }
209
210 if(comment != NULL){
211 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
212 }
213
214 return state->lookahead[i].type;
215}
216
217static void
218expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
219{
220 uint32_t line;
221
222 enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
223
224 if (linenumber != NULL)
225 {
226 *linenumber = line;
227 }
228
229 if (U_FAILURE(*status))
230 {
231 return;
232 }
233
234 if (token != expectedToken)
235 {
236 *status = U_INVALID_FORMAT_ERROR;
237 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
238 }
239 else
240 {
241 *status = U_ZERO_ERROR;
242 }
243}
244
245static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
246{
247 struct UString *tokenValue;
248 char *result;
249 uint32_t count;
250
251 expect(state, TOK_STRING, &tokenValue, comment, line, status);
252
253 if (U_FAILURE(*status))
254 {
255 return NULL;
256 }
257
258 count = u_strlen(tokenValue->fChars);
259 if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
260 *status = U_INVALID_FORMAT_ERROR;
261 error(*line, "invariant characters required for table keys, binary data, etc.");
262 return NULL;
263 }
264
265 result = static_cast<char *>(uprv_malloc(count+1));
266
267 if (result == NULL)
268 {
269 *status = U_MEMORY_ALLOCATION_ERROR;
270 return NULL;
271 }
272
273 u_UCharsToChars(tokenValue->fChars, result, count+1);
274 return result;
275}
276
277static struct SResource *
278parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
279{
280 struct SResource *result = NULL;
281 struct UString *tokenValue;
282 FileStream *file = NULL;
283 char filename[256] = { '\0' };
284 char cs[128] = { '\0' };
285 uint32_t line;
286 UBool quoted = FALSE;
287 UCHARBUF *ucbuf=NULL;
288 UChar32 c = 0;
289 const char* cp = NULL;
290 UChar *pTarget = NULL;
291 UChar *target = NULL;
292 UChar *targetLimit = NULL;
293 int32_t size = 0;
294
295 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
296
297 if(isVerbose()){
298 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
299 }
300
301 if (U_FAILURE(*status))
302 {
303 return NULL;
304 }
305 /* make the filename including the directory */
306 if (state->inputdir != NULL)
307 {
308 uprv_strcat(filename, state->inputdir);
309
310 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
311 {
312 uprv_strcat(filename, U_FILE_SEP_STRING);
313 }
314 }
315
316 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
317
318 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
319
320 if (U_FAILURE(*status))
321 {
322 return NULL;
323 }
324 uprv_strcat(filename, cs);
325
326 if(gOmitCollationRules) {
327 return res_none();
328 }
329
330 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
331
332 if (U_FAILURE(*status)) {
333 error(line, "An error occured while opening the input file %s\n", filename);
334 return NULL;
335 }
336
337 /* We allocate more space than actually required
338 * since the actual size needed for storing UChars
339 * is not known in UTF-8 byte stream
340 */
341 size = ucbuf_size(ucbuf) + 1;
342 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
343 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
344 target = pTarget;
345 targetLimit = pTarget+size;
346
347 /* read the rules into the buffer */
348 while (target < targetLimit)
349 {
350 c = ucbuf_getc(ucbuf, status);
351 if(c == QUOTE) {
352 quoted = (UBool)!quoted;
353 }
354 /* weiv (06/26/2002): adding the following:
355 * - preserving spaces in commands [...]
356 * - # comments until the end of line
357 */
358 if (c == STARTCOMMAND && !quoted)
359 {
360 /* preserve commands
361 * closing bracket will be handled by the
362 * append at the end of the loop
363 */
364 while(c != ENDCOMMAND) {
365 U_APPEND_CHAR32_ONLY(c, target);
366 c = ucbuf_getc(ucbuf, status);
367 }
368 }
369 else if (c == HASH && !quoted) {
370 /* skip comments */
371 while(c != CR && c != LF) {
372 c = ucbuf_getc(ucbuf, status);
373 }
374 continue;
375 }
376 else if (c == ESCAPE)
377 {
378 c = unescape(ucbuf, status);
379
380 if (c == (UChar32)U_ERR)
381 {
382 uprv_free(pTarget);
383 T_FileStream_close(file);
384 return NULL;
385 }
386 }
387 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
388 {
389 /* ignore spaces carriage returns
390 * and line feed unless in the form \uXXXX
391 */
392 continue;
393 }
394
395 /* Append UChar * after dissembling if c > 0xffff*/
396 if (c != (UChar32)U_EOF)
397 {
398 U_APPEND_CHAR32_ONLY(c, target);
399 }
400 else
401 {
402 break;
403 }
404 }
405
406 /* terminate the string */
407 if(target < targetLimit){
408 *target = 0x0000;
409 }
410
411 result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
412
413
414 ucbuf_close(ucbuf);
415 uprv_free(pTarget);
416 T_FileStream_close(file);
417
418 return result;
419}
420
421static struct SResource *
422parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
423{
424 struct SResource *result = NULL;
425 struct UString *tokenValue;
426 FileStream *file = NULL;
427 char filename[256] = { '\0' };
428 char cs[128] = { '\0' };
429 uint32_t line;
430 UCHARBUF *ucbuf=NULL;
431 const char* cp = NULL;
432 UChar *pTarget = NULL;
433 const UChar *pSource = NULL;
434 int32_t size = 0;
435
436 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
437
438 if(isVerbose()){
439 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
440 }
441
442 if (U_FAILURE(*status))
443 {
444 return NULL;
445 }
446 /* make the filename including the directory */
447 if (state->inputdir != NULL)
448 {
449 uprv_strcat(filename, state->inputdir);
450
451 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
452 {
453 uprv_strcat(filename, U_FILE_SEP_STRING);
454 }
455 }
456
457 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
458
459 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
460
461 if (U_FAILURE(*status))
462 {
463 return NULL;
464 }
465 uprv_strcat(filename, cs);
466
467
468 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
469
470 if (U_FAILURE(*status)) {
471 error(line, "An error occured while opening the input file %s\n", filename);
472 return NULL;
473 }
474
475 /* We allocate more space than actually required
476 * since the actual size needed for storing UChars
477 * is not known in UTF-8 byte stream
478 */
479 pSource = ucbuf_getBuffer(ucbuf, &size, status);
480 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
481 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
482
483#if !UCONFIG_NO_TRANSLITERATION
484 size = utrans_stripRules(pSource, size, pTarget, status);
485#else
486 size = 0;
487 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
488#endif
489 result = string_open(state->bundle, tag, pTarget, size, NULL, status);
490
491 ucbuf_close(ucbuf);
492 uprv_free(pTarget);
493 T_FileStream_close(file);
494
495 return result;
496}
497static struct SResource* dependencyArray = NULL;
498
499static struct SResource *
500parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
501{
502 struct SResource *result = NULL;
503 struct SResource *elem = NULL;
504 struct UString *tokenValue;
505 uint32_t line;
506 char filename[256] = { '\0' };
507 char cs[128] = { '\0' };
508
509 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
510
511 if(isVerbose()){
512 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
513 }
514
515 if (U_FAILURE(*status))
516 {
517 return NULL;
518 }
519 /* make the filename including the directory */
520 if (state->outputdir != NULL)
521 {
522 uprv_strcat(filename, state->outputdir);
523
524 if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
525 {
526 uprv_strcat(filename, U_FILE_SEP_STRING);
527 }
528 }
529
530 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
531
532 if (U_FAILURE(*status))
533 {
534 return NULL;
535 }
536 uprv_strcat(filename, cs);
537 if(!T_FileStream_file_exists(filename)){
538 if(isStrict()){
539 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
540 }else{
541 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
542 }
543 }
544 if(dependencyArray==NULL){
545 dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
546 }
547 if(tag!=NULL){
548 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
549 }
550 elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
551
552 array_add(dependencyArray, elem, status);
553
554 if (U_FAILURE(*status))
555 {
556 return NULL;
557 }
558 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
559 return result;
560}
561static struct SResource *
562parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
563{
564 struct UString *tokenValue;
565 struct SResource *result = NULL;
566
567/* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
568 {
569 return parseUCARules(tag, startline, status);
570 }*/
571 if(isVerbose()){
572 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
573 }
574 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
575
576 if (U_SUCCESS(*status))
577 {
578 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
579 doesn't survive expect either) */
580
581 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
582 if(U_SUCCESS(*status) && result) {
583 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
584
585 if (U_FAILURE(*status))
586 {
587 res_close(result);
588 return NULL;
589 }
590 }
591 }
592
593 return result;
594}
595
596static struct SResource *
597parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
598{
599 struct UString *tokenValue;
600 struct SResource *result = NULL;
601
602 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
603
604 if(isVerbose()){
605 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
606 }
607
608 if (U_SUCCESS(*status))
609 {
610 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
611 doesn't survive expect either) */
612
613 result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
614
615 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
616
617 if (U_FAILURE(*status))
618 {
619 res_close(result);
620 return NULL;
621 }
622 }
623
624 return result;
625}
626
/* Context handed to importFromDataFile(): the directories forwarded to parse(). */
typedef struct{
    const char* inputDir;  /* directory the imported locale file is read from; may be NULL */
    const char* outputDir; /* passed through to parse() unchanged */
} GenrbData;
631
632static struct SResource* resLookup(struct SResource* res, const char* key){
633 struct SResource *current = NULL;
634 struct SResTable *list;
635 if (res == res_none()) {
636 return NULL;
637 }
638
639 list = &(res->u.fTable);
640
641 current = list->fFirst;
642 while (current != NULL) {
643 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
644 return current;
645 }
646 current = current->fNext;
647 }
648 return NULL;
649}
650
651static const UChar* importFromDataFile(void* context, const char* locale, const char* type, int32_t* pLength, UErrorCode* status){
652 struct SRBRoot *data = NULL;
653 UCHARBUF *ucbuf = NULL;
654 GenrbData* genrbdata = (GenrbData*) context;
655 int localeLength = strlen(locale);
656 char* filename = (char*)uprv_malloc(localeLength+5);
657 char *inputDirBuf = NULL;
658 char *openFileName = NULL;
659 const char* cp = "";
660 UChar* urules = NULL;
661 int32_t urulesLength = 0;
662 int32_t i = 0;
663 int32_t dirlen = 0;
664 int32_t filelen = 0;
665 struct SResource* root;
666 struct SResource* collations;
667 struct SResource* collation;
668 struct SResource* sequence;
669
670 memcpy(filename, locale, localeLength);
671 for(i = 0; i < localeLength; i++){
672 if(filename[i] == '-'){
673 filename[i] = '_';
674 }
675 }
676 filename[localeLength] = '.';
677 filename[localeLength+1] = 't';
678 filename[localeLength+2] = 'x';
679 filename[localeLength+3] = 't';
680 filename[localeLength+4] = 0;
681
682
683 if (status==NULL || U_FAILURE(*status)) {
684 return NULL;
685 }
686 if(filename==NULL){
687 *status=U_ILLEGAL_ARGUMENT_ERROR;
688 return NULL;
689 }else{
690 filelen = (int32_t)uprv_strlen(filename);
691 }
692 if(genrbdata->inputDir == NULL) {
693 const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
694 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
695 openFileName[0] = '\0';
696 if (filenameBegin != NULL) {
697 /*
698 * When a filename ../../../data/root.txt is specified,
699 * we presume that the input directory is ../../../data
700 * This is very important when the resource file includes
701 * another file, like UCARules.txt or thaidict.brk.
702 */
703 int32_t filenameSize = (int32_t)(filenameBegin - filename + 1);
704 inputDirBuf = uprv_strncpy((char *)uprv_malloc(filenameSize), filename, filenameSize);
705
706 /* test for NULL */
707 if(inputDirBuf == NULL) {
708 *status = U_MEMORY_ALLOCATION_ERROR;
709 goto finish;
710 }
711
712 inputDirBuf[filenameSize - 1] = 0;
713 genrbdata->inputDir = inputDirBuf;
714 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir);
715 }
716 }else{
717 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir);
718
719 if(genrbdata->inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
720 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
721
722 /* test for NULL */
723 if(openFileName == NULL) {
724 *status = U_MEMORY_ALLOCATION_ERROR;
725 goto finish;
726 }
727
728 openFileName[0] = '\0';
729 /*
730 * append the input dir to openFileName if the first char in
731 * filename is not file seperation char and the last char input directory is not '.'.
732 * This is to support :
733 * genrb -s. /home/icu/data
734 * genrb -s. icu/data
735 * The user cannot mix notations like
736 * genrb -s. /icu/data --- the absolute path specified. -s redundant
737 * user should use
738 * genrb -s. icu/data --- start from CWD and look in icu/data dir
739 */
740 if( (filename[0] != U_FILE_SEP_CHAR) && (genrbdata->inputDir[dirlen-1] !='.')){
741 uprv_strcpy(openFileName, genrbdata->inputDir);
742 openFileName[dirlen] = U_FILE_SEP_CHAR;
743 }
744 openFileName[dirlen + 1] = '\0';
745 } else {
746 openFileName = (char *) uprv_malloc(dirlen + filelen + 1);
747
748 /* test for NULL */
749 if(openFileName == NULL) {
750 *status = U_MEMORY_ALLOCATION_ERROR;
751 goto finish;
752 }
753
754 uprv_strcpy(openFileName, genrbdata->inputDir);
755
756 }
757 }
758 uprv_strcat(openFileName, filename);
759 /* printf("%s\n", openFileName); */
760 *status = U_ZERO_ERROR;
761 ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, status);
762
763 if(*status == U_FILE_ACCESS_ERROR) {
764
765 fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName);
766 goto finish;
767 }
768 if (ucbuf == NULL || U_FAILURE(*status)) {
769 fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName,u_errorName(*status));
770 goto finish;
771 }
772
773 /* Parse the data into an SRBRoot */
774 data = parse(ucbuf, genrbdata->inputDir, genrbdata->outputDir, FALSE, status);
775
776 root = data->fRoot;
777 collations = resLookup(root, "collations");
778 if (collations != NULL) {
779 collation = resLookup(collations, type);
780 if (collation != NULL) {
781 sequence = resLookup(collation, "Sequence");
782 if (sequence != NULL) {
783 urules = sequence->u.fString.fChars;
784 urulesLength = sequence->u.fString.fLength;
785 *pLength = urulesLength;
786 }
787 }
788 }
789
790finish:
791 if (inputDirBuf != NULL) {
792 uprv_free(inputDirBuf);
793 }
794
795 if (openFileName != NULL) {
796 uprv_free(openFileName);
797 }
798
799 if(ucbuf) {
800 ucbuf_close(ucbuf);
801 }
802
803 return urules;
804}
805
806// Quick-and-dirty escaping function.
807// Assumes that we are on an ASCII-based platform.
808static void
809escape(const UChar *s, char *buffer) {
810 int32_t length = u_strlen(s);
811 int32_t i = 0;
812 for (;;) {
813 UChar32 c;
814 U16_NEXT(s, i, length, c);
815 if (c == 0) {
816 *buffer = 0;
817 return;
818 } else if (0x20 <= c && c <= 0x7e) {
819 // printable ASCII
820 *buffer++ = (char)c; // assumes ASCII-based platform
821 } else {
822 buffer += sprintf(buffer, "\\u%04X", (int)c);
823 }
824 }
825}
826
827static struct SResource *
828addCollation(ParseState* state, struct SResource *result, uint32_t startline, UErrorCode *status)
829{
830 struct SResource *member = NULL;
831 struct UString *tokenValue;
832 struct UString comment;
833 enum ETokenType token;
834 char subtag[1024];
835 UVersionInfo version;
836 uint32_t line;
837 GenrbData genrbdata;
838 /* '{' . (name resource)* '}' */
839 version[0]=0; version[1]=0; version[2]=0; version[3]=0;
840
841 for (;;)
842 {
843 ustr_init(&comment);
844 token = getToken(state, &tokenValue, &comment, &line, status);
845
846 if (token == TOK_CLOSE_BRACE)
847 {
848 return result;
849 }
850
851 if (token != TOK_STRING)
852 {
853 res_close(result);
854 *status = U_INVALID_FORMAT_ERROR;
855
856 if (token == TOK_EOF)
857 {
858 error(startline, "unterminated table");
859 }
860 else
861 {
862 error(line, "Unexpected token %s", tokenNames[token]);
863 }
864
865 return NULL;
866 }
867
868 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
869
870 if (U_FAILURE(*status))
871 {
872 res_close(result);
873 return NULL;
874 }
875
876 member = parseResource(state, subtag, NULL, status);
877
878 if (U_FAILURE(*status))
879 {
880 res_close(result);
881 return NULL;
882 }
883
884 if (uprv_strcmp(subtag, "Version") == 0)
885 {
886 char ver[40];
887 int32_t length = member->u.fString.fLength;
888
889 if (length >= (int32_t) sizeof(ver))
890 {
891 length = (int32_t) sizeof(ver) - 1;
892 }
893
894 u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
895 u_versionFromString(version, ver);
896
897 table_add(result, member, line, status);
898
899 }
900 else if (uprv_strcmp(subtag, "Override") == 0)
901 {
902 // UBool override = (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0);
903 table_add(result, member, line, status);
904
905 }
906 else if(uprv_strcmp(subtag, "%%CollationBin")==0)
907 {
908 /* discard duplicate %%CollationBin if any*/
909 }
910 else if (uprv_strcmp(subtag, "Sequence") == 0)
911 {
912#if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
913 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
914#else
915 if(state->makeBinaryCollation) {
916
917 /* do the collation elements */
918 int32_t len = 0;
919 uint8_t *data = NULL;
920 UCollator *coll = NULL;
921 int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)];
922 int32_t reorderCodeCount;
923 int32_t reorderCodeIndex;
924 UParseError parseError;
925
926 genrbdata.inputDir = state->inputdir;
927 genrbdata.outputDir = state->outputdir;
928
929 UErrorCode intStatus = U_ZERO_ERROR;
930 uprv_memset(&parseError, 0, sizeof(parseError));
931 coll = ucol_openRulesForImport(member->u.fString.fChars, member->u.fString.fLength,
932 UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, importFromDataFile, &genrbdata, &intStatus);
933
934 if (U_SUCCESS(intStatus) && coll != NULL)
935 {
936 len = ucol_cloneBinary(coll, NULL, 0, &intStatus);
937 data = (uint8_t *)uprv_malloc(len);
938 intStatus = U_ZERO_ERROR;
939 len = ucol_cloneBinary(coll, data, len, &intStatus);
940
941 /* tailoring rules version */
942 /* This is wrong! */
943 /*coll->dataInfo.dataVersion[1] = version[0];*/
944 /* Copy tailoring version. Builder version already */
945 /* set in ucol_openRules */
946 ((UCATableHeader *)data)->version[1] = version[0];
947 ((UCATableHeader *)data)->version[2] = version[1];
948 ((UCATableHeader *)data)->version[3] = version[2];
949
950 if (U_SUCCESS(intStatus) && data != NULL)
951 {
952 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", len, data, NULL, NULL, status);
953 table_add(result, collationBin, line, status);
954 uprv_free(data);
955
956 reorderCodeCount = ucol_getReorderCodes(
957 coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST), &intStatus);
958 if (U_SUCCESS(intStatus) && reorderCodeCount > 0) {
959 struct SResource *reorderCodeRes = intvector_open(state->bundle, "%%ReorderCodes", NULL, status);
960 for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodeCount; reorderCodeIndex++) {
961 intvector_add(reorderCodeRes, reorderCodes[reorderCodeIndex], status);
962 }
963 table_add(result, reorderCodeRes, line, status);
964 }
965 }
966 else
967 {
968 warning(line, "could not obtain rules from collator");
969 if(isStrict()){
970 *status = U_INVALID_FORMAT_ERROR;
971 return NULL;
972 }
973 }
974
975 ucol_close(coll);
976 }
977 else
978 {
979 if(intStatus == U_FILE_ACCESS_ERROR) {
980 error(startline, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly.");
981 *status = intStatus;
982 return NULL;
983 }
984 char preBuffer[100], postBuffer[100];
985 escape(parseError.preContext, preBuffer);
986 escape(parseError.postContext, postBuffer);
987 warning(line,
988 "%%%%CollationBin could not be constructed from CollationElements\n"
989 " check context, check that the FractionalUCA.txt UCA version "
990 "matches the current UCD version\n"
991 " UErrorCode=%s UParseError={ line=%d offset=%d pre=<> post=<> }",
992 u_errorName(intStatus),
993 parseError.line,
994 parseError.offset,
995 preBuffer,
996 postBuffer);
997 if(isStrict()){
998 *status = intStatus;
999 return NULL;
1000 }
1001 }
1002 } else {
1003 if(isVerbose()) {
1004 printf("Not building Collation binary\n");
1005 }
1006 }
1007#endif
1008 /* in order to achieve smaller data files, we can direct genrb */
1009 /* to omit collation rules */
1010 if(gOmitCollationRules) {
1011 bundle_closeString(state->bundle, member);
1012 } else {
1013 table_add(result, member, line, status);
1014 }
1015 }
1016 if (U_FAILURE(*status))
1017 {
1018 res_close(result);
1019 return NULL;
1020 }
1021 }
1022
1023 // Reached the end without a TOK_CLOSE_BRACE. Should be an error.
1024 *status = U_INTERNAL_PROGRAM_ERROR;
1025 return NULL;
1026}
1027
1028static struct SResource *
1029parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
1030{
1031 struct SResource *result = NULL;
1032 struct SResource *member = NULL;
1033 struct SResource *collationRes = NULL;
1034 struct UString *tokenValue;
1035 struct UString comment;
1036 enum ETokenType token;
1037 char subtag[1024], typeKeyword[1024];
1038 uint32_t line;
1039
1040 result = table_open(state->bundle, tag, NULL, status);
1041
1042 if (result == NULL || U_FAILURE(*status))
1043 {
1044 return NULL;
1045 }
1046 if(isVerbose()){
1047 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1048 }
1049 if(!newCollation) {
1050 return addCollation(state, result, startline, status);
1051 }
1052 else {
1053 for(;;) {
1054 ustr_init(&comment);
1055 token = getToken(state, &tokenValue, &comment, &line, status);
1056
1057 if (token == TOK_CLOSE_BRACE)
1058 {
1059 return result;
1060 }
1061
1062 if (token != TOK_STRING)
1063 {
1064 res_close(result);
1065 *status = U_INVALID_FORMAT_ERROR;
1066
1067 if (token == TOK_EOF)
1068 {
1069 error(startline, "unterminated table");
1070 }
1071 else
1072 {
1073 error(line, "Unexpected token %s", tokenNames[token]);
1074 }
1075
1076 return NULL;
1077 }
1078
1079 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1080
1081 if (U_FAILURE(*status))
1082 {
1083 res_close(result);
1084 return NULL;
1085 }
1086
1087 if (uprv_strcmp(subtag, "default") == 0)
1088 {
1089 member = parseResource(state, subtag, NULL, status);
1090
1091 if (U_FAILURE(*status))
1092 {
1093 res_close(result);
1094 return NULL;
1095 }
1096
1097 table_add(result, member, line, status);
1098 }
1099 else
1100 {
1101 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
1102 /* this probably needs to be refactored or recursively use the parser */
1103 /* first we assume that our collation table won't have the explicit type */
1104 /* then, we cannot handle aliases */
1105 if(token == TOK_OPEN_BRACE) {
1106 token = getToken(state, &tokenValue, &comment, &line, status);
1107 collationRes = table_open(state->bundle, subtag, NULL, status);
1108 collationRes = addCollation(state, collationRes, startline, status); /* need to parse the collation data regardless */
1109 if (gIncludeUnihanColl || uprv_strcmp(subtag, "unihan") != 0) {
1110 table_add(result, collationRes, startline, status);
1111 }
1112 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
1113 /* we could have a table too */
1114 token = peekToken(state, 1, &tokenValue, &line, &comment, status);
1115 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
1116 if(uprv_strcmp(typeKeyword, "alias") == 0) {
1117 member = parseResource(state, subtag, NULL, status);
1118 if (U_FAILURE(*status))
1119 {
1120 res_close(result);
1121 return NULL;
1122 }
1123
1124 table_add(result, member, line, status);
1125 } else {
1126 res_close(result);
1127 *status = U_INVALID_FORMAT_ERROR;
1128 return NULL;
1129 }
1130 } else {
1131 res_close(result);
1132 *status = U_INVALID_FORMAT_ERROR;
1133 return NULL;
1134 }
1135 }
1136
1137 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1138
1139 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1140
1141 if (U_FAILURE(*status))
1142 {
1143 res_close(result);
1144 return NULL;
1145 }
1146 }
1147 }
1148}
1149
1150/* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1151 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1152static struct SResource *
1153realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
1154{
1155 struct SResource *member = NULL;
1156 struct UString *tokenValue=NULL;
1157 struct UString comment;
1158 enum ETokenType token;
1159 char subtag[1024];
1160 uint32_t line;
1161 UBool readToken = FALSE;
1162
1163 /* '{' . (name resource)* '}' */
1164
1165 if(isVerbose()){
1166 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1167 }
1168 for (;;)
1169 {
1170 ustr_init(&comment);
1171 token = getToken(state, &tokenValue, &comment, &line, status);
1172
1173 if (token == TOK_CLOSE_BRACE)
1174 {
1175 if (!readToken) {
1176 warning(startline, "Encountered empty table");
1177 }
1178 return table;
1179 }
1180
1181 if (token != TOK_STRING)
1182 {
1183 *status = U_INVALID_FORMAT_ERROR;
1184
1185 if (token == TOK_EOF)
1186 {
1187 error(startline, "unterminated table");
1188 }
1189 else
1190 {
1191 error(line, "unexpected token %s", tokenNames[token]);
1192 }
1193
1194 return NULL;
1195 }
1196
1197 if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
1198 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1199 } else {
1200 *status = U_INVALID_FORMAT_ERROR;
1201 error(line, "invariant characters required for table keys");
1202 return NULL;
1203 }
1204
1205 if (U_FAILURE(*status))
1206 {
1207 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
1208 return NULL;
1209 }
1210
1211 member = parseResource(state, subtag, &comment, status);
1212
1213 if (member == NULL || U_FAILURE(*status))
1214 {
1215 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
1216 return NULL;
1217 }
1218
1219 table_add(table, member, line, status);
1220
1221 if (U_FAILURE(*status))
1222 {
1223 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
1224 return NULL;
1225 }
1226 readToken = TRUE;
1227 ustr_deinit(&comment);
1228 }
1229
1230 /* not reached */
1231 /* A compiler warning will appear if all paths don't contain a return statement. */
1232/* *status = U_INTERNAL_PROGRAM_ERROR;
1233 return NULL;*/
1234}
1235
1236static struct SResource *
1237parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1238{
1239 struct SResource *result;
1240
1241 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
1242 {
1243 return parseCollationElements(state, tag, startline, FALSE, status);
1244 }
1245 if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1246 {
1247 return parseCollationElements(state, tag, startline, TRUE, status);
1248 }
1249 if(isVerbose()){
1250 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1251 }
1252
1253 result = table_open(state->bundle, tag, comment, status);
1254
1255 if (result == NULL || U_FAILURE(*status))
1256 {
1257 return NULL;
1258 }
1259 return realParseTable(state, result, tag, startline, status);
1260}
1261
1262static struct SResource *
1263parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1264{
1265 struct SResource *result = NULL;
1266 struct SResource *member = NULL;
1267 struct UString *tokenValue;
1268 struct UString memberComments;
1269 enum ETokenType token;
1270 UBool readToken = FALSE;
1271
1272 result = array_open(state->bundle, tag, comment, status);
1273
1274 if (result == NULL || U_FAILURE(*status))
1275 {
1276 return NULL;
1277 }
1278 if(isVerbose()){
1279 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1280 }
1281
1282 ustr_init(&memberComments);
1283
1284 /* '{' . resource [','] '}' */
1285 for (;;)
1286 {
1287 /* reset length */
1288 ustr_setlen(&memberComments, 0, status);
1289
1290 /* check for end of array, but don't consume next token unless it really is the end */
1291 token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
1292
1293
1294 if (token == TOK_CLOSE_BRACE)
1295 {
1296 getToken(state, NULL, NULL, NULL, status);
1297 if (!readToken) {
1298 warning(startline, "Encountered empty array");
1299 }
1300 break;
1301 }
1302
1303 if (token == TOK_EOF)
1304 {
1305 res_close(result);
1306 *status = U_INVALID_FORMAT_ERROR;
1307 error(startline, "unterminated array");
1308 return NULL;
1309 }
1310
1311 /* string arrays are a special case */
1312 if (token == TOK_STRING)
1313 {
1314 getToken(state, &tokenValue, &memberComments, NULL, status);
1315 member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
1316 }
1317 else
1318 {
1319 member = parseResource(state, NULL, &memberComments, status);
1320 }
1321
1322 if (member == NULL || U_FAILURE(*status))
1323 {
1324 res_close(result);
1325 return NULL;
1326 }
1327
1328 array_add(result, member, status);
1329
1330 if (U_FAILURE(*status))
1331 {
1332 res_close(result);
1333 return NULL;
1334 }
1335
1336 /* eat optional comma if present */
1337 token = peekToken(state, 0, NULL, NULL, NULL, status);
1338
1339 if (token == TOK_COMMA)
1340 {
1341 getToken(state, NULL, NULL, NULL, status);
1342 }
1343
1344 if (U_FAILURE(*status))
1345 {
1346 res_close(result);
1347 return NULL;
1348 }
1349 readToken = TRUE;
1350 }
1351
1352 ustr_deinit(&memberComments);
1353 return result;
1354}
1355
1356static struct SResource *
1357parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1358{
1359 struct SResource *result = NULL;
1360 enum ETokenType token;
1361 char *string;
1362 int32_t value;
1363 UBool readToken = FALSE;
1364 char *stopstring;
1365 uint32_t len;
1366 struct UString memberComments;
1367
1368 result = intvector_open(state->bundle, tag, comment, status);
1369
1370 if (result == NULL || U_FAILURE(*status))
1371 {
1372 return NULL;
1373 }
1374
1375 if(isVerbose()){
1376 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1377 }
1378 ustr_init(&memberComments);
1379 /* '{' . string [','] '}' */
1380 for (;;)
1381 {
1382 ustr_setlen(&memberComments, 0, status);
1383
1384 /* check for end of array, but don't consume next token unless it really is the end */
1385 token = peekToken(state, 0, NULL, NULL,&memberComments, status);
1386
1387 if (token == TOK_CLOSE_BRACE)
1388 {
1389 /* it's the end, consume the close brace */
1390 getToken(state, NULL, NULL, NULL, status);
1391 if (!readToken) {
1392 warning(startline, "Encountered empty int vector");
1393 }
1394 ustr_deinit(&memberComments);
1395 return result;
1396 }
1397
1398 string = getInvariantString(state, NULL, NULL, status);
1399
1400 if (U_FAILURE(*status))
1401 {
1402 res_close(result);
1403 return NULL;
1404 }
1405
1406 /* For handling illegal char in the Intvector */
1407 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1408 len=(uint32_t)(stopstring-string);
1409
1410 if(len==uprv_strlen(string))
1411 {
1412 intvector_add(result, value, status);
1413 uprv_free(string);
1414 token = peekToken(state, 0, NULL, NULL, NULL, status);
1415 }
1416 else
1417 {
1418 uprv_free(string);
1419 *status=U_INVALID_CHAR_FOUND;
1420 }
1421
1422 if (U_FAILURE(*status))
1423 {
1424 res_close(result);
1425 return NULL;
1426 }
1427
1428 /* the comma is optional (even though it is required to prevent the reader from concatenating
1429 consecutive entries) so that a missing comma on the last entry isn't an error */
1430 if (token == TOK_COMMA)
1431 {
1432 getToken(state, NULL, NULL, NULL, status);
1433 }
1434 readToken = TRUE;
1435 }
1436
1437 /* not reached */
1438 /* A compiler warning will appear if all paths don't contain a return statement. */
1439/* intvector_close(result, status);
1440 *status = U_INTERNAL_PROGRAM_ERROR;
1441 return NULL;*/
1442}
1443
1444static struct SResource *
1445parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1446{
1447 struct SResource *result = NULL;
1448 uint8_t *value;
1449 char *string;
1450 char toConv[3] = {'\0', '\0', '\0'};
1451 uint32_t count;
1452 uint32_t i;
1453 uint32_t line;
1454 char *stopstring;
1455 uint32_t len;
1456
1457 string = getInvariantString(state, &line, NULL, status);
1458
1459 if (string == NULL || U_FAILURE(*status))
1460 {
1461 return NULL;
1462 }
1463
1464 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1465
1466 if (U_FAILURE(*status))
1467 {
1468 uprv_free(string);
1469 return NULL;
1470 }
1471
1472 if(isVerbose()){
1473 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1474 }
1475
1476 count = (uint32_t)uprv_strlen(string);
1477 if (count > 0){
1478 if((count % 2)==0){
1479 value = static_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * count));
1480
1481 if (value == NULL)
1482 {
1483 uprv_free(string);
1484 *status = U_MEMORY_ALLOCATION_ERROR;
1485 return NULL;
1486 }
1487
1488 for (i = 0; i < count; i += 2)
1489 {
1490 toConv[0] = string[i];
1491 toConv[1] = string[i + 1];
1492
1493 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
1494 len=(uint32_t)(stopstring-toConv);
1495
1496 if(len!=uprv_strlen(toConv))
1497 {
1498 uprv_free(string);
1499 *status=U_INVALID_CHAR_FOUND;
1500 return NULL;
1501 }
1502 }
1503
1504 result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status);
1505
1506 uprv_free(value);
1507 }
1508 else
1509 {
1510 *status = U_INVALID_CHAR_FOUND;
1511 uprv_free(string);
1512 error(line, "Encountered invalid binary string");
1513 return NULL;
1514 }
1515 }
1516 else
1517 {
1518 result = bin_open(state->bundle, tag, 0, NULL, "",comment,status);
1519 warning(startline, "Encountered empty binary tag");
1520 }
1521 uprv_free(string);
1522
1523 return result;
1524}
1525
1526static struct SResource *
1527parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1528{
1529 struct SResource *result = NULL;
1530 int32_t value;
1531 char *string;
1532 char *stopstring;
1533 uint32_t len;
1534
1535 string = getInvariantString(state, NULL, NULL, status);
1536
1537 if (string == NULL || U_FAILURE(*status))
1538 {
1539 return NULL;
1540 }
1541
1542 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1543
1544 if (U_FAILURE(*status))
1545 {
1546 uprv_free(string);
1547 return NULL;
1548 }
1549
1550 if(isVerbose()){
1551 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1552 }
1553
1554 if (uprv_strlen(string) <= 0)
1555 {
1556 warning(startline, "Encountered empty integer. Default value is 0.");
1557 }
1558
1559 /* Allow integer support for hexdecimal, octal digit and decimal*/
1560 /* and handle illegal char in the integer*/
1561 value = uprv_strtoul(string, &stopstring, 0);
1562 len=(uint32_t)(stopstring-string);
1563 if(len==uprv_strlen(string))
1564 {
1565 result = int_open(state->bundle, tag, value, comment, status);
1566 }
1567 else
1568 {
1569 *status=U_INVALID_CHAR_FOUND;
1570 }
1571 uprv_free(string);
1572
1573 return result;
1574}
1575
1576static struct SResource *
1577parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1578{
1579 struct SResource *result;
1580 FileStream *file;
1581 int32_t len;
1582 uint8_t *data;
1583 char *filename;
1584 uint32_t line;
1585 char *fullname = NULL;
1586 filename = getInvariantString(state, &line, NULL, status);
1587
1588 if (U_FAILURE(*status))
1589 {
1590 return NULL;
1591 }
1592
1593 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1594
1595 if (U_FAILURE(*status))
1596 {
1597 uprv_free(filename);
1598 return NULL;
1599 }
1600
1601 if(isVerbose()){
1602 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1603 }
1604
1605 /* Open the input file for reading */
1606 if (state->inputdir == NULL)
1607 {
1608#if 1
1609 /*
1610 * Always save file file name, even if there's
1611 * no input directory specified. MIGHT BREAK SOMETHING
1612 */
1613 int32_t filenameLength = uprv_strlen(filename);
1614
1615 fullname = (char *) uprv_malloc(filenameLength + 1);
1616 uprv_strcpy(fullname, filename);
1617#endif
1618
1619 file = T_FileStream_open(filename, "rb");
1620 }
1621 else
1622 {
1623
1624 int32_t count = (int32_t)uprv_strlen(filename);
1625
1626 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1627 {
1628 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1629
1630 /* test for NULL */
1631 if(fullname == NULL)
1632 {
1633 *status = U_MEMORY_ALLOCATION_ERROR;
1634 return NULL;
1635 }
1636
1637 uprv_strcpy(fullname, state->inputdir);
1638
1639 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1640 fullname[state->inputdirLength + 1] = '\0';
1641
1642 uprv_strcat(fullname, filename);
1643 }
1644 else
1645 {
1646 fullname = (char *) uprv_malloc(state->inputdirLength + count + 1);
1647
1648 /* test for NULL */
1649 if(fullname == NULL)
1650 {
1651 *status = U_MEMORY_ALLOCATION_ERROR;
1652 return NULL;
1653 }
1654
1655 uprv_strcpy(fullname, state->inputdir);
1656 uprv_strcat(fullname, filename);
1657 }
1658
1659 file = T_FileStream_open(fullname, "rb");
1660
1661 }
1662
1663 if (file == NULL)
1664 {
1665 error(line, "couldn't open input file %s", filename);
1666 *status = U_FILE_ACCESS_ERROR;
1667 return NULL;
1668 }
1669
1670 len = T_FileStream_size(file);
1671 data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
1672 /* test for NULL */
1673 if(data == NULL)
1674 {
1675 *status = U_MEMORY_ALLOCATION_ERROR;
1676 T_FileStream_close (file);
1677 return NULL;
1678 }
1679
1680 /* int32_t numRead = */ T_FileStream_read (file, data, len);
1681 T_FileStream_close (file);
1682
1683 result = bin_open(state->bundle, tag, len, data, fullname, comment, status);
1684
1685 uprv_free(data);
1686 uprv_free(filename);
1687 uprv_free(fullname);
1688
1689 return result;
1690}
1691
1692static struct SResource *
1693parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1694{
1695 struct SResource *result;
1696 int32_t len=0;
1697 char *filename;
1698 uint32_t line;
1699 UChar *pTarget = NULL;
1700
1701 UCHARBUF *ucbuf;
1702 char *fullname = NULL;
1703 int32_t count = 0;
1704 const char* cp = NULL;
1705 const UChar* uBuffer = NULL;
1706
1707 filename = getInvariantString(state, &line, NULL, status);
1708 count = (int32_t)uprv_strlen(filename);
1709
1710 if (U_FAILURE(*status))
1711 {
1712 return NULL;
1713 }
1714
1715 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1716
1717 if (U_FAILURE(*status))
1718 {
1719 uprv_free(filename);
1720 return NULL;
1721 }
1722
1723 if(isVerbose()){
1724 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1725 }
1726
1727 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1728 /* test for NULL */
1729 if(fullname == NULL)
1730 {
1731 *status = U_MEMORY_ALLOCATION_ERROR;
1732 uprv_free(filename);
1733 return NULL;
1734 }
1735
1736 if(state->inputdir!=NULL){
1737 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1738 {
1739
1740 uprv_strcpy(fullname, state->inputdir);
1741
1742 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1743 fullname[state->inputdirLength + 1] = '\0';
1744
1745 uprv_strcat(fullname, filename);
1746 }
1747 else
1748 {
1749 uprv_strcpy(fullname, state->inputdir);
1750 uprv_strcat(fullname, filename);
1751 }
1752 }else{
1753 uprv_strcpy(fullname,filename);
1754 }
1755
1756 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1757
1758 if (U_FAILURE(*status)) {
1759 error(line, "couldn't open input file %s\n", filename);
1760 return NULL;
1761 }
1762
1763 uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
1764 result = string_open(state->bundle, tag, uBuffer, len, comment, status);
1765
1766 ucbuf_close(ucbuf);
1767
1768 uprv_free(pTarget);
1769
1770 uprv_free(filename);
1771 uprv_free(fullname);
1772
1773 return result;
1774}
1775
1776
1777
1778
1779
1780U_STRING_DECL(k_type_string, "string", 6);
1781U_STRING_DECL(k_type_binary, "binary", 6);
1782U_STRING_DECL(k_type_bin, "bin", 3);
1783U_STRING_DECL(k_type_table, "table", 5);
1784U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17);
1785U_STRING_DECL(k_type_int, "int", 3);
1786U_STRING_DECL(k_type_integer, "integer", 7);
1787U_STRING_DECL(k_type_array, "array", 5);
1788U_STRING_DECL(k_type_alias, "alias", 5);
1789U_STRING_DECL(k_type_intvector, "intvector", 9);
1790U_STRING_DECL(k_type_import, "import", 6);
1791U_STRING_DECL(k_type_include, "include", 7);
1792
1793/* Various non-standard processing plugins that create one or more special resources. */
1794U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1795U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18);
1796U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23);
1797U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19);
1798
/*
 * Resource types known to the parser. The values are used as indexes into
 * gResourceTypes, so the two must list their entries in the same order.
 */
typedef enum EResourceType
{
    RT_UNKNOWN = 0,             /* no (or not yet determined) type */
    RT_STRING,
    RT_BINARY,
    RT_TABLE,
    RT_TABLE_NO_FALLBACK,       /* only accepted for the top-level table */
    RT_INTEGER,
    RT_ARRAY,
    RT_ALIAS,
    RT_INTVECTOR,
    RT_IMPORT,
    RT_INCLUDE,
    RT_PROCESS_UCA_RULES,
    RT_PROCESS_COLLATION,
    RT_PROCESS_TRANSLITERATOR,
    RT_PROCESS_DEPENDENCY,
    RT_RESERVED                 /* end marker for searches over the types */
} EResourceType;
1818
1819static struct {
1820 const char *nameChars; /* only used for debugging */
1821 const UChar *nameUChars;
1822 ParseResourceFunction *parseFunction;
1823} gResourceTypes[] = {
1824 {"Unknown", NULL, NULL},
1825 {"string", k_type_string, parseString},
1826 {"binary", k_type_binary, parseBinary},
1827 {"table", k_type_table, parseTable},
1828 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
1829 {"integer", k_type_integer, parseInteger},
1830 {"array", k_type_array, parseArray},
1831 {"alias", k_type_alias, parseAlias},
1832 {"intvector", k_type_intvector, parseIntVector},
1833 {"import", k_type_import, parseImport},
1834 {"include", k_type_include, parseInclude},
1835 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
1836 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
1837 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
1838 {"process(dependency)", k_type_plugin_dependency, parseDependency},
1839 {"reserved", NULL, NULL}
1840};
1841
1842void initParser(UBool omitCollationRules)
1843{
1844 U_STRING_INIT(k_type_string, "string", 6);
1845 U_STRING_INIT(k_type_binary, "binary", 6);
1846 U_STRING_INIT(k_type_bin, "bin", 3);
1847 U_STRING_INIT(k_type_table, "table", 5);
1848 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17);
1849 U_STRING_INIT(k_type_int, "int", 3);
1850 U_STRING_INIT(k_type_integer, "integer", 7);
1851 U_STRING_INIT(k_type_array, "array", 5);
1852 U_STRING_INIT(k_type_alias, "alias", 5);
1853 U_STRING_INIT(k_type_intvector, "intvector", 9);
1854 U_STRING_INIT(k_type_import, "import", 6);
1855 U_STRING_INIT(k_type_include, "include", 7);
1856
1857 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1858 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18);
1859 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
1860 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19);
1861
1862 gOmitCollationRules = omitCollationRules;
1863}
1864
1865static inline UBool isTable(enum EResourceType type) {
1866 return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK);
1867}
1868
1869static enum EResourceType
1870parseResourceType(ParseState* state, UErrorCode *status)
1871{
1872 struct UString *tokenValue;
1873 struct UString comment;
1874 enum EResourceType result = RT_UNKNOWN;
1875 uint32_t line=0;
1876 ustr_init(&comment);
1877 expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
1878
1879 if (U_FAILURE(*status))
1880 {
1881 return RT_UNKNOWN;
1882 }
1883
1884 *status = U_ZERO_ERROR;
1885
1886 /* Search for normal types */
1887 result=RT_UNKNOWN;
1888 while ((result=(EResourceType)(result+1)) < RT_RESERVED) {
1889 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1890 break;
1891 }
1892 }
1893 /* Now search for the aliases */
1894 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
1895 result = RT_INTEGER;
1896 }
1897 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
1898 result = RT_BINARY;
1899 }
1900 else if (result == RT_RESERVED) {
1901 char tokenBuffer[1024];
1902 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1903 tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1904 *status = U_INVALID_FORMAT_ERROR;
1905 error(line, "unknown resource type '%s'", tokenBuffer);
1906 }
1907
1908 return result;
1909}
1910
1911/* parse a non-top-level resource */
1912static struct SResource *
1913parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
1914{
1915 enum ETokenType token;
1916 enum EResourceType resType = RT_UNKNOWN;
1917 ParseResourceFunction *parseFunction = NULL;
1918 struct UString *tokenValue;
1919 uint32_t startline;
1920 uint32_t line;
1921
1922
1923 token = getToken(state, &tokenValue, NULL, &startline, status);
1924
1925 if(isVerbose()){
1926 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1927 }
1928
1929 /* name . [ ':' type ] '{' resource '}' */
1930 /* This function parses from the colon onwards. If the colon is present, parse the
1931 type then try to parse a resource of that type. If there is no explicit type,
1932 work it out using the lookahead tokens. */
1933 switch (token)
1934 {
1935 case TOK_EOF:
1936 *status = U_INVALID_FORMAT_ERROR;
1937 error(startline, "Unexpected EOF encountered");
1938 return NULL;
1939
1940 case TOK_ERROR:
1941 *status = U_INVALID_FORMAT_ERROR;
1942 return NULL;
1943
1944 case TOK_COLON:
1945 resType = parseResourceType(state, status);
1946 expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
1947
1948 if (U_FAILURE(*status))
1949 {
1950 return NULL;
1951 }
1952
1953 break;
1954
1955 case TOK_OPEN_BRACE:
1956 break;
1957
1958 default:
1959 *status = U_INVALID_FORMAT_ERROR;
1960 error(startline, "syntax error while reading a resource, expected '{' or ':'");
1961 return NULL;
1962 }
1963
1964
1965 if (resType == RT_UNKNOWN)
1966 {
1967 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
1968 We could have any of the following:
1969 { { => array (nested)
1970 { :/} => array
1971 { string , => string array
1972
1973 { string { => table
1974
1975 { string :/{ => table
1976 { string } => string
1977 */
1978
1979 token = peekToken(state, 0, NULL, &line, NULL,status);
1980
1981 if (U_FAILURE(*status))
1982 {
1983 return NULL;
1984 }
1985
1986 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
1987 {
1988 resType = RT_ARRAY;
1989 }
1990 else if (token == TOK_STRING)
1991 {
1992 token = peekToken(state, 1, NULL, &line, NULL, status);
1993
1994 if (U_FAILURE(*status))
1995 {
1996 return NULL;
1997 }
1998
1999 switch (token)
2000 {
2001 case TOK_COMMA: resType = RT_ARRAY; break;
2002 case TOK_OPEN_BRACE: resType = RT_TABLE; break;
2003 case TOK_CLOSE_BRACE: resType = RT_STRING; break;
2004 case TOK_COLON: resType = RT_TABLE; break;
2005 default:
2006 *status = U_INVALID_FORMAT_ERROR;
2007 error(line, "Unexpected token after string, expected ',', '{' or '}'");
2008 return NULL;
2009 }
2010 }
2011 else
2012 {
2013 *status = U_INVALID_FORMAT_ERROR;
2014 error(line, "Unexpected token after '{'");
2015 return NULL;
2016 }
2017
2018 /* printf("Type guessed as %s\n", resourceNames[resType]); */
2019 } else if(resType == RT_TABLE_NO_FALLBACK) {
2020 *status = U_INVALID_FORMAT_ERROR;
2021 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
2022 return NULL;
2023 }
2024
2025
2026 /* We should now know what we need to parse next, so call the appropriate parser
2027 function and return. */
2028 parseFunction = gResourceTypes[resType].parseFunction;
2029 if (parseFunction != NULL) {
2030 return parseFunction(state, tag, startline, comment, status);
2031 }
2032 else {
2033 *status = U_INTERNAL_PROGRAM_ERROR;
2034 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
2035 }
2036
2037 return NULL;
2038}
2039
2040/* parse the top-level resource */
2041struct SRBRoot *
2042parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UBool makeBinaryCollation,
2043 UErrorCode *status)
2044{
2045 struct UString *tokenValue;
2046 struct UString comment;
2047 uint32_t line;
2048 enum EResourceType bundleType;
2049 enum ETokenType token;
2050 ParseState state;
2051 uint32_t i;
2052
2053
2054 for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
2055 {
2056 ustr_init(&state.lookahead[i].value);
2057 ustr_init(&state.lookahead[i].comment);
2058 }
2059
2060 initLookahead(&state, buf, status);
2061
2062 state.inputdir = inputDir;
2063 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
2064 state.outputdir = outputDir;
2065 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
2066 state.makeBinaryCollation = makeBinaryCollation;
2067
2068 ustr_init(&comment);
2069 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
2070
2071 state.bundle = bundle_open(&comment, FALSE, status);
2072
2073 if (state.bundle == NULL || U_FAILURE(*status))
2074 {
2075 return NULL;
2076 }
2077
2078
2079 bundle_setlocale(state.bundle, tokenValue->fChars, status);
2080
2081 /* The following code is to make Empty bundle work no matter with :table specifer or not */
2082 token = getToken(&state, NULL, NULL, &line, status);
2083 if(token==TOK_COLON) {
2084 *status=U_ZERO_ERROR;
2085 bundleType=parseResourceType(&state, status);
2086
2087 if(isTable(bundleType))
2088 {
2089 expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
2090 }
2091 else
2092 {
2093 *status=U_PARSE_ERROR;
2094 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
2095 }
2096 }
2097 else
2098 {
2099 /* not a colon */
2100 if(token==TOK_OPEN_BRACE)
2101 {
2102 *status=U_ZERO_ERROR;
2103 bundleType=RT_TABLE;
2104 }
2105 else
2106 {
2107 /* neither colon nor open brace */
2108 *status=U_PARSE_ERROR;
2109 bundleType=RT_UNKNOWN;
2110 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
2111 }
2112 }
2113
2114 if (U_FAILURE(*status))
2115 {
2116 bundle_close(state.bundle, status);
2117 return NULL;
2118 }
2119
2120 if(bundleType==RT_TABLE_NO_FALLBACK) {
2121 /*
2122 * Parse a top-level table with the table(nofallback) declaration.
2123 * This is the same as a regular table, but also sets the
2124 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2125 */
2126 state.bundle->noFallback=TRUE;
2127 }
2128 /* top-level tables need not handle special table names like "collations" */
2129 realParseTable(&state, state.bundle->fRoot, NULL, line, status);
2130 if(dependencyArray!=NULL){
2131 table_add(state.bundle->fRoot, dependencyArray, 0, status);
2132 dependencyArray = NULL;
2133 }
2134 if (U_FAILURE(*status))
2135 {
2136 bundle_close(state.bundle, status);
2137 res_close(dependencyArray);
2138 return NULL;
2139 }
2140
2141 if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
2142 {
2143 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
2144 if(isStrict()){
2145 *status = U_INVALID_FORMAT_ERROR;
2146 return NULL;
2147 }
2148 }
2149
2150 cleanupLookahead(&state);
2151 ustr_deinit(&comment);
2152 return state.bundle;
2153}