/*
*******************************************************************************
*
*   Copyright (C) 1998-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*
* File parse.cpp
*
* Modification History:
*
*   Date          Name          Description
*   05/26/99      stephen       Creation.
*   02/25/00      weiv          Overhaul to write udata
*   5/10/01       Ram           removed ustdio dependency
*   06/10/2001    Dominic Ludlam <dom@recoil.org> Rewritten
*******************************************************************************
*/
20
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -080021// Safer use of UnicodeString.
22#ifndef UNISTR_FROM_CHAR_EXPLICIT
23# define UNISTR_FROM_CHAR_EXPLICIT explicit
24#endif
25
26// Less important, but still a good idea.
27#ifndef UNISTR_FROM_STRING_EXPLICIT
28# define UNISTR_FROM_STRING_EXPLICIT explicit
29#endif
30
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000031#include "parse.h"
32#include "errmsg.h"
33#include "uhash.h"
34#include "cmemory.h"
35#include "cstring.h"
36#include "uinvchar.h"
37#include "read.h"
38#include "ustr.h"
39#include "reslist.h"
40#include "rbt_pars.h"
41#include "genrb.h"
42#include "unicode/ustring.h"
43#include "unicode/uscript.h"
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -080044#include "unicode/utf16.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000045#include "unicode/putil.h"
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -080046#include "collationbuilder.h"
47#include "collationdata.h"
48#include "collationdatareader.h"
49#include "collationdatawriter.h"
50#include "collationfastlatinbuilder.h"
51#include "collationinfo.h"
52#include "collationroot.h"
53#include "collationruleparser.h"
54#include "collationtailoring.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000055#include <stdio.h>
56
/* Number of tokens to read ahead of the current stream position */
#define MAX_LOOKAHEAD   3

/* Code points that the rule-file readers in this file test for. */
#define CR              0x000D  /* carriage return */
#define LF              0x000A  /* line feed */
#define SPACE           0x0020  /* ' ' */
#define TAB             0x0009  /* horizontal tab */
#define ESCAPE          0x005C  /* '\\' */
#define HASH            0x0023  /* '#' */
#define QUOTE           0x0027  /* '\'' */
#define ZERO            0x0030  /* '0' */
#define STARTCOMMAND    0x005B  /* '[' */
#define ENDCOMMAND      0x005D  /* ']' */
#define OPENSQBRACKET   0x005B  /* '[' (same value as STARTCOMMAND) */
#define CLOSESQBRACKET  0x005D  /* ']' (same value as ENDCOMMAND) */
72
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -080073using icu::LocalPointer;
74using icu::UnicodeString;
75
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000076struct Lookahead
77{
78 enum ETokenType type;
79 struct UString value;
80 struct UString comment;
81 uint32_t line;
82};
83
84/* keep in sync with token defines in read.h */
85const char *tokenNames[TOK_TOKEN_COUNT] =
86{
87 "string", /* A string token, such as "MonthNames" */
88 "'{'", /* An opening brace character */
89 "'}'", /* A closing brace character */
90 "','", /* A comma */
91 "':'", /* A colon */
92
93 "<end of file>", /* End of the file has been reached successfully */
94 "<end of line>"
95};
96
97/* Just to store "TRUE" */
98//static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
99
100typedef struct {
101 struct Lookahead lookahead[MAX_LOOKAHEAD + 1];
102 uint32_t lookaheadPosition;
103 UCHARBUF *buffer;
104 struct SRBRoot *bundle;
105 const char *inputdir;
106 uint32_t inputdirLength;
107 const char *outputdir;
108 uint32_t outputdirLength;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800109 const char *filename;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000110 UBool makeBinaryCollation;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800111 UBool omitCollationRules;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000112} ParseState;
113
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000114typedef struct SResource *
115ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
116
117static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
118
/* The nature of the lookahead buffer:
   There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer.  This provides
   MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
   When getToken is called, the current pointer is moved to the next slot and the
   old slot is filled with the next token from the reader by calling getNextToken.
   The token values are stored in the slot, which means that token values don't
   survive a second call to getToken, i.e.

   struct UString *value;

   getToken(state, &value, NULL, NULL, status);
   getToken(state, NULL, NULL, NULL, status);       bad - value is now a different string
*/
132static void
133initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
134{
135 static uint32_t initTypeStrings = 0;
136 uint32_t i;
137
138 if (!initTypeStrings)
139 {
140 initTypeStrings = 1;
141 }
142
143 state->lookaheadPosition = 0;
144 state->buffer = buf;
145
146 resetLineNumber();
147
148 for (i = 0; i < MAX_LOOKAHEAD; i++)
149 {
150 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
151 if (U_FAILURE(*status))
152 {
153 return;
154 }
155 }
156
157 *status = U_ZERO_ERROR;
158}
159
160static void
161cleanupLookahead(ParseState* state)
162{
163 uint32_t i;
164 for (i = 0; i <= MAX_LOOKAHEAD; i++)
165 {
166 ustr_deinit(&state->lookahead[i].value);
167 ustr_deinit(&state->lookahead[i].comment);
168 }
169
170}
171
172static enum ETokenType
173getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
174{
175 enum ETokenType result;
176 uint32_t i;
177
178 result = state->lookahead[state->lookaheadPosition].type;
179
180 if (tokenValue != NULL)
181 {
182 *tokenValue = &state->lookahead[state->lookaheadPosition].value;
183 }
184
185 if (linenumber != NULL)
186 {
187 *linenumber = state->lookahead[state->lookaheadPosition].line;
188 }
189
190 if (comment != NULL)
191 {
192 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
193 }
194
195 i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
196 state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
197 ustr_setlen(&state->lookahead[i].comment, 0, status);
198 ustr_setlen(&state->lookahead[i].value, 0, status);
199 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
200
201 /* printf("getToken, returning %s\n", tokenNames[result]); */
202
203 return result;
204}
205
206static enum ETokenType
207peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
208{
209 uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
210
211 if (U_FAILURE(*status))
212 {
213 return TOK_ERROR;
214 }
215
216 if (lookaheadCount >= MAX_LOOKAHEAD)
217 {
218 *status = U_INTERNAL_PROGRAM_ERROR;
219 return TOK_ERROR;
220 }
221
222 if (tokenValue != NULL)
223 {
224 *tokenValue = &state->lookahead[i].value;
225 }
226
227 if (linenumber != NULL)
228 {
229 *linenumber = state->lookahead[i].line;
230 }
231
232 if(comment != NULL){
233 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
234 }
235
236 return state->lookahead[i].type;
237}
238
239static void
240expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
241{
242 uint32_t line;
243
244 enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
245
246 if (linenumber != NULL)
247 {
248 *linenumber = line;
249 }
250
251 if (U_FAILURE(*status))
252 {
253 return;
254 }
255
256 if (token != expectedToken)
257 {
258 *status = U_INVALID_FORMAT_ERROR;
259 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
260 }
261 else
262 {
263 *status = U_ZERO_ERROR;
264 }
265}
266
267static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
268{
269 struct UString *tokenValue;
270 char *result;
271 uint32_t count;
272
273 expect(state, TOK_STRING, &tokenValue, comment, line, status);
274
275 if (U_FAILURE(*status))
276 {
277 return NULL;
278 }
279
280 count = u_strlen(tokenValue->fChars);
281 if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
282 *status = U_INVALID_FORMAT_ERROR;
283 error(*line, "invariant characters required for table keys, binary data, etc.");
284 return NULL;
285 }
286
287 result = static_cast<char *>(uprv_malloc(count+1));
288
289 if (result == NULL)
290 {
291 *status = U_MEMORY_ALLOCATION_ERROR;
292 return NULL;
293 }
294
295 u_UCharsToChars(tokenValue->fChars, result, count+1);
296 return result;
297}
298
299static struct SResource *
300parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
301{
302 struct SResource *result = NULL;
303 struct UString *tokenValue;
304 FileStream *file = NULL;
305 char filename[256] = { '\0' };
306 char cs[128] = { '\0' };
307 uint32_t line;
308 UBool quoted = FALSE;
309 UCHARBUF *ucbuf=NULL;
310 UChar32 c = 0;
311 const char* cp = NULL;
312 UChar *pTarget = NULL;
313 UChar *target = NULL;
314 UChar *targetLimit = NULL;
315 int32_t size = 0;
316
317 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
318
319 if(isVerbose()){
320 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
321 }
322
323 if (U_FAILURE(*status))
324 {
325 return NULL;
326 }
327 /* make the filename including the directory */
328 if (state->inputdir != NULL)
329 {
330 uprv_strcat(filename, state->inputdir);
331
332 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
333 {
334 uprv_strcat(filename, U_FILE_SEP_STRING);
335 }
336 }
337
338 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
339
340 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
341
342 if (U_FAILURE(*status))
343 {
344 return NULL;
345 }
346 uprv_strcat(filename, cs);
347
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800348 if(state->omitCollationRules) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000349 return res_none();
350 }
351
352 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
353
354 if (U_FAILURE(*status)) {
355 error(line, "An error occured while opening the input file %s\n", filename);
356 return NULL;
357 }
358
359 /* We allocate more space than actually required
360 * since the actual size needed for storing UChars
361 * is not known in UTF-8 byte stream
362 */
363 size = ucbuf_size(ucbuf) + 1;
364 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
365 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
366 target = pTarget;
367 targetLimit = pTarget+size;
368
369 /* read the rules into the buffer */
370 while (target < targetLimit)
371 {
372 c = ucbuf_getc(ucbuf, status);
373 if(c == QUOTE) {
374 quoted = (UBool)!quoted;
375 }
376 /* weiv (06/26/2002): adding the following:
377 * - preserving spaces in commands [...]
378 * - # comments until the end of line
379 */
380 if (c == STARTCOMMAND && !quoted)
381 {
382 /* preserve commands
383 * closing bracket will be handled by the
384 * append at the end of the loop
385 */
386 while(c != ENDCOMMAND) {
387 U_APPEND_CHAR32_ONLY(c, target);
388 c = ucbuf_getc(ucbuf, status);
389 }
390 }
391 else if (c == HASH && !quoted) {
392 /* skip comments */
393 while(c != CR && c != LF) {
394 c = ucbuf_getc(ucbuf, status);
395 }
396 continue;
397 }
398 else if (c == ESCAPE)
399 {
400 c = unescape(ucbuf, status);
401
402 if (c == (UChar32)U_ERR)
403 {
404 uprv_free(pTarget);
405 T_FileStream_close(file);
406 return NULL;
407 }
408 }
409 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
410 {
411 /* ignore spaces carriage returns
412 * and line feed unless in the form \uXXXX
413 */
414 continue;
415 }
416
417 /* Append UChar * after dissembling if c > 0xffff*/
418 if (c != (UChar32)U_EOF)
419 {
420 U_APPEND_CHAR32_ONLY(c, target);
421 }
422 else
423 {
424 break;
425 }
426 }
427
428 /* terminate the string */
429 if(target < targetLimit){
430 *target = 0x0000;
431 }
432
433 result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
434
435
436 ucbuf_close(ucbuf);
437 uprv_free(pTarget);
438 T_FileStream_close(file);
439
440 return result;
441}
442
443static struct SResource *
444parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
445{
446 struct SResource *result = NULL;
447 struct UString *tokenValue;
448 FileStream *file = NULL;
449 char filename[256] = { '\0' };
450 char cs[128] = { '\0' };
451 uint32_t line;
452 UCHARBUF *ucbuf=NULL;
453 const char* cp = NULL;
454 UChar *pTarget = NULL;
455 const UChar *pSource = NULL;
456 int32_t size = 0;
457
458 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
459
460 if(isVerbose()){
461 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
462 }
463
464 if (U_FAILURE(*status))
465 {
466 return NULL;
467 }
468 /* make the filename including the directory */
469 if (state->inputdir != NULL)
470 {
471 uprv_strcat(filename, state->inputdir);
472
473 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
474 {
475 uprv_strcat(filename, U_FILE_SEP_STRING);
476 }
477 }
478
479 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
480
481 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
482
483 if (U_FAILURE(*status))
484 {
485 return NULL;
486 }
487 uprv_strcat(filename, cs);
488
489
490 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
491
492 if (U_FAILURE(*status)) {
493 error(line, "An error occured while opening the input file %s\n", filename);
494 return NULL;
495 }
496
497 /* We allocate more space than actually required
498 * since the actual size needed for storing UChars
499 * is not known in UTF-8 byte stream
500 */
501 pSource = ucbuf_getBuffer(ucbuf, &size, status);
502 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
503 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
504
505#if !UCONFIG_NO_TRANSLITERATION
506 size = utrans_stripRules(pSource, size, pTarget, status);
507#else
508 size = 0;
509 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
510#endif
511 result = string_open(state->bundle, tag, pTarget, size, NULL, status);
512
513 ucbuf_close(ucbuf);
514 uprv_free(pTarget);
515 T_FileStream_close(file);
516
517 return result;
518}
519static struct SResource* dependencyArray = NULL;
520
521static struct SResource *
522parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
523{
524 struct SResource *result = NULL;
525 struct SResource *elem = NULL;
526 struct UString *tokenValue;
527 uint32_t line;
528 char filename[256] = { '\0' };
529 char cs[128] = { '\0' };
530
531 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
532
533 if(isVerbose()){
534 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
535 }
536
537 if (U_FAILURE(*status))
538 {
539 return NULL;
540 }
541 /* make the filename including the directory */
542 if (state->outputdir != NULL)
543 {
544 uprv_strcat(filename, state->outputdir);
545
546 if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
547 {
548 uprv_strcat(filename, U_FILE_SEP_STRING);
549 }
550 }
551
552 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
553
554 if (U_FAILURE(*status))
555 {
556 return NULL;
557 }
558 uprv_strcat(filename, cs);
559 if(!T_FileStream_file_exists(filename)){
560 if(isStrict()){
561 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
562 }else{
563 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
564 }
565 }
566 if(dependencyArray==NULL){
567 dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
568 }
569 if(tag!=NULL){
570 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
571 }
572 elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
573
574 array_add(dependencyArray, elem, status);
575
576 if (U_FAILURE(*status))
577 {
578 return NULL;
579 }
580 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
581 return result;
582}
583static struct SResource *
584parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
585{
586 struct UString *tokenValue;
587 struct SResource *result = NULL;
588
589/* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
590 {
591 return parseUCARules(tag, startline, status);
592 }*/
593 if(isVerbose()){
594 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
595 }
596 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
597
598 if (U_SUCCESS(*status))
599 {
600 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
601 doesn't survive expect either) */
602
603 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
604 if(U_SUCCESS(*status) && result) {
605 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
606
607 if (U_FAILURE(*status))
608 {
609 res_close(result);
610 return NULL;
611 }
612 }
613 }
614
615 return result;
616}
617
618static struct SResource *
619parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
620{
621 struct UString *tokenValue;
622 struct SResource *result = NULL;
623
624 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
625
626 if(isVerbose()){
627 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
628 }
629
630 if (U_SUCCESS(*status))
631 {
632 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
633 doesn't survive expect either) */
634
635 result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
636
637 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
638
639 if (U_FAILURE(*status))
640 {
641 res_close(result);
642 return NULL;
643 }
644 }
645
646 return result;
647}
648
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800649#if !UCONFIG_NO_COLLATION
650
651namespace {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000652
653static struct SResource* resLookup(struct SResource* res, const char* key){
654 struct SResource *current = NULL;
655 struct SResTable *list;
656 if (res == res_none()) {
657 return NULL;
658 }
659
660 list = &(res->u.fTable);
661
662 current = list->fFirst;
663 while (current != NULL) {
664 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
665 return current;
666 }
667 current = current->fNext;
668 }
669 return NULL;
670}
671
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800672class GenrbImporter : public icu::CollationRuleParser::Importer {
673public:
674 GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out) {}
675 virtual ~GenrbImporter();
676 virtual void getRules(
677 const char *localeID, const char *collationType,
678 UnicodeString &rules,
679 const char *&errorReason, UErrorCode &errorCode);
680
681private:
682 const char *inputDir;
683 const char *outputDir;
684};
685
686GenrbImporter::~GenrbImporter() {}
687
688void
689GenrbImporter::getRules(
690 const char *localeID, const char *collationType,
691 UnicodeString &rules,
692 const char *& /*errorReason*/, UErrorCode &errorCode) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000693 struct SRBRoot *data = NULL;
694 UCHARBUF *ucbuf = NULL;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800695 int localeLength = strlen(localeID);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000696 char* filename = (char*)uprv_malloc(localeLength+5);
697 char *inputDirBuf = NULL;
698 char *openFileName = NULL;
699 const char* cp = "";
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000700 int32_t i = 0;
701 int32_t dirlen = 0;
702 int32_t filelen = 0;
703 struct SResource* root;
704 struct SResource* collations;
705 struct SResource* collation;
706 struct SResource* sequence;
707
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800708 memcpy(filename, localeID, localeLength);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000709 for(i = 0; i < localeLength; i++){
710 if(filename[i] == '-'){
711 filename[i] = '_';
712 }
713 }
714 filename[localeLength] = '.';
715 filename[localeLength+1] = 't';
716 filename[localeLength+2] = 'x';
717 filename[localeLength+3] = 't';
718 filename[localeLength+4] = 0;
719
720
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800721 if (U_FAILURE(errorCode)) {
722 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000723 }
724 if(filename==NULL){
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800725 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
726 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000727 }else{
728 filelen = (int32_t)uprv_strlen(filename);
729 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800730 if(inputDir == NULL) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000731 const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
732 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
733 openFileName[0] = '\0';
734 if (filenameBegin != NULL) {
735 /*
736 * When a filename ../../../data/root.txt is specified,
737 * we presume that the input directory is ../../../data
738 * This is very important when the resource file includes
739 * another file, like UCARules.txt or thaidict.brk.
740 */
741 int32_t filenameSize = (int32_t)(filenameBegin - filename + 1);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800742 inputDirBuf = (char *)uprv_malloc(filenameSize);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000743
744 /* test for NULL */
745 if(inputDirBuf == NULL) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800746 errorCode = U_MEMORY_ALLOCATION_ERROR;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000747 goto finish;
748 }
749
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800750 uprv_strncpy(inputDirBuf, filename, filenameSize);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000751 inputDirBuf[filenameSize - 1] = 0;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800752 inputDir = inputDirBuf;
753 dirlen = (int32_t)uprv_strlen(inputDir);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000754 }
755 }else{
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800756 dirlen = (int32_t)uprv_strlen(inputDir);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000757
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800758 if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000759 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
760
761 /* test for NULL */
762 if(openFileName == NULL) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800763 errorCode = U_MEMORY_ALLOCATION_ERROR;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000764 goto finish;
765 }
766
767 openFileName[0] = '\0';
768 /*
769 * append the input dir to openFileName if the first char in
770 * filename is not file seperation char and the last char input directory is not '.'.
771 * This is to support :
772 * genrb -s. /home/icu/data
773 * genrb -s. icu/data
774 * The user cannot mix notations like
775 * genrb -s. /icu/data --- the absolute path specified. -s redundant
776 * user should use
777 * genrb -s. icu/data --- start from CWD and look in icu/data dir
778 */
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800779 if( (filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){
780 uprv_strcpy(openFileName, inputDir);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000781 openFileName[dirlen] = U_FILE_SEP_CHAR;
782 }
783 openFileName[dirlen + 1] = '\0';
784 } else {
785 openFileName = (char *) uprv_malloc(dirlen + filelen + 1);
786
787 /* test for NULL */
788 if(openFileName == NULL) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800789 errorCode = U_MEMORY_ALLOCATION_ERROR;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000790 goto finish;
791 }
792
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800793 uprv_strcpy(openFileName, inputDir);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000794
795 }
796 }
797 uprv_strcat(openFileName, filename);
798 /* printf("%s\n", openFileName); */
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800799 errorCode = U_ZERO_ERROR;
800 ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, &errorCode);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000801
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800802 if(errorCode == U_FILE_ACCESS_ERROR) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000803
804 fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName);
805 goto finish;
806 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800807 if (ucbuf == NULL || U_FAILURE(errorCode)) {
808 fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName,u_errorName(errorCode));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000809 goto finish;
810 }
811
812 /* Parse the data into an SRBRoot */
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800813 data = parse(ucbuf, inputDir, outputDir, filename, FALSE, FALSE, &errorCode);
814 if (U_FAILURE(errorCode)) {
815 goto finish;
816 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000817
818 root = data->fRoot;
819 collations = resLookup(root, "collations");
820 if (collations != NULL) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800821 collation = resLookup(collations, collationType);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000822 if (collation != NULL) {
823 sequence = resLookup(collation, "Sequence");
824 if (sequence != NULL) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800825 // No string pointer aliasing so that we need not hold onto the resource bundle.
826 rules.setTo(sequence->u.fString.fChars, sequence->u.fString.fLength);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000827 }
828 }
829 }
830
831finish:
832 if (inputDirBuf != NULL) {
833 uprv_free(inputDirBuf);
834 }
835
836 if (openFileName != NULL) {
837 uprv_free(openFileName);
838 }
839
840 if(ucbuf) {
841 ucbuf_close(ucbuf);
842 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000843}
844
845// Quick-and-dirty escaping function.
846// Assumes that we are on an ASCII-based platform.
847static void
848escape(const UChar *s, char *buffer) {
849 int32_t length = u_strlen(s);
850 int32_t i = 0;
851 for (;;) {
852 UChar32 c;
853 U16_NEXT(s, i, length, c);
854 if (c == 0) {
855 *buffer = 0;
856 return;
857 } else if (0x20 <= c && c <= 0x7e) {
858 // printable ASCII
859 *buffer++ = (char)c; // assumes ASCII-based platform
860 } else {
861 buffer += sprintf(buffer, "\\u%04X", (int)c);
862 }
863 }
864}
865
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800866} // namespace
867
868#endif // !UCONFIG_NO_COLLATION
869
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000870static struct SResource *
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800871addCollation(ParseState* state, struct SResource *result, const char *collationType,
872 uint32_t startline, UErrorCode *status)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000873{
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800874 // TODO: Use LocalPointer for result, or make caller close it when there is a failure.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000875 struct SResource *member = NULL;
876 struct UString *tokenValue;
877 struct UString comment;
878 enum ETokenType token;
879 char subtag[1024];
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800880 UnicodeString rules;
881 UBool haveRules = FALSE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000882 UVersionInfo version;
883 uint32_t line;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800884
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000885 /* '{' . (name resource)* '}' */
886 version[0]=0; version[1]=0; version[2]=0; version[3]=0;
887
888 for (;;)
889 {
890 ustr_init(&comment);
891 token = getToken(state, &tokenValue, &comment, &line, status);
892
893 if (token == TOK_CLOSE_BRACE)
894 {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800895 break;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000896 }
897
898 if (token != TOK_STRING)
899 {
900 res_close(result);
901 *status = U_INVALID_FORMAT_ERROR;
902
903 if (token == TOK_EOF)
904 {
905 error(startline, "unterminated table");
906 }
907 else
908 {
909 error(line, "Unexpected token %s", tokenNames[token]);
910 }
911
912 return NULL;
913 }
914
915 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
916
917 if (U_FAILURE(*status))
918 {
919 res_close(result);
920 return NULL;
921 }
922
923 member = parseResource(state, subtag, NULL, status);
924
925 if (U_FAILURE(*status))
926 {
927 res_close(result);
928 return NULL;
929 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800930 if (result == NULL)
931 {
932 // Ignore the parsed resources, continue parsing.
933 }
934 else if (uprv_strcmp(subtag, "Version") == 0)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000935 {
936 char ver[40];
937 int32_t length = member->u.fString.fLength;
938
939 if (length >= (int32_t) sizeof(ver))
940 {
941 length = (int32_t) sizeof(ver) - 1;
942 }
943
944 u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
945 u_versionFromString(version, ver);
946
947 table_add(result, member, line, status);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800948 member = NULL;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000949 }
950 else if(uprv_strcmp(subtag, "%%CollationBin")==0)
951 {
952 /* discard duplicate %%CollationBin if any*/
953 }
954 else if (uprv_strcmp(subtag, "Sequence") == 0)
955 {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800956 rules.setTo(member->u.fString.fChars, member->u.fString.fLength);
957 haveRules = TRUE;
958 // Defer building the collator until we have seen
959 // all sub-elements of the collation table, including the Version.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000960 /* in order to achieve smaller data files, we can direct genrb */
961 /* to omit collation rules */
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800962 if(!state->omitCollationRules) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000963 table_add(result, member, line, status);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800964 member = NULL;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000965 }
966 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800967 else // Just copy non-special items.
968 {
969 table_add(result, member, line, status);
970 member = NULL;
971 }
972 res_close(member); // TODO: use LocalPointer
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000973 if (U_FAILURE(*status))
974 {
975 res_close(result);
976 return NULL;
977 }
978 }
979
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800980 if (!haveRules) { return result; }
981
982#if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
983 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
984 (void)collationType;
985#else
986 // CLDR ticket #3949, ICU ticket #8082:
987 // Do not build collation binary data for for-import-only "private" collation rule strings.
988 if (uprv_strncmp(collationType, "private-", 8) == 0) {
989 if(isVerbose()) {
990 printf("Not building %s~%s collation binary\n", state->filename, collationType);
991 }
992 return result;
993 }
994
995 if(!state->makeBinaryCollation) {
996 if(isVerbose()) {
997 printf("Not building %s~%s collation binary\n", state->filename, collationType);
998 }
999 return result;
1000 }
1001 UErrorCode intStatus = U_ZERO_ERROR;
1002 UParseError parseError;
1003 uprv_memset(&parseError, 0, sizeof(parseError));
1004 GenrbImporter importer(state->inputdir, state->outputdir);
1005 const icu::CollationTailoring *base = icu::CollationRoot::getRoot(intStatus);
1006 if(U_FAILURE(intStatus)) {
1007 error(line, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus));
1008 res_close(result);
1009 return NULL; // TODO: use LocalUResourceBundlePointer for result
1010 }
1011 icu::CollationBuilder builder(base, intStatus);
1012 if(uprv_strncmp(collationType, "search", 6) == 0) {
1013 builder.disableFastLatin(); // build fast-Latin table unless search collator
1014 }
1015 LocalPointer<icu::CollationTailoring> t(
1016 builder.parseAndBuild(rules, version, &importer, &parseError, intStatus));
1017 if(U_FAILURE(intStatus)) {
1018 const char *reason = builder.getErrorReason();
1019 if(reason == NULL) { reason = ""; }
1020 error(line, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s %s",
1021 state->filename, collationType,
1022 (long)parseError.offset, u_errorName(intStatus), reason);
1023 if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) {
1024 // Print pre- and post-context.
1025 char preBuffer[100], postBuffer[100];
1026 escape(parseError.preContext, preBuffer);
1027 escape(parseError.postContext, postBuffer);
1028 error(line, " error context: \"...%s\" ! \"%s...\"", preBuffer, postBuffer);
1029 }
1030 if(isStrict()) {
1031 *status = intStatus;
1032 res_close(result);
1033 return NULL;
1034 }
1035 }
1036 icu::LocalMemory<uint8_t> buffer;
1037 int32_t capacity = 100000;
1038 uint8_t *dest = buffer.allocateInsteadAndCopy(capacity);
1039 if(dest == NULL) {
1040 fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
1041 (long)capacity);
1042 *status = U_MEMORY_ALLOCATION_ERROR;
1043 res_close(result);
1044 return NULL;
1045 }
1046 int32_t indexes[icu::CollationDataReader::IX_TOTAL_SIZE + 1];
1047 int32_t totalSize = icu::CollationDataWriter::writeTailoring(
1048 *t, *t->settings, indexes, dest, capacity, intStatus);
1049 if(intStatus == U_BUFFER_OVERFLOW_ERROR) {
1050 intStatus = U_ZERO_ERROR;
1051 capacity = totalSize;
1052 dest = buffer.allocateInsteadAndCopy(capacity);
1053 if(dest == NULL) {
1054 fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
1055 (long)capacity);
1056 *status = U_MEMORY_ALLOCATION_ERROR;
1057 res_close(result);
1058 return NULL;
1059 }
1060 totalSize = icu::CollationDataWriter::writeTailoring(
1061 *t, *t->settings, indexes, dest, capacity, intStatus);
1062 }
1063 if(U_FAILURE(intStatus)) {
1064 fprintf(stderr, "CollationDataWriter::writeTailoring() failed: %s\n",
1065 u_errorName(intStatus));
1066 res_close(result);
1067 return NULL;
1068 }
1069 if(isVerbose()) {
1070 printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType);
1071 icu::CollationInfo::printSizes(totalSize, indexes);
1072 }
1073 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", totalSize, dest, NULL, NULL, status);
1074 table_add(result, collationBin, line, status);
1075 if (U_FAILURE(*status)) {
1076 res_close(result);
1077 return NULL;
1078 }
1079#endif
1080 return result;
1081}
1082
1083static UBool
1084keepCollationType(const char * /*type*/) {
1085 return TRUE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001086}
1087
1088static struct SResource *
1089parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
1090{
1091 struct SResource *result = NULL;
1092 struct SResource *member = NULL;
1093 struct SResource *collationRes = NULL;
1094 struct UString *tokenValue;
1095 struct UString comment;
1096 enum ETokenType token;
1097 char subtag[1024], typeKeyword[1024];
1098 uint32_t line;
1099
1100 result = table_open(state->bundle, tag, NULL, status);
1101
1102 if (result == NULL || U_FAILURE(*status))
1103 {
1104 return NULL;
1105 }
1106 if(isVerbose()){
1107 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1108 }
1109 if(!newCollation) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001110 return addCollation(state, result, "(no type)", startline, status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001111 }
1112 else {
1113 for(;;) {
1114 ustr_init(&comment);
1115 token = getToken(state, &tokenValue, &comment, &line, status);
1116
1117 if (token == TOK_CLOSE_BRACE)
1118 {
1119 return result;
1120 }
1121
1122 if (token != TOK_STRING)
1123 {
1124 res_close(result);
1125 *status = U_INVALID_FORMAT_ERROR;
1126
1127 if (token == TOK_EOF)
1128 {
1129 error(startline, "unterminated table");
1130 }
1131 else
1132 {
1133 error(line, "Unexpected token %s", tokenNames[token]);
1134 }
1135
1136 return NULL;
1137 }
1138
1139 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1140
1141 if (U_FAILURE(*status))
1142 {
1143 res_close(result);
1144 return NULL;
1145 }
1146
1147 if (uprv_strcmp(subtag, "default") == 0)
1148 {
1149 member = parseResource(state, subtag, NULL, status);
1150
1151 if (U_FAILURE(*status))
1152 {
1153 res_close(result);
1154 return NULL;
1155 }
1156
1157 table_add(result, member, line, status);
1158 }
1159 else
1160 {
1161 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
1162 /* this probably needs to be refactored or recursively use the parser */
1163 /* first we assume that our collation table won't have the explicit type */
1164 /* then, we cannot handle aliases */
1165 if(token == TOK_OPEN_BRACE) {
1166 token = getToken(state, &tokenValue, &comment, &line, status);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001167 if (keepCollationType(subtag)) {
1168 collationRes = table_open(state->bundle, subtag, NULL, status);
1169 } else {
1170 collationRes = NULL;
1171 }
1172 // need to parse the collation data regardless
1173 collationRes = addCollation(state, collationRes, subtag, startline, status);
1174 if (collationRes != NULL) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001175 table_add(result, collationRes, startline, status);
1176 }
1177 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
1178 /* we could have a table too */
1179 token = peekToken(state, 1, &tokenValue, &line, &comment, status);
1180 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
1181 if(uprv_strcmp(typeKeyword, "alias") == 0) {
1182 member = parseResource(state, subtag, NULL, status);
1183 if (U_FAILURE(*status))
1184 {
1185 res_close(result);
1186 return NULL;
1187 }
1188
1189 table_add(result, member, line, status);
1190 } else {
1191 res_close(result);
1192 *status = U_INVALID_FORMAT_ERROR;
1193 return NULL;
1194 }
1195 } else {
1196 res_close(result);
1197 *status = U_INVALID_FORMAT_ERROR;
1198 return NULL;
1199 }
1200 }
1201
1202 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1203
1204 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1205
1206 if (U_FAILURE(*status))
1207 {
1208 res_close(result);
1209 return NULL;
1210 }
1211 }
1212 }
1213}
1214
1215/* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1216 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1217static struct SResource *
1218realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
1219{
1220 struct SResource *member = NULL;
1221 struct UString *tokenValue=NULL;
1222 struct UString comment;
1223 enum ETokenType token;
1224 char subtag[1024];
1225 uint32_t line;
1226 UBool readToken = FALSE;
1227
1228 /* '{' . (name resource)* '}' */
1229
1230 if(isVerbose()){
1231 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1232 }
1233 for (;;)
1234 {
1235 ustr_init(&comment);
1236 token = getToken(state, &tokenValue, &comment, &line, status);
1237
1238 if (token == TOK_CLOSE_BRACE)
1239 {
1240 if (!readToken) {
1241 warning(startline, "Encountered empty table");
1242 }
1243 return table;
1244 }
1245
1246 if (token != TOK_STRING)
1247 {
1248 *status = U_INVALID_FORMAT_ERROR;
1249
1250 if (token == TOK_EOF)
1251 {
1252 error(startline, "unterminated table");
1253 }
1254 else
1255 {
1256 error(line, "unexpected token %s", tokenNames[token]);
1257 }
1258
1259 return NULL;
1260 }
1261
1262 if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
1263 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1264 } else {
1265 *status = U_INVALID_FORMAT_ERROR;
1266 error(line, "invariant characters required for table keys");
1267 return NULL;
1268 }
1269
1270 if (U_FAILURE(*status))
1271 {
1272 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
1273 return NULL;
1274 }
1275
1276 member = parseResource(state, subtag, &comment, status);
1277
1278 if (member == NULL || U_FAILURE(*status))
1279 {
1280 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
1281 return NULL;
1282 }
1283
1284 table_add(table, member, line, status);
1285
1286 if (U_FAILURE(*status))
1287 {
1288 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
1289 return NULL;
1290 }
1291 readToken = TRUE;
1292 ustr_deinit(&comment);
1293 }
1294
1295 /* not reached */
1296 /* A compiler warning will appear if all paths don't contain a return statement. */
1297/* *status = U_INTERNAL_PROGRAM_ERROR;
1298 return NULL;*/
1299}
1300
1301static struct SResource *
1302parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1303{
1304 struct SResource *result;
1305
1306 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
1307 {
1308 return parseCollationElements(state, tag, startline, FALSE, status);
1309 }
1310 if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1311 {
1312 return parseCollationElements(state, tag, startline, TRUE, status);
1313 }
1314 if(isVerbose()){
1315 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1316 }
1317
1318 result = table_open(state->bundle, tag, comment, status);
1319
1320 if (result == NULL || U_FAILURE(*status))
1321 {
1322 return NULL;
1323 }
1324 return realParseTable(state, result, tag, startline, status);
1325}
1326
1327static struct SResource *
1328parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1329{
1330 struct SResource *result = NULL;
1331 struct SResource *member = NULL;
1332 struct UString *tokenValue;
1333 struct UString memberComments;
1334 enum ETokenType token;
1335 UBool readToken = FALSE;
1336
1337 result = array_open(state->bundle, tag, comment, status);
1338
1339 if (result == NULL || U_FAILURE(*status))
1340 {
1341 return NULL;
1342 }
1343 if(isVerbose()){
1344 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1345 }
1346
1347 ustr_init(&memberComments);
1348
1349 /* '{' . resource [','] '}' */
1350 for (;;)
1351 {
1352 /* reset length */
1353 ustr_setlen(&memberComments, 0, status);
1354
1355 /* check for end of array, but don't consume next token unless it really is the end */
1356 token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
1357
1358
1359 if (token == TOK_CLOSE_BRACE)
1360 {
1361 getToken(state, NULL, NULL, NULL, status);
1362 if (!readToken) {
1363 warning(startline, "Encountered empty array");
1364 }
1365 break;
1366 }
1367
1368 if (token == TOK_EOF)
1369 {
1370 res_close(result);
1371 *status = U_INVALID_FORMAT_ERROR;
1372 error(startline, "unterminated array");
1373 return NULL;
1374 }
1375
1376 /* string arrays are a special case */
1377 if (token == TOK_STRING)
1378 {
1379 getToken(state, &tokenValue, &memberComments, NULL, status);
1380 member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
1381 }
1382 else
1383 {
1384 member = parseResource(state, NULL, &memberComments, status);
1385 }
1386
1387 if (member == NULL || U_FAILURE(*status))
1388 {
1389 res_close(result);
1390 return NULL;
1391 }
1392
1393 array_add(result, member, status);
1394
1395 if (U_FAILURE(*status))
1396 {
1397 res_close(result);
1398 return NULL;
1399 }
1400
1401 /* eat optional comma if present */
1402 token = peekToken(state, 0, NULL, NULL, NULL, status);
1403
1404 if (token == TOK_COMMA)
1405 {
1406 getToken(state, NULL, NULL, NULL, status);
1407 }
1408
1409 if (U_FAILURE(*status))
1410 {
1411 res_close(result);
1412 return NULL;
1413 }
1414 readToken = TRUE;
1415 }
1416
1417 ustr_deinit(&memberComments);
1418 return result;
1419}
1420
1421static struct SResource *
1422parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1423{
1424 struct SResource *result = NULL;
1425 enum ETokenType token;
1426 char *string;
1427 int32_t value;
1428 UBool readToken = FALSE;
1429 char *stopstring;
1430 uint32_t len;
1431 struct UString memberComments;
1432
1433 result = intvector_open(state->bundle, tag, comment, status);
1434
1435 if (result == NULL || U_FAILURE(*status))
1436 {
1437 return NULL;
1438 }
1439
1440 if(isVerbose()){
1441 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1442 }
1443 ustr_init(&memberComments);
1444 /* '{' . string [','] '}' */
1445 for (;;)
1446 {
1447 ustr_setlen(&memberComments, 0, status);
1448
1449 /* check for end of array, but don't consume next token unless it really is the end */
1450 token = peekToken(state, 0, NULL, NULL,&memberComments, status);
1451
1452 if (token == TOK_CLOSE_BRACE)
1453 {
1454 /* it's the end, consume the close brace */
1455 getToken(state, NULL, NULL, NULL, status);
1456 if (!readToken) {
1457 warning(startline, "Encountered empty int vector");
1458 }
1459 ustr_deinit(&memberComments);
1460 return result;
1461 }
1462
1463 string = getInvariantString(state, NULL, NULL, status);
1464
1465 if (U_FAILURE(*status))
1466 {
1467 res_close(result);
1468 return NULL;
1469 }
1470
1471 /* For handling illegal char in the Intvector */
1472 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1473 len=(uint32_t)(stopstring-string);
1474
1475 if(len==uprv_strlen(string))
1476 {
1477 intvector_add(result, value, status);
1478 uprv_free(string);
1479 token = peekToken(state, 0, NULL, NULL, NULL, status);
1480 }
1481 else
1482 {
1483 uprv_free(string);
1484 *status=U_INVALID_CHAR_FOUND;
1485 }
1486
1487 if (U_FAILURE(*status))
1488 {
1489 res_close(result);
1490 return NULL;
1491 }
1492
1493 /* the comma is optional (even though it is required to prevent the reader from concatenating
1494 consecutive entries) so that a missing comma on the last entry isn't an error */
1495 if (token == TOK_COMMA)
1496 {
1497 getToken(state, NULL, NULL, NULL, status);
1498 }
1499 readToken = TRUE;
1500 }
1501
1502 /* not reached */
1503 /* A compiler warning will appear if all paths don't contain a return statement. */
1504/* intvector_close(result, status);
1505 *status = U_INTERNAL_PROGRAM_ERROR;
1506 return NULL;*/
1507}
1508
1509static struct SResource *
1510parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1511{
1512 struct SResource *result = NULL;
1513 uint8_t *value;
1514 char *string;
1515 char toConv[3] = {'\0', '\0', '\0'};
1516 uint32_t count;
1517 uint32_t i;
1518 uint32_t line;
1519 char *stopstring;
1520 uint32_t len;
1521
1522 string = getInvariantString(state, &line, NULL, status);
1523
1524 if (string == NULL || U_FAILURE(*status))
1525 {
1526 return NULL;
1527 }
1528
1529 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1530
1531 if (U_FAILURE(*status))
1532 {
1533 uprv_free(string);
1534 return NULL;
1535 }
1536
1537 if(isVerbose()){
1538 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1539 }
1540
1541 count = (uint32_t)uprv_strlen(string);
1542 if (count > 0){
1543 if((count % 2)==0){
1544 value = static_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * count));
1545
1546 if (value == NULL)
1547 {
1548 uprv_free(string);
1549 *status = U_MEMORY_ALLOCATION_ERROR;
1550 return NULL;
1551 }
1552
1553 for (i = 0; i < count; i += 2)
1554 {
1555 toConv[0] = string[i];
1556 toConv[1] = string[i + 1];
1557
1558 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
1559 len=(uint32_t)(stopstring-toConv);
1560
1561 if(len!=uprv_strlen(toConv))
1562 {
1563 uprv_free(string);
1564 *status=U_INVALID_CHAR_FOUND;
1565 return NULL;
1566 }
1567 }
1568
1569 result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status);
1570
1571 uprv_free(value);
1572 }
1573 else
1574 {
1575 *status = U_INVALID_CHAR_FOUND;
1576 uprv_free(string);
1577 error(line, "Encountered invalid binary string");
1578 return NULL;
1579 }
1580 }
1581 else
1582 {
1583 result = bin_open(state->bundle, tag, 0, NULL, "",comment,status);
1584 warning(startline, "Encountered empty binary tag");
1585 }
1586 uprv_free(string);
1587
1588 return result;
1589}
1590
1591static struct SResource *
1592parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1593{
1594 struct SResource *result = NULL;
1595 int32_t value;
1596 char *string;
1597 char *stopstring;
1598 uint32_t len;
1599
1600 string = getInvariantString(state, NULL, NULL, status);
1601
1602 if (string == NULL || U_FAILURE(*status))
1603 {
1604 return NULL;
1605 }
1606
1607 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1608
1609 if (U_FAILURE(*status))
1610 {
1611 uprv_free(string);
1612 return NULL;
1613 }
1614
1615 if(isVerbose()){
1616 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1617 }
1618
1619 if (uprv_strlen(string) <= 0)
1620 {
1621 warning(startline, "Encountered empty integer. Default value is 0.");
1622 }
1623
1624 /* Allow integer support for hexdecimal, octal digit and decimal*/
1625 /* and handle illegal char in the integer*/
1626 value = uprv_strtoul(string, &stopstring, 0);
1627 len=(uint32_t)(stopstring-string);
1628 if(len==uprv_strlen(string))
1629 {
1630 result = int_open(state->bundle, tag, value, comment, status);
1631 }
1632 else
1633 {
1634 *status=U_INVALID_CHAR_FOUND;
1635 }
1636 uprv_free(string);
1637
1638 return result;
1639}
1640
1641static struct SResource *
1642parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1643{
1644 struct SResource *result;
1645 FileStream *file;
1646 int32_t len;
1647 uint8_t *data;
1648 char *filename;
1649 uint32_t line;
1650 char *fullname = NULL;
1651 filename = getInvariantString(state, &line, NULL, status);
1652
1653 if (U_FAILURE(*status))
1654 {
1655 return NULL;
1656 }
1657
1658 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1659
1660 if (U_FAILURE(*status))
1661 {
1662 uprv_free(filename);
1663 return NULL;
1664 }
1665
1666 if(isVerbose()){
1667 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1668 }
1669
1670 /* Open the input file for reading */
1671 if (state->inputdir == NULL)
1672 {
1673#if 1
1674 /*
1675 * Always save file file name, even if there's
1676 * no input directory specified. MIGHT BREAK SOMETHING
1677 */
1678 int32_t filenameLength = uprv_strlen(filename);
1679
1680 fullname = (char *) uprv_malloc(filenameLength + 1);
1681 uprv_strcpy(fullname, filename);
1682#endif
1683
1684 file = T_FileStream_open(filename, "rb");
1685 }
1686 else
1687 {
1688
1689 int32_t count = (int32_t)uprv_strlen(filename);
1690
1691 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1692 {
1693 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1694
1695 /* test for NULL */
1696 if(fullname == NULL)
1697 {
1698 *status = U_MEMORY_ALLOCATION_ERROR;
1699 return NULL;
1700 }
1701
1702 uprv_strcpy(fullname, state->inputdir);
1703
1704 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1705 fullname[state->inputdirLength + 1] = '\0';
1706
1707 uprv_strcat(fullname, filename);
1708 }
1709 else
1710 {
1711 fullname = (char *) uprv_malloc(state->inputdirLength + count + 1);
1712
1713 /* test for NULL */
1714 if(fullname == NULL)
1715 {
1716 *status = U_MEMORY_ALLOCATION_ERROR;
1717 return NULL;
1718 }
1719
1720 uprv_strcpy(fullname, state->inputdir);
1721 uprv_strcat(fullname, filename);
1722 }
1723
1724 file = T_FileStream_open(fullname, "rb");
1725
1726 }
1727
1728 if (file == NULL)
1729 {
1730 error(line, "couldn't open input file %s", filename);
1731 *status = U_FILE_ACCESS_ERROR;
1732 return NULL;
1733 }
1734
1735 len = T_FileStream_size(file);
1736 data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
1737 /* test for NULL */
1738 if(data == NULL)
1739 {
1740 *status = U_MEMORY_ALLOCATION_ERROR;
1741 T_FileStream_close (file);
1742 return NULL;
1743 }
1744
1745 /* int32_t numRead = */ T_FileStream_read (file, data, len);
1746 T_FileStream_close (file);
1747
1748 result = bin_open(state->bundle, tag, len, data, fullname, comment, status);
1749
1750 uprv_free(data);
1751 uprv_free(filename);
1752 uprv_free(fullname);
1753
1754 return result;
1755}
1756
1757static struct SResource *
1758parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1759{
1760 struct SResource *result;
1761 int32_t len=0;
1762 char *filename;
1763 uint32_t line;
1764 UChar *pTarget = NULL;
1765
1766 UCHARBUF *ucbuf;
1767 char *fullname = NULL;
1768 int32_t count = 0;
1769 const char* cp = NULL;
1770 const UChar* uBuffer = NULL;
1771
1772 filename = getInvariantString(state, &line, NULL, status);
1773 count = (int32_t)uprv_strlen(filename);
1774
1775 if (U_FAILURE(*status))
1776 {
1777 return NULL;
1778 }
1779
1780 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1781
1782 if (U_FAILURE(*status))
1783 {
1784 uprv_free(filename);
1785 return NULL;
1786 }
1787
1788 if(isVerbose()){
1789 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1790 }
1791
1792 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1793 /* test for NULL */
1794 if(fullname == NULL)
1795 {
1796 *status = U_MEMORY_ALLOCATION_ERROR;
1797 uprv_free(filename);
1798 return NULL;
1799 }
1800
1801 if(state->inputdir!=NULL){
1802 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1803 {
1804
1805 uprv_strcpy(fullname, state->inputdir);
1806
1807 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1808 fullname[state->inputdirLength + 1] = '\0';
1809
1810 uprv_strcat(fullname, filename);
1811 }
1812 else
1813 {
1814 uprv_strcpy(fullname, state->inputdir);
1815 uprv_strcat(fullname, filename);
1816 }
1817 }else{
1818 uprv_strcpy(fullname,filename);
1819 }
1820
1821 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1822
1823 if (U_FAILURE(*status)) {
1824 error(line, "couldn't open input file %s\n", filename);
1825 return NULL;
1826 }
1827
1828 uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
1829 result = string_open(state->bundle, tag, uBuffer, len, comment, status);
1830
1831 ucbuf_close(ucbuf);
1832
1833 uprv_free(pTarget);
1834
1835 uprv_free(filename);
1836 uprv_free(fullname);
1837
1838 return result;
1839}
1840
1841
1842
1843
1844
1845U_STRING_DECL(k_type_string, "string", 6);
1846U_STRING_DECL(k_type_binary, "binary", 6);
1847U_STRING_DECL(k_type_bin, "bin", 3);
1848U_STRING_DECL(k_type_table, "table", 5);
1849U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17);
1850U_STRING_DECL(k_type_int, "int", 3);
1851U_STRING_DECL(k_type_integer, "integer", 7);
1852U_STRING_DECL(k_type_array, "array", 5);
1853U_STRING_DECL(k_type_alias, "alias", 5);
1854U_STRING_DECL(k_type_intvector, "intvector", 9);
1855U_STRING_DECL(k_type_import, "import", 6);
1856U_STRING_DECL(k_type_include, "include", 7);
1857
1858/* Various non-standard processing plugins that create one or more special resources. */
1859U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1860U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18);
1861U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23);
1862U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19);
1863
/*
 * Resource types known to the parser. The numeric value of each constant is
 * used as an index into gResourceTypes[], so the two must stay in the same
 * order.
 */
typedef enum EResourceType
{
    RESTYPE_UNKNOWN = 0,            /* no explicit type given */
    RESTYPE_STRING,
    RESTYPE_BINARY,
    RESTYPE_TABLE,
    RESTYPE_TABLE_NO_FALLBACK,
    RESTYPE_INTEGER,
    RESTYPE_ARRAY,
    RESTYPE_ALIAS,
    RESTYPE_INTVECTOR,
    RESTYPE_IMPORT,
    RESTYPE_INCLUDE,
    RESTYPE_PROCESS_UCA_RULES,
    RESTYPE_PROCESS_COLLATION,
    RESTYPE_PROCESS_TRANSLITERATOR,
    RESTYPE_PROCESS_DEPENDENCY,
    RESTYPE_RESERVED                /* end marker: one past the last real type */
} EResourceType;
1883
1884static struct {
1885 const char *nameChars; /* only used for debugging */
1886 const UChar *nameUChars;
1887 ParseResourceFunction *parseFunction;
1888} gResourceTypes[] = {
1889 {"Unknown", NULL, NULL},
1890 {"string", k_type_string, parseString},
1891 {"binary", k_type_binary, parseBinary},
1892 {"table", k_type_table, parseTable},
1893 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
1894 {"integer", k_type_integer, parseInteger},
1895 {"array", k_type_array, parseArray},
1896 {"alias", k_type_alias, parseAlias},
1897 {"intvector", k_type_intvector, parseIntVector},
1898 {"import", k_type_import, parseImport},
1899 {"include", k_type_include, parseInclude},
1900 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
1901 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
1902 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
1903 {"process(dependency)", k_type_plugin_dependency, parseDependency},
1904 {"reserved", NULL, NULL}
1905};
1906
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001907void initParser()
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001908{
1909 U_STRING_INIT(k_type_string, "string", 6);
1910 U_STRING_INIT(k_type_binary, "binary", 6);
1911 U_STRING_INIT(k_type_bin, "bin", 3);
1912 U_STRING_INIT(k_type_table, "table", 5);
1913 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17);
1914 U_STRING_INIT(k_type_int, "int", 3);
1915 U_STRING_INIT(k_type_integer, "integer", 7);
1916 U_STRING_INIT(k_type_array, "array", 5);
1917 U_STRING_INIT(k_type_alias, "alias", 5);
1918 U_STRING_INIT(k_type_intvector, "intvector", 9);
1919 U_STRING_INIT(k_type_import, "import", 6);
1920 U_STRING_INIT(k_type_include, "include", 7);
1921
1922 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1923 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18);
1924 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
1925 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001926}
1927
1928static inline UBool isTable(enum EResourceType type) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001929 return (UBool)(type==RESTYPE_TABLE || type==RESTYPE_TABLE_NO_FALLBACK);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001930}
1931
1932static enum EResourceType
1933parseResourceType(ParseState* state, UErrorCode *status)
1934{
1935 struct UString *tokenValue;
1936 struct UString comment;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001937 enum EResourceType result = RESTYPE_UNKNOWN;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001938 uint32_t line=0;
1939 ustr_init(&comment);
1940 expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
1941
1942 if (U_FAILURE(*status))
1943 {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001944 return RESTYPE_UNKNOWN;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001945 }
1946
1947 *status = U_ZERO_ERROR;
1948
1949 /* Search for normal types */
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001950 result=RESTYPE_UNKNOWN;
1951 while ((result=(EResourceType)(result+1)) < RESTYPE_RESERVED) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001952 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1953 break;
1954 }
1955 }
1956 /* Now search for the aliases */
1957 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001958 result = RESTYPE_INTEGER;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001959 }
1960 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001961 result = RESTYPE_BINARY;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001962 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001963 else if (result == RESTYPE_RESERVED) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001964 char tokenBuffer[1024];
1965 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1966 tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1967 *status = U_INVALID_FORMAT_ERROR;
1968 error(line, "unknown resource type '%s'", tokenBuffer);
1969 }
1970
1971 return result;
1972}
1973
1974/* parse a non-top-level resource */
1975static struct SResource *
1976parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
1977{
1978 enum ETokenType token;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001979 enum EResourceType resType = RESTYPE_UNKNOWN;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001980 ParseResourceFunction *parseFunction = NULL;
1981 struct UString *tokenValue;
1982 uint32_t startline;
1983 uint32_t line;
1984
1985
1986 token = getToken(state, &tokenValue, NULL, &startline, status);
1987
1988 if(isVerbose()){
1989 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1990 }
1991
1992 /* name . [ ':' type ] '{' resource '}' */
1993 /* This function parses from the colon onwards. If the colon is present, parse the
1994 type then try to parse a resource of that type. If there is no explicit type,
1995 work it out using the lookahead tokens. */
1996 switch (token)
1997 {
1998 case TOK_EOF:
1999 *status = U_INVALID_FORMAT_ERROR;
2000 error(startline, "Unexpected EOF encountered");
2001 return NULL;
2002
2003 case TOK_ERROR:
2004 *status = U_INVALID_FORMAT_ERROR;
2005 return NULL;
2006
2007 case TOK_COLON:
2008 resType = parseResourceType(state, status);
2009 expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
2010
2011 if (U_FAILURE(*status))
2012 {
2013 return NULL;
2014 }
2015
2016 break;
2017
2018 case TOK_OPEN_BRACE:
2019 break;
2020
2021 default:
2022 *status = U_INVALID_FORMAT_ERROR;
2023 error(startline, "syntax error while reading a resource, expected '{' or ':'");
2024 return NULL;
2025 }
2026
2027
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002028 if (resType == RESTYPE_UNKNOWN)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002029 {
2030 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
2031 We could have any of the following:
2032 { { => array (nested)
2033 { :/} => array
2034 { string , => string array
2035
2036 { string { => table
2037
2038 { string :/{ => table
2039 { string } => string
2040 */
2041
2042 token = peekToken(state, 0, NULL, &line, NULL,status);
2043
2044 if (U_FAILURE(*status))
2045 {
2046 return NULL;
2047 }
2048
2049 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
2050 {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002051 resType = RESTYPE_ARRAY;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002052 }
2053 else if (token == TOK_STRING)
2054 {
2055 token = peekToken(state, 1, NULL, &line, NULL, status);
2056
2057 if (U_FAILURE(*status))
2058 {
2059 return NULL;
2060 }
2061
2062 switch (token)
2063 {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002064 case TOK_COMMA: resType = RESTYPE_ARRAY; break;
2065 case TOK_OPEN_BRACE: resType = RESTYPE_TABLE; break;
2066 case TOK_CLOSE_BRACE: resType = RESTYPE_STRING; break;
2067 case TOK_COLON: resType = RESTYPE_TABLE; break;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002068 default:
2069 *status = U_INVALID_FORMAT_ERROR;
2070 error(line, "Unexpected token after string, expected ',', '{' or '}'");
2071 return NULL;
2072 }
2073 }
2074 else
2075 {
2076 *status = U_INVALID_FORMAT_ERROR;
2077 error(line, "Unexpected token after '{'");
2078 return NULL;
2079 }
2080
2081 /* printf("Type guessed as %s\n", resourceNames[resType]); */
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002082 } else if(resType == RESTYPE_TABLE_NO_FALLBACK) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002083 *status = U_INVALID_FORMAT_ERROR;
2084 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
2085 return NULL;
2086 }
2087
2088
2089 /* We should now know what we need to parse next, so call the appropriate parser
2090 function and return. */
2091 parseFunction = gResourceTypes[resType].parseFunction;
2092 if (parseFunction != NULL) {
2093 return parseFunction(state, tag, startline, comment, status);
2094 }
2095 else {
2096 *status = U_INTERNAL_PROGRAM_ERROR;
2097 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
2098 }
2099
2100 return NULL;
2101}
2102
2103/* parse the top-level resource */
2104struct SRBRoot *
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002105parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, const char *filename,
2106 UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002107{
2108 struct UString *tokenValue;
2109 struct UString comment;
2110 uint32_t line;
2111 enum EResourceType bundleType;
2112 enum ETokenType token;
2113 ParseState state;
2114 uint32_t i;
2115
2116
2117 for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
2118 {
2119 ustr_init(&state.lookahead[i].value);
2120 ustr_init(&state.lookahead[i].comment);
2121 }
2122
2123 initLookahead(&state, buf, status);
2124
2125 state.inputdir = inputDir;
2126 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
2127 state.outputdir = outputDir;
2128 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002129 state.filename = filename;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002130 state.makeBinaryCollation = makeBinaryCollation;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002131 state.omitCollationRules = omitCollationRules;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002132
2133 ustr_init(&comment);
2134 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
2135
2136 state.bundle = bundle_open(&comment, FALSE, status);
2137
2138 if (state.bundle == NULL || U_FAILURE(*status))
2139 {
2140 return NULL;
2141 }
2142
2143
2144 bundle_setlocale(state.bundle, tokenValue->fChars, status);
2145
2146 /* The following code is to make Empty bundle work no matter with :table specifer or not */
2147 token = getToken(&state, NULL, NULL, &line, status);
2148 if(token==TOK_COLON) {
2149 *status=U_ZERO_ERROR;
2150 bundleType=parseResourceType(&state, status);
2151
2152 if(isTable(bundleType))
2153 {
2154 expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
2155 }
2156 else
2157 {
2158 *status=U_PARSE_ERROR;
2159 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
2160 }
2161 }
2162 else
2163 {
2164 /* not a colon */
2165 if(token==TOK_OPEN_BRACE)
2166 {
2167 *status=U_ZERO_ERROR;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002168 bundleType=RESTYPE_TABLE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002169 }
2170 else
2171 {
2172 /* neither colon nor open brace */
2173 *status=U_PARSE_ERROR;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002174 bundleType=RESTYPE_UNKNOWN;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002175 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
2176 }
2177 }
2178
2179 if (U_FAILURE(*status))
2180 {
2181 bundle_close(state.bundle, status);
2182 return NULL;
2183 }
2184
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002185 if(bundleType==RESTYPE_TABLE_NO_FALLBACK) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002186 /*
2187 * Parse a top-level table with the table(nofallback) declaration.
2188 * This is the same as a regular table, but also sets the
2189 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2190 */
2191 state.bundle->noFallback=TRUE;
2192 }
2193 /* top-level tables need not handle special table names like "collations" */
2194 realParseTable(&state, state.bundle->fRoot, NULL, line, status);
2195 if(dependencyArray!=NULL){
2196 table_add(state.bundle->fRoot, dependencyArray, 0, status);
2197 dependencyArray = NULL;
2198 }
2199 if (U_FAILURE(*status))
2200 {
2201 bundle_close(state.bundle, status);
2202 res_close(dependencyArray);
2203 return NULL;
2204 }
2205
2206 if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
2207 {
2208 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
2209 if(isStrict()){
2210 *status = U_INVALID_FORMAT_ERROR;
2211 return NULL;
2212 }
2213 }
2214
2215 cleanupLookahead(&state);
2216 ustr_deinit(&comment);
2217 return state.bundle;
2218}