blob: b31755947d0cdf3ed5970c612706dbc58b41b7f7 [file] [log] [blame]
Jungshik Shin87232d82017-05-13 21:10:13 -07001// © 2016 and later: Unicode, Inc. and others.
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002// License & terms of use: http://www.unicode.org/copyright.html
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00003/*
4*******************************************************************************
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08005* Copyright (C) 2011-2014, International Business Machines
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00006* Corporation and others. All Rights Reserved.
7*******************************************************************************
8* file name: ppucd.cpp
Jungshik Shin87232d82017-05-13 21:10:13 -07009* encoding: UTF-8
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000010* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 2011dec11
14* created by: Markus W. Scherer
15*/
16
17#include "unicode/utypes.h"
18#include "unicode/uchar.h"
19#include "charstr.h"
20#include "cstring.h"
21#include "ppucd.h"
22#include "uassert.h"
23#include "uparse.h"
24
25#include <stdio.h>
26#include <string.h>
27
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000028U_NAMESPACE_BEGIN
29
30PropertyNames::~PropertyNames() {}
31
// TODO: Provide a concrete default PropertyNames subclass that is backed by
// the ICU library's built-in property names API & data.
// Today only the genprops tool uses PreparsedUCD, and it supplies its own
// PropertyNames implementation based on its just-built property names data
// and its own code.
// Once PreparsedUCD is used in tests, the default implementation will be
// needed somewhere.
#if 0
int32_t PropertyNames::getPropertyEnum(const char *name) const {
    return u_getPropertyEnum(name);
}

int32_t PropertyNames::getPropertyValueEnum(int32_t property, const char *name) const {
    return u_getPropertyValueEnum(static_cast<UProperty>(property), name);
}
#endif
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000049
50UniProps::UniProps()
51 : start(U_SENTINEL), end(U_SENTINEL),
52 bmg(U_SENTINEL), bpb(U_SENTINEL),
53 scf(U_SENTINEL), slc(U_SENTINEL), stc(U_SENTINEL), suc(U_SENTINEL),
54 digitValue(-1), numericValue(NULL),
55 name(NULL), nameAlias(NULL) {
56 memset(binProps, 0, sizeof(binProps));
57 memset(intProps, 0, sizeof(intProps));
58 memset(age, 0, 4);
59}
60
61UniProps::~UniProps() {}
62
63const int32_t PreparsedUCD::kNumLineBuffers;
64
65PreparsedUCD::PreparsedUCD(const char *filename, UErrorCode &errorCode)
Frank Tang3e05d9d2021-11-08 14:04:04 -080066 : pnames(nullptr),
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000067 file(NULL),
68 defaultLineIndex(-1), blockLineIndex(-1), lineIndex(0),
69 lineNumber(0),
70 lineType(NO_LINE),
71 fieldLimit(NULL), lineLimit(NULL) {
72 if(U_FAILURE(errorCode)) { return; }
73
74 if(filename==NULL || *filename==0 || (*filename=='-' && filename[1]==0)) {
75 filename=NULL;
76 file=stdin;
77 } else {
78 file=fopen(filename, "r");
79 }
80 if(file==NULL) {
81 perror("error opening preparsed UCD");
82 fprintf(stderr, "error opening preparsed UCD file %s\n", filename ? filename : "\"no file name given\"");
83 errorCode=U_FILE_ACCESS_ERROR;
84 return;
85 }
86
87 memset(ucdVersion, 0, 4);
88 lines[0][0]=0;
89}
90
91PreparsedUCD::~PreparsedUCD() {
92 if(file!=stdin) {
93 fclose(file);
94 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000095}
96
// First-field keywords of the preparsed UCD lines, indexed by LineType value
// (same order as the LineType values).
// The first two entries have no keyword and are skipped by the lookup in readLine().
static const char *const lineTypeStrings[]={
    nullptr,
    nullptr,
    "ucd",
    "property",
    "binary",
    "value",
    "defaults",
    "block",
    "cp",
    "unassigned",
    "algnamesrange"
};
111
112PreparsedUCD::LineType
113PreparsedUCD::readLine(UErrorCode &errorCode) {
114 if(U_FAILURE(errorCode)) { return NO_LINE; }
115 // Select the next available line buffer.
116 while(!isLineBufferAvailable(lineIndex)) {
117 ++lineIndex;
118 if (lineIndex == kNumLineBuffers) {
119 lineIndex = 0;
120 }
121 }
122 char *line=lines[lineIndex];
123 *line=0;
124 lineLimit=fieldLimit=line;
125 lineType=NO_LINE;
126 char *result=fgets(line, sizeof(lines[0]), file);
127 if(result==NULL) {
128 if(ferror(file)) {
129 perror("error reading preparsed UCD");
130 fprintf(stderr, "error reading preparsed UCD before line %ld\n", (long)lineNumber);
131 errorCode=U_FILE_ACCESS_ERROR;
132 }
133 return NO_LINE;
134 }
135 ++lineNumber;
136 if(*line=='#') {
137 fieldLimit=strchr(line, 0);
138 return lineType=EMPTY_LINE;
139 }
140 // Remove trailing /r/n.
141 char c;
142 char *limit=strchr(line, 0);
143 while(line<limit && ((c=*(limit-1))=='\n' || c=='\r')) { --limit; }
144 // Remove trailing white space.
145 while(line<limit && ((c=*(limit-1))==' ' || c=='\t')) { --limit; }
146 *limit=0;
147 lineLimit=limit;
148 if(line==limit) {
149 fieldLimit=limit;
150 return lineType=EMPTY_LINE;
151 }
152 // Split by ';'.
153 char *semi=line;
154 while((semi=strchr(semi, ';'))!=NULL) { *semi++=0; }
155 fieldLimit=strchr(line, 0);
156 // Determine the line type.
157 int32_t type;
158 for(type=EMPTY_LINE+1;; ++type) {
159 if(type==LINE_TYPE_COUNT) {
160 fprintf(stderr,
161 "error in preparsed UCD: unknown line type (first field) '%s' on line %ld\n",
162 line, (long)lineNumber);
163 errorCode=U_PARSE_ERROR;
164 return NO_LINE;
165 }
166 if(0==strcmp(line, lineTypeStrings[type])) {
167 break;
168 }
169 }
170 lineType=(LineType)type;
171 if(lineType==UNICODE_VERSION_LINE && fieldLimit<lineLimit) {
172 u_versionFromString(ucdVersion, fieldLimit+1);
173 }
174 return lineType;
175}
176
177const char *
178PreparsedUCD::firstField() {
179 char *field=lines[lineIndex];
180 fieldLimit=strchr(field, 0);
181 return field;
182}
183
184const char *
185PreparsedUCD::nextField() {
186 if(fieldLimit==lineLimit) { return NULL; }
187 char *field=fieldLimit+1;
188 fieldLimit=strchr(field, 0);
189 return field;
190}
191
192const UniProps *
193PreparsedUCD::getProps(UnicodeSet &newValues, UErrorCode &errorCode) {
194 if(U_FAILURE(errorCode)) { return NULL; }
195 newValues.clear();
196 if(!lineHasPropertyValues()) {
197 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
198 return NULL;
199 }
200 firstField();
201 const char *field=nextField();
202 if(field==NULL) {
203 // No range field after the type.
204 fprintf(stderr,
205 "error in preparsed UCD: missing default/block/cp range field "
206 "(no second field) on line %ld\n",
207 (long)lineNumber);
208 errorCode=U_PARSE_ERROR;
209 return NULL;
210 }
211 UChar32 start, end;
212 if(!parseCodePointRange(field, start, end, errorCode)) { return NULL; }
213 UniProps *props;
Frank Tang1f164ee2022-11-08 12:31:27 -0800214 UBool insideBlock=false; // true if cp or unassigned range inside the block range.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000215 switch(lineType) {
216 case DEFAULTS_LINE:
Jungshik Shinb3189662017-11-07 11:18:34 -0800217 // Should occur before any block/cp/unassigned line.
218 if(blockLineIndex>=0) {
219 fprintf(stderr,
220 "error in preparsed UCD: default line %ld after one or more block lines\n",
221 (long)lineNumber);
222 errorCode=U_PARSE_ERROR;
223 return NULL;
224 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000225 if(defaultLineIndex>=0) {
226 fprintf(stderr,
227 "error in preparsed UCD: second line with default properties on line %ld\n",
228 (long)lineNumber);
229 errorCode=U_PARSE_ERROR;
230 return NULL;
231 }
232 if(start!=0 || end!=0x10ffff) {
233 fprintf(stderr,
234 "error in preparsed UCD: default range must be 0..10FFFF, not '%s' on line %ld\n",
235 field, (long)lineNumber);
236 errorCode=U_PARSE_ERROR;
237 return NULL;
238 }
239 props=&defaultProps;
240 defaultLineIndex=lineIndex;
241 break;
242 case BLOCK_LINE:
243 blockProps=defaultProps; // Block inherits default properties.
244 props=&blockProps;
245 blockLineIndex=lineIndex;
246 break;
247 case CP_LINE:
Jungshik Shinb3189662017-11-07 11:18:34 -0800248 case UNASSIGNED_LINE:
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000249 if(blockProps.start<=start && end<=blockProps.end) {
Frank Tang1f164ee2022-11-08 12:31:27 -0800250 insideBlock=true;
Jungshik Shinb3189662017-11-07 11:18:34 -0800251 if(lineType==CP_LINE) {
252 // Code point range fully inside the last block inherits the block properties.
253 cpProps=blockProps;
254 } else {
255 // Unassigned line inside the block is based on default properties
256 // which override block properties.
257 cpProps=defaultProps;
258 newValues=blockValues;
259 // Except, it inherits the one blk=Block property.
260 int32_t blkIndex=UCHAR_BLOCK-UCHAR_INT_START;
261 cpProps.intProps[blkIndex]=blockProps.intProps[blkIndex];
262 newValues.remove((UChar32)UCHAR_BLOCK);
263 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000264 } else if(start>blockProps.end || end<blockProps.start) {
265 // Code point range fully outside the last block inherits the default properties.
266 cpProps=defaultProps;
267 } else {
268 // Code point range partially overlapping with the last block is illegal.
269 fprintf(stderr,
270 "error in preparsed UCD: cp range %s on line %ld only "
271 "partially overlaps with block range %04lX..%04lX\n",
272 field, (long)lineNumber, (long)blockProps.start, (long)blockProps.end);
273 errorCode=U_PARSE_ERROR;
274 return NULL;
275 }
276 props=&cpProps;
277 break;
278 default:
279 // Will not occur because of the range check above.
280 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
281 return NULL;
282 }
283 props->start=start;
284 props->end=end;
285 while((field=nextField())!=NULL) {
286 if(!parseProperty(*props, field, newValues, errorCode)) { return NULL; }
287 }
Jungshik Shinb3189662017-11-07 11:18:34 -0800288 if(lineType==BLOCK_LINE) {
289 blockValues=newValues;
290 } else if(lineType==UNASSIGNED_LINE && insideBlock) {
291 // Unset newValues for values that are the same as the block values.
292 for(int32_t prop=0; prop<UCHAR_BINARY_LIMIT; ++prop) {
293 if(newValues.contains(prop) && cpProps.binProps[prop]==blockProps.binProps[prop]) {
294 newValues.remove(prop);
295 }
296 }
297 for(int32_t prop=UCHAR_INT_START; prop<UCHAR_INT_LIMIT; ++prop) {
298 int32_t index=prop-UCHAR_INT_START;
299 if(newValues.contains(prop) && cpProps.intProps[index]==blockProps.intProps[index]) {
300 newValues.remove(prop);
301 }
302 }
303 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000304 return props;
305}
306
307static const struct {
308 const char *name;
309 int32_t prop;
310} ppucdProperties[]={
311 { "Name_Alias", PPUCD_NAME_ALIAS },
312 { "Conditional_Case_Mappings", PPUCD_CONDITIONAL_CASE_MAPPINGS },
313 { "Turkic_Case_Folding", PPUCD_TURKIC_CASE_FOLDING }
314};
315
Frank Tang1f164ee2022-11-08 12:31:27 -0800316// Returns true for "ok to continue parsing fields".
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000317UBool
318PreparsedUCD::parseProperty(UniProps &props, const char *field, UnicodeSet &newValues,
319 UErrorCode &errorCode) {
320 CharString pBuffer;
321 const char *p=field;
322 const char *v=strchr(p, '=');
323 int binaryValue;
324 if(*p=='-') {
325 if(v!=NULL) {
326 fprintf(stderr,
327 "error in preparsed UCD: mix of binary-property-no and "
328 "enum-property syntax '%s' on line %ld\n",
329 field, (long)lineNumber);
330 errorCode=U_PARSE_ERROR;
Frank Tang1f164ee2022-11-08 12:31:27 -0800331 return false;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000332 }
333 binaryValue=0;
334 ++p;
335 } else if(v==NULL) {
336 binaryValue=1;
337 } else {
338 binaryValue=-1;
339 // Copy out the property name rather than modifying the field (writing a NUL).
340 pBuffer.append(p, (int32_t)(v-p), errorCode);
341 p=pBuffer.data();
342 ++v;
343 }
344 int32_t prop=pnames->getPropertyEnum(p);
345 if(prop<0) {
346 for(int32_t i=0;; ++i) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800347 if(i==UPRV_LENGTHOF(ppucdProperties)) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000348 // Ignore unknown property names.
Frank Tang1f164ee2022-11-08 12:31:27 -0800349 return true;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000350 }
351 if(0==uprv_stricmp(p, ppucdProperties[i].name)) {
352 prop=ppucdProperties[i].prop;
353 U_ASSERT(prop>=0);
354 break;
355 }
356 }
357 }
358 if(prop<UCHAR_BINARY_LIMIT) {
359 if(binaryValue>=0) {
360 props.binProps[prop]=(UBool)binaryValue;
361 } else {
362 // No binary value for a binary property.
363 fprintf(stderr,
364 "error in preparsed UCD: enum-property syntax '%s' "
365 "for binary property on line %ld\n",
366 field, (long)lineNumber);
367 errorCode=U_PARSE_ERROR;
368 }
369 } else if(binaryValue>=0) {
370 // Binary value for a non-binary property.
371 fprintf(stderr,
372 "error in preparsed UCD: binary-property syntax '%s' "
373 "for non-binary property on line %ld\n",
374 field, (long)lineNumber);
375 errorCode=U_PARSE_ERROR;
376 } else if (prop < UCHAR_INT_START) {
377 fprintf(stderr,
378 "error in preparsed UCD: prop value is invalid: '%d' for line %ld\n",
379 prop, (long)lineNumber);
380 errorCode=U_PARSE_ERROR;
381 } else if(prop<UCHAR_INT_LIMIT) {
382 int32_t value=pnames->getPropertyValueEnum(prop, v);
383 if(value==UCHAR_INVALID_CODE && prop==UCHAR_CANONICAL_COMBINING_CLASS) {
384 // TODO: Make getPropertyValueEnum(UCHAR_CANONICAL_COMBINING_CLASS, v) work.
385 char *end;
386 unsigned long ccc=uprv_strtoul(v, &end, 10);
387 if(v<end && *end==0 && ccc<=254) {
388 value=(int32_t)ccc;
389 }
390 }
391 if(value==UCHAR_INVALID_CODE) {
392 fprintf(stderr,
393 "error in preparsed UCD: '%s' is not a valid value on line %ld\n",
394 field, (long)lineNumber);
395 errorCode=U_PARSE_ERROR;
396 } else {
397 props.intProps[prop-UCHAR_INT_START]=value;
398 }
399 } else if(*v=='<') {
400 // Do not parse default values like <code point>, just set null values.
401 switch(prop) {
402 case UCHAR_BIDI_MIRRORING_GLYPH:
403 props.bmg=U_SENTINEL;
404 break;
405 case UCHAR_BIDI_PAIRED_BRACKET:
406 props.bpb=U_SENTINEL;
407 break;
408 case UCHAR_SIMPLE_CASE_FOLDING:
409 props.scf=U_SENTINEL;
410 break;
411 case UCHAR_SIMPLE_LOWERCASE_MAPPING:
412 props.slc=U_SENTINEL;
413 break;
414 case UCHAR_SIMPLE_TITLECASE_MAPPING:
415 props.stc=U_SENTINEL;
416 break;
417 case UCHAR_SIMPLE_UPPERCASE_MAPPING:
418 props.suc=U_SENTINEL;
419 break;
420 case UCHAR_CASE_FOLDING:
421 props.cf.remove();
422 break;
423 case UCHAR_LOWERCASE_MAPPING:
424 props.lc.remove();
425 break;
426 case UCHAR_TITLECASE_MAPPING:
427 props.tc.remove();
428 break;
429 case UCHAR_UPPERCASE_MAPPING:
430 props.uc.remove();
431 break;
432 case UCHAR_SCRIPT_EXTENSIONS:
433 props.scx.clear();
434 break;
435 default:
436 fprintf(stderr,
437 "error in preparsed UCD: '%s' is not a valid default value on line %ld\n",
438 field, (long)lineNumber);
439 errorCode=U_PARSE_ERROR;
440 }
441 } else {
442 char c;
443 switch(prop) {
444 case UCHAR_NUMERIC_VALUE:
445 props.numericValue=v;
446 c=*v;
447 if('0'<=c && c<='9' && v[1]==0) {
448 props.digitValue=c-'0';
449 } else {
450 props.digitValue=-1;
451 }
452 break;
453 case UCHAR_NAME:
454 props.name=v;
455 break;
456 case UCHAR_AGE:
457 u_versionFromString(props.age, v); // Writes 0.0.0.0 if v is not numeric.
458 break;
459 case UCHAR_BIDI_MIRRORING_GLYPH:
460 props.bmg=parseCodePoint(v, errorCode);
461 break;
462 case UCHAR_BIDI_PAIRED_BRACKET:
463 props.bpb=parseCodePoint(v, errorCode);
464 break;
465 case UCHAR_SIMPLE_CASE_FOLDING:
466 props.scf=parseCodePoint(v, errorCode);
467 break;
468 case UCHAR_SIMPLE_LOWERCASE_MAPPING:
469 props.slc=parseCodePoint(v, errorCode);
470 break;
471 case UCHAR_SIMPLE_TITLECASE_MAPPING:
472 props.stc=parseCodePoint(v, errorCode);
473 break;
474 case UCHAR_SIMPLE_UPPERCASE_MAPPING:
475 props.suc=parseCodePoint(v, errorCode);
476 break;
477 case UCHAR_CASE_FOLDING:
478 parseString(v, props.cf, errorCode);
479 break;
480 case UCHAR_LOWERCASE_MAPPING:
481 parseString(v, props.lc, errorCode);
482 break;
483 case UCHAR_TITLECASE_MAPPING:
484 parseString(v, props.tc, errorCode);
485 break;
486 case UCHAR_UPPERCASE_MAPPING:
487 parseString(v, props.uc, errorCode);
488 break;
489 case PPUCD_NAME_ALIAS:
490 props.nameAlias=v;
491 break;
492 case PPUCD_CONDITIONAL_CASE_MAPPINGS:
493 case PPUCD_TURKIC_CASE_FOLDING:
494 // No need to parse their values: They are hardcoded in the runtime library.
495 break;
496 case UCHAR_SCRIPT_EXTENSIONS:
497 parseScriptExtensions(v, props.scx, errorCode);
498 break;
499 default:
500 // Ignore unhandled properties.
Frank Tang1f164ee2022-11-08 12:31:27 -0800501 return true;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000502 }
503 }
504 if(U_SUCCESS(errorCode)) {
505 newValues.add((UChar32)prop);
Frank Tang1f164ee2022-11-08 12:31:27 -0800506 return true;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000507 } else {
Frank Tang1f164ee2022-11-08 12:31:27 -0800508 return false;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000509 }
510}
511
512UBool
513PreparsedUCD::getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode) {
Frank Tang1f164ee2022-11-08 12:31:27 -0800514 if(U_FAILURE(errorCode)) { return false; }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000515 if(lineType!=ALG_NAMES_RANGE_LINE) {
516 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
Frank Tang1f164ee2022-11-08 12:31:27 -0800517 return false;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000518 }
519 firstField();
520 const char *field=nextField();
521 if(field==NULL) {
522 // No range field after the type.
523 fprintf(stderr,
524 "error in preparsed UCD: missing algnamesrange range field "
525 "(no second field) on line %ld\n",
526 (long)lineNumber);
527 errorCode=U_PARSE_ERROR;
Frank Tang1f164ee2022-11-08 12:31:27 -0800528 return false;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000529 }
530 return parseCodePointRange(field, start, end, errorCode);
531}
532
533UChar32
534PreparsedUCD::parseCodePoint(const char *s, UErrorCode &errorCode) {
535 char *end;
536 uint32_t value=(uint32_t)uprv_strtoul(s, &end, 16);
537 if(end<=s || *end!=0 || value>=0x110000) {
538 fprintf(stderr,
539 "error in preparsed UCD: '%s' is not a valid code point on line %ld\n",
540 s, (long)lineNumber);
541 errorCode=U_PARSE_ERROR;
542 return U_SENTINEL;
543 }
544 return (UChar32)value;
545}
546
547UBool
548PreparsedUCD::parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode) {
549 uint32_t st, e;
550 u_parseCodePointRange(s, &st, &e, &errorCode);
551 if(U_FAILURE(errorCode)) {
552 fprintf(stderr,
553 "error in preparsed UCD: '%s' is not a valid code point range on line %ld\n",
554 s, (long)lineNumber);
Frank Tang1f164ee2022-11-08 12:31:27 -0800555 return false;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000556 }
557 start=(UChar32)st;
558 end=(UChar32)e;
Frank Tang1f164ee2022-11-08 12:31:27 -0800559 return true;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000560}
561
562void
563PreparsedUCD::parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700564 UChar *buffer=toUCharPtr(uni.getBuffer(-1));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000565 int32_t length=u_parseString(s, buffer, uni.getCapacity(), NULL, &errorCode);
566 if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
567 errorCode=U_ZERO_ERROR;
568 uni.releaseBuffer(0);
Jungshik Shin87232d82017-05-13 21:10:13 -0700569 buffer=toUCharPtr(uni.getBuffer(length));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000570 length=u_parseString(s, buffer, uni.getCapacity(), NULL, &errorCode);
571 }
572 uni.releaseBuffer(length);
573 if(U_FAILURE(errorCode)) {
574 fprintf(stderr,
575 "error in preparsed UCD: '%s' is not a valid Unicode string on line %ld\n",
576 s, (long)lineNumber);
577 }
578}
579
580void
581PreparsedUCD::parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode) {
582 if(U_FAILURE(errorCode)) { return; }
583 scx.clear();
584 CharString scString;
585 for(;;) {
586 const char *scs;
587 const char *scLimit=strchr(s, ' ');
588 if(scLimit!=NULL) {
589 scs=scString.clear().append(s, (int32_t)(scLimit-s), errorCode).data();
590 if(U_FAILURE(errorCode)) { return; }
591 } else {
592 scs=s;
593 }
594 int32_t script=pnames->getPropertyValueEnum(UCHAR_SCRIPT, scs);
595 if(script==UCHAR_INVALID_CODE) {
596 fprintf(stderr,
597 "error in preparsed UCD: '%s' is not a valid script code on line %ld\n",
598 scs, (long)lineNumber);
599 errorCode=U_PARSE_ERROR;
600 return;
601 } else if(scx.contains(script)) {
602 fprintf(stderr,
603 "error in preparsed UCD: scx has duplicate '%s' codes on line %ld\n",
604 scs, (long)lineNumber);
605 errorCode=U_PARSE_ERROR;
606 return;
607 } else {
608 scx.add(script);
609 }
610 if(scLimit!=NULL) {
611 s=scLimit+1;
612 } else {
613 break;
614 }
615 }
616 if(scx.isEmpty()) {
617 fprintf(stderr, "error in preparsed UCD: empty scx= on line %ld\n", (long)lineNumber);
618 errorCode=U_PARSE_ERROR;
619 }
620}
621
622U_NAMESPACE_END