jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 1 | /* |
| 2 | ****************************************************************************** |
| 3 | * |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame^] | 4 | * Copyright (C) 2001-2014, International Business Machines |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 5 | * Corporation and others. All Rights Reserved. |
| 6 | * |
| 7 | ****************************************************************************** |
| 8 | * file name: utrie2.cpp |
| 9 | * encoding: US-ASCII |
| 10 | * tab size: 8 (not used) |
| 11 | * indentation:4 |
| 12 | * |
| 13 | * created on: 2008aug16 (starting from a copy of utrie.c) |
| 14 | * created by: Markus W. Scherer |
| 15 | * |
| 16 | * This is a common implementation of a Unicode trie. |
| 17 | * It is a kind of compressed, serializable table of 16- or 32-bit values associated with |
| 18 | * Unicode code points (0..0x10ffff). |
| 19 | * This is the second common version of a Unicode trie (hence the name UTrie2). |
| 20 | * See utrie2.h for a comparison. |
| 21 | * |
| 22 | * This file contains only the runtime and enumeration code, for read-only access. |
| 23 | * See utrie2_builder.c for the builder code. |
| 24 | */ |
| 25 | #ifdef UTRIE2_DEBUG |
| 26 | # include <stdio.h> |
| 27 | #endif |
| 28 | |
| 29 | #include "unicode/utypes.h" |
| 30 | #include "unicode/utf.h" |
| 31 | #include "unicode/utf8.h" |
| 32 | #include "unicode/utf16.h" |
| 33 | #include "cmemory.h" |
| 34 | #include "utrie2.h" |
| 35 | #include "utrie2_impl.h" |
| 36 | #include "uassert.h" |
| 37 | |
| 38 | /* Public UTrie2 API implementation ----------------------------------------- */ |
| 39 | |
| 40 | static uint32_t |
| 41 | get32(const UNewTrie2 *trie, UChar32 c, UBool fromLSCP) { |
| 42 | int32_t i2, block; |
| 43 | |
| 44 | if(c>=trie->highStart && (!U_IS_LEAD(c) || fromLSCP)) { |
| 45 | return trie->data[trie->dataLength-UTRIE2_DATA_GRANULARITY]; |
| 46 | } |
| 47 | |
| 48 | if(U_IS_LEAD(c) && fromLSCP) { |
| 49 | i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+ |
| 50 | (c>>UTRIE2_SHIFT_2); |
| 51 | } else { |
| 52 | i2=trie->index1[c>>UTRIE2_SHIFT_1]+ |
| 53 | ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK); |
| 54 | } |
| 55 | block=trie->index2[i2]; |
| 56 | return trie->data[block+(c&UTRIE2_DATA_MASK)]; |
| 57 | } |
| 58 | |
| 59 | U_CAPI uint32_t U_EXPORT2 |
| 60 | utrie2_get32(const UTrie2 *trie, UChar32 c) { |
| 61 | if(trie->data16!=NULL) { |
| 62 | return UTRIE2_GET16(trie, c); |
| 63 | } else if(trie->data32!=NULL) { |
| 64 | return UTRIE2_GET32(trie, c); |
| 65 | } else if((uint32_t)c>0x10ffff) { |
| 66 | return trie->errorValue; |
| 67 | } else { |
| 68 | return get32(trie->newTrie, c, TRUE); |
| 69 | } |
| 70 | } |
| 71 | |
| 72 | U_CAPI uint32_t U_EXPORT2 |
| 73 | utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c) { |
| 74 | if(!U_IS_LEAD(c)) { |
| 75 | return trie->errorValue; |
| 76 | } |
| 77 | if(trie->data16!=NULL) { |
| 78 | return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c); |
| 79 | } else if(trie->data32!=NULL) { |
| 80 | return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c); |
| 81 | } else { |
| 82 | return get32(trie->newTrie, c, FALSE); |
| 83 | } |
| 84 | } |
| 85 | |
| 86 | static inline int32_t |
| 87 | u8Index(const UTrie2 *trie, UChar32 c, int32_t i) { |
| 88 | int32_t idx= |
| 89 | _UTRIE2_INDEX_FROM_CP( |
| 90 | trie, |
| 91 | trie->data32==NULL ? trie->indexLength : 0, |
| 92 | c); |
| 93 | return (idx<<3)|i; |
| 94 | } |
| 95 | |
| 96 | U_CAPI int32_t U_EXPORT2 |
| 97 | utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c, |
| 98 | const uint8_t *src, const uint8_t *limit) { |
| 99 | int32_t i, length; |
| 100 | i=0; |
| 101 | /* support 64-bit pointers by avoiding cast of arbitrary difference */ |
| 102 | if((limit-src)<=7) { |
| 103 | length=(int32_t)(limit-src); |
| 104 | } else { |
| 105 | length=7; |
| 106 | } |
| 107 | c=utf8_nextCharSafeBody(src, &i, length, c, -1); |
| 108 | return u8Index(trie, c, i); |
| 109 | } |
| 110 | |
| 111 | U_CAPI int32_t U_EXPORT2 |
| 112 | utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c, |
| 113 | const uint8_t *start, const uint8_t *src) { |
| 114 | int32_t i, length; |
| 115 | /* support 64-bit pointers by avoiding cast of arbitrary difference */ |
| 116 | if((src-start)<=7) { |
| 117 | i=length=(int32_t)(src-start); |
| 118 | } else { |
| 119 | i=length=7; |
| 120 | start=src-7; |
| 121 | } |
| 122 | c=utf8_prevCharSafeBody(start, 0, &i, c, -1); |
| 123 | i=length-i; /* number of bytes read backward from src */ |
| 124 | return u8Index(trie, c, i); |
| 125 | } |
| 126 | |
| 127 | U_CAPI UTrie2 * U_EXPORT2 |
| 128 | utrie2_openFromSerialized(UTrie2ValueBits valueBits, |
| 129 | const void *data, int32_t length, int32_t *pActualLength, |
| 130 | UErrorCode *pErrorCode) { |
| 131 | const UTrie2Header *header; |
| 132 | const uint16_t *p16; |
| 133 | int32_t actualLength; |
| 134 | |
| 135 | UTrie2 tempTrie; |
| 136 | UTrie2 *trie; |
| 137 | |
| 138 | if(U_FAILURE(*pErrorCode)) { |
| 139 | return 0; |
| 140 | } |
| 141 | |
| 142 | if( length<=0 || (U_POINTER_MASK_LSB(data, 3)!=0) || |
| 143 | valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits |
| 144 | ) { |
| 145 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 146 | return 0; |
| 147 | } |
| 148 | |
| 149 | /* enough data for a trie header? */ |
| 150 | if(length<(int32_t)sizeof(UTrie2Header)) { |
| 151 | *pErrorCode=U_INVALID_FORMAT_ERROR; |
| 152 | return 0; |
| 153 | } |
| 154 | |
| 155 | /* check the signature */ |
| 156 | header=(const UTrie2Header *)data; |
| 157 | if(header->signature!=UTRIE2_SIG) { |
| 158 | *pErrorCode=U_INVALID_FORMAT_ERROR; |
| 159 | return 0; |
| 160 | } |
| 161 | |
| 162 | /* get the options */ |
| 163 | if(valueBits!=(UTrie2ValueBits)(header->options&UTRIE2_OPTIONS_VALUE_BITS_MASK)) { |
| 164 | *pErrorCode=U_INVALID_FORMAT_ERROR; |
| 165 | return 0; |
| 166 | } |
| 167 | |
| 168 | /* get the length values and offsets */ |
| 169 | uprv_memset(&tempTrie, 0, sizeof(tempTrie)); |
| 170 | tempTrie.indexLength=header->indexLength; |
| 171 | tempTrie.dataLength=header->shiftedDataLength<<UTRIE2_INDEX_SHIFT; |
| 172 | tempTrie.index2NullOffset=header->index2NullOffset; |
| 173 | tempTrie.dataNullOffset=header->dataNullOffset; |
| 174 | |
| 175 | tempTrie.highStart=header->shiftedHighStart<<UTRIE2_SHIFT_1; |
| 176 | tempTrie.highValueIndex=tempTrie.dataLength-UTRIE2_DATA_GRANULARITY; |
| 177 | if(valueBits==UTRIE2_16_VALUE_BITS) { |
| 178 | tempTrie.highValueIndex+=tempTrie.indexLength; |
| 179 | } |
| 180 | |
| 181 | /* calculate the actual length */ |
| 182 | actualLength=(int32_t)sizeof(UTrie2Header)+tempTrie.indexLength*2; |
| 183 | if(valueBits==UTRIE2_16_VALUE_BITS) { |
| 184 | actualLength+=tempTrie.dataLength*2; |
| 185 | } else { |
| 186 | actualLength+=tempTrie.dataLength*4; |
| 187 | } |
| 188 | if(length<actualLength) { |
| 189 | *pErrorCode=U_INVALID_FORMAT_ERROR; /* not enough bytes */ |
| 190 | return 0; |
| 191 | } |
| 192 | |
| 193 | /* allocate the trie */ |
| 194 | trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); |
| 195 | if(trie==NULL) { |
| 196 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 197 | return 0; |
| 198 | } |
| 199 | uprv_memcpy(trie, &tempTrie, sizeof(tempTrie)); |
| 200 | trie->memory=(uint32_t *)data; |
| 201 | trie->length=actualLength; |
| 202 | trie->isMemoryOwned=FALSE; |
| 203 | |
| 204 | /* set the pointers to its index and data arrays */ |
| 205 | p16=(const uint16_t *)(header+1); |
| 206 | trie->index=p16; |
| 207 | p16+=trie->indexLength; |
| 208 | |
| 209 | /* get the data */ |
| 210 | switch(valueBits) { |
| 211 | case UTRIE2_16_VALUE_BITS: |
| 212 | trie->data16=p16; |
| 213 | trie->data32=NULL; |
| 214 | trie->initialValue=trie->index[trie->dataNullOffset]; |
| 215 | trie->errorValue=trie->data16[UTRIE2_BAD_UTF8_DATA_OFFSET]; |
| 216 | break; |
| 217 | case UTRIE2_32_VALUE_BITS: |
| 218 | trie->data16=NULL; |
| 219 | trie->data32=(const uint32_t *)p16; |
| 220 | trie->initialValue=trie->data32[trie->dataNullOffset]; |
| 221 | trie->errorValue=trie->data32[UTRIE2_BAD_UTF8_DATA_OFFSET]; |
| 222 | break; |
| 223 | default: |
| 224 | *pErrorCode=U_INVALID_FORMAT_ERROR; |
| 225 | return 0; |
| 226 | } |
| 227 | |
| 228 | if(pActualLength!=NULL) { |
| 229 | *pActualLength=actualLength; |
| 230 | } |
| 231 | return trie; |
| 232 | } |
| 233 | |
| 234 | U_CAPI UTrie2 * U_EXPORT2 |
| 235 | utrie2_openDummy(UTrie2ValueBits valueBits, |
| 236 | uint32_t initialValue, uint32_t errorValue, |
| 237 | UErrorCode *pErrorCode) { |
| 238 | UTrie2 *trie; |
| 239 | UTrie2Header *header; |
| 240 | uint32_t *p; |
| 241 | uint16_t *dest16; |
| 242 | int32_t indexLength, dataLength, length, i; |
| 243 | int32_t dataMove; /* >0 if the data is moved to the end of the index array */ |
| 244 | |
| 245 | if(U_FAILURE(*pErrorCode)) { |
| 246 | return 0; |
| 247 | } |
| 248 | |
| 249 | if(valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits) { |
| 250 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 251 | return 0; |
| 252 | } |
| 253 | |
| 254 | /* calculate the total length of the dummy trie data */ |
| 255 | indexLength=UTRIE2_INDEX_1_OFFSET; |
| 256 | dataLength=UTRIE2_DATA_START_OFFSET+UTRIE2_DATA_GRANULARITY; |
| 257 | length=(int32_t)sizeof(UTrie2Header)+indexLength*2; |
| 258 | if(valueBits==UTRIE2_16_VALUE_BITS) { |
| 259 | length+=dataLength*2; |
| 260 | } else { |
| 261 | length+=dataLength*4; |
| 262 | } |
| 263 | |
| 264 | /* allocate the trie */ |
| 265 | trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); |
| 266 | if(trie==NULL) { |
| 267 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 268 | return 0; |
| 269 | } |
| 270 | uprv_memset(trie, 0, sizeof(UTrie2)); |
| 271 | trie->memory=uprv_malloc(length); |
| 272 | if(trie->memory==NULL) { |
| 273 | uprv_free(trie); |
| 274 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 275 | return 0; |
| 276 | } |
| 277 | trie->length=length; |
| 278 | trie->isMemoryOwned=TRUE; |
| 279 | |
| 280 | /* set the UTrie2 fields */ |
| 281 | if(valueBits==UTRIE2_16_VALUE_BITS) { |
| 282 | dataMove=indexLength; |
| 283 | } else { |
| 284 | dataMove=0; |
| 285 | } |
| 286 | |
| 287 | trie->indexLength=indexLength; |
| 288 | trie->dataLength=dataLength; |
| 289 | trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET; |
| 290 | trie->dataNullOffset=(uint16_t)dataMove; |
| 291 | trie->initialValue=initialValue; |
| 292 | trie->errorValue=errorValue; |
| 293 | trie->highStart=0; |
| 294 | trie->highValueIndex=dataMove+UTRIE2_DATA_START_OFFSET; |
| 295 | |
| 296 | /* set the header fields */ |
| 297 | header=(UTrie2Header *)trie->memory; |
| 298 | |
| 299 | header->signature=UTRIE2_SIG; /* "Tri2" */ |
| 300 | header->options=(uint16_t)valueBits; |
| 301 | |
| 302 | header->indexLength=(uint16_t)indexLength; |
| 303 | header->shiftedDataLength=(uint16_t)(dataLength>>UTRIE2_INDEX_SHIFT); |
| 304 | header->index2NullOffset=(uint16_t)UTRIE2_INDEX_2_OFFSET; |
| 305 | header->dataNullOffset=(uint16_t)dataMove; |
| 306 | header->shiftedHighStart=0; |
| 307 | |
| 308 | /* fill the index and data arrays */ |
| 309 | dest16=(uint16_t *)(header+1); |
| 310 | trie->index=dest16; |
| 311 | |
| 312 | /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT */ |
| 313 | for(i=0; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) { |
| 314 | *dest16++=(uint16_t)(dataMove>>UTRIE2_INDEX_SHIFT); /* null data block */ |
| 315 | } |
| 316 | |
| 317 | /* write UTF-8 2-byte index-2 values, not right-shifted */ |
| 318 | for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */ |
| 319 | *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET); |
| 320 | } |
| 321 | for(; i<(0xe0-0xc0); ++i) { /* C2..DF */ |
| 322 | *dest16++=(uint16_t)dataMove; |
| 323 | } |
| 324 | |
| 325 | /* write the 16/32-bit data array */ |
| 326 | switch(valueBits) { |
| 327 | case UTRIE2_16_VALUE_BITS: |
| 328 | /* write 16-bit data values */ |
| 329 | trie->data16=dest16; |
| 330 | trie->data32=NULL; |
| 331 | for(i=0; i<0x80; ++i) { |
| 332 | *dest16++=(uint16_t)initialValue; |
| 333 | } |
| 334 | for(; i<0xc0; ++i) { |
| 335 | *dest16++=(uint16_t)errorValue; |
| 336 | } |
| 337 | /* highValue and reserved values */ |
| 338 | for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) { |
| 339 | *dest16++=(uint16_t)initialValue; |
| 340 | } |
| 341 | break; |
| 342 | case UTRIE2_32_VALUE_BITS: |
| 343 | /* write 32-bit data values */ |
| 344 | p=(uint32_t *)dest16; |
| 345 | trie->data16=NULL; |
| 346 | trie->data32=p; |
| 347 | for(i=0; i<0x80; ++i) { |
| 348 | *p++=initialValue; |
| 349 | } |
| 350 | for(; i<0xc0; ++i) { |
| 351 | *p++=errorValue; |
| 352 | } |
| 353 | /* highValue and reserved values */ |
| 354 | for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) { |
| 355 | *p++=initialValue; |
| 356 | } |
| 357 | break; |
| 358 | default: |
| 359 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 360 | return 0; |
| 361 | } |
| 362 | |
| 363 | return trie; |
| 364 | } |
| 365 | |
| 366 | U_CAPI void U_EXPORT2 |
| 367 | utrie2_close(UTrie2 *trie) { |
| 368 | if(trie!=NULL) { |
| 369 | if(trie->isMemoryOwned) { |
| 370 | uprv_free(trie->memory); |
| 371 | } |
| 372 | if(trie->newTrie!=NULL) { |
| 373 | uprv_free(trie->newTrie->data); |
| 374 | uprv_free(trie->newTrie); |
| 375 | } |
| 376 | uprv_free(trie); |
| 377 | } |
| 378 | } |
| 379 | |
| 380 | U_CAPI int32_t U_EXPORT2 |
| 381 | utrie2_getVersion(const void *data, int32_t length, UBool anyEndianOk) { |
| 382 | uint32_t signature; |
| 383 | if(length<16 || data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)) { |
| 384 | return 0; |
| 385 | } |
| 386 | signature=*(const uint32_t *)data; |
| 387 | if(signature==UTRIE2_SIG) { |
| 388 | return 2; |
| 389 | } |
| 390 | if(anyEndianOk && signature==UTRIE2_OE_SIG) { |
| 391 | return 2; |
| 392 | } |
| 393 | if(signature==UTRIE_SIG) { |
| 394 | return 1; |
| 395 | } |
| 396 | if(anyEndianOk && signature==UTRIE_OE_SIG) { |
| 397 | return 1; |
| 398 | } |
| 399 | return 0; |
| 400 | } |
| 401 | |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame^] | 402 | U_CAPI UBool U_EXPORT2 |
| 403 | utrie2_isFrozen(const UTrie2 *trie) { |
| 404 | return (UBool)(trie->newTrie==NULL); |
| 405 | } |
| 406 | |
| 407 | U_CAPI int32_t U_EXPORT2 |
| 408 | utrie2_serialize(const UTrie2 *trie, |
| 409 | void *data, int32_t capacity, |
| 410 | UErrorCode *pErrorCode) { |
| 411 | /* argument check */ |
| 412 | if(U_FAILURE(*pErrorCode)) { |
| 413 | return 0; |
| 414 | } |
| 415 | |
| 416 | if( trie==NULL || trie->memory==NULL || trie->newTrie!=NULL || |
| 417 | capacity<0 || (capacity>0 && (data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0))) |
| 418 | ) { |
| 419 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 420 | return 0; |
| 421 | } |
| 422 | |
| 423 | if(capacity>=trie->length) { |
| 424 | uprv_memcpy(data, trie->memory, trie->length); |
| 425 | } else { |
| 426 | *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
| 427 | } |
| 428 | return trie->length; |
| 429 | } |
| 430 | |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 431 | U_CAPI int32_t U_EXPORT2 |
| 432 | utrie2_swap(const UDataSwapper *ds, |
| 433 | const void *inData, int32_t length, void *outData, |
| 434 | UErrorCode *pErrorCode) { |
| 435 | const UTrie2Header *inTrie; |
| 436 | UTrie2Header trie; |
| 437 | int32_t dataLength, size; |
| 438 | UTrie2ValueBits valueBits; |
| 439 | |
| 440 | if(U_FAILURE(*pErrorCode)) { |
| 441 | return 0; |
| 442 | } |
| 443 | if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { |
| 444 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 445 | return 0; |
| 446 | } |
| 447 | |
| 448 | /* setup and swapping */ |
| 449 | if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) { |
| 450 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
| 451 | return 0; |
| 452 | } |
| 453 | |
| 454 | inTrie=(const UTrie2Header *)inData; |
| 455 | trie.signature=ds->readUInt32(inTrie->signature); |
| 456 | trie.options=ds->readUInt16(inTrie->options); |
| 457 | trie.indexLength=ds->readUInt16(inTrie->indexLength); |
| 458 | trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength); |
| 459 | |
| 460 | valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK); |
| 461 | dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT; |
| 462 | |
| 463 | if( trie.signature!=UTRIE2_SIG || |
| 464 | valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits || |
| 465 | trie.indexLength<UTRIE2_INDEX_1_OFFSET || |
| 466 | dataLength<UTRIE2_DATA_START_OFFSET |
| 467 | ) { |
| 468 | *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */ |
| 469 | return 0; |
| 470 | } |
| 471 | |
| 472 | size=sizeof(UTrie2Header)+trie.indexLength*2; |
| 473 | switch(valueBits) { |
| 474 | case UTRIE2_16_VALUE_BITS: |
| 475 | size+=dataLength*2; |
| 476 | break; |
| 477 | case UTRIE2_32_VALUE_BITS: |
| 478 | size+=dataLength*4; |
| 479 | break; |
| 480 | default: |
| 481 | *pErrorCode=U_INVALID_FORMAT_ERROR; |
| 482 | return 0; |
| 483 | } |
| 484 | |
| 485 | if(length>=0) { |
| 486 | UTrie2Header *outTrie; |
| 487 | |
| 488 | if(length<size) { |
| 489 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
| 490 | return 0; |
| 491 | } |
| 492 | |
| 493 | outTrie=(UTrie2Header *)outData; |
| 494 | |
| 495 | /* swap the header */ |
| 496 | ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); |
| 497 | ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); |
| 498 | |
| 499 | /* swap the index and the data */ |
| 500 | switch(valueBits) { |
| 501 | case UTRIE2_16_VALUE_BITS: |
| 502 | ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode); |
| 503 | break; |
| 504 | case UTRIE2_32_VALUE_BITS: |
| 505 | ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); |
| 506 | ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4, |
| 507 | (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); |
| 508 | break; |
| 509 | default: |
| 510 | *pErrorCode=U_INVALID_FORMAT_ERROR; |
| 511 | return 0; |
| 512 | } |
| 513 | } |
| 514 | |
| 515 | return size; |
| 516 | } |
| 517 | |
| 518 | // utrie2_swapAnyVersion() should be defined here but lives in utrie2_builder.c |
| 519 | // to avoid a dependency from utrie2.cpp on utrie.c. |
| 520 | |
| 521 | /* enumeration -------------------------------------------------------------- */ |
| 522 | |
| 523 | #define MIN_VALUE(a, b) ((a)<(b) ? (a) : (b)) |
| 524 | |
| 525 | /* default UTrie2EnumValue() returns the input value itself */ |
| 526 | static uint32_t U_CALLCONV |
| 527 | enumSameValue(const void * /*context*/, uint32_t value) { |
| 528 | return value; |
| 529 | } |
| 530 | |
| 531 | /** |
| 532 | * Enumerate all ranges of code points with the same relevant values. |
| 533 | * The values are transformed from the raw trie entries by the enumValue function. |
| 534 | * |
| 535 | * Currently requires start<limit and both start and limit must be multiples |
| 536 | * of UTRIE2_DATA_BLOCK_LENGTH. |
| 537 | * |
| 538 | * Optimizations: |
| 539 | * - Skip a whole block if we know that it is filled with a single value, |
| 540 | * and it is the same as we visited just before. |
| 541 | * - Handle the null block specially because we know a priori that it is filled |
| 542 | * with a single value. |
| 543 | */ |
| 544 | static void |
| 545 | enumEitherTrie(const UTrie2 *trie, |
| 546 | UChar32 start, UChar32 limit, |
| 547 | UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) { |
| 548 | const uint32_t *data32; |
| 549 | const uint16_t *idx; |
| 550 | |
| 551 | uint32_t value, prevValue, initialValue; |
| 552 | UChar32 c, prev, highStart; |
| 553 | int32_t j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock; |
| 554 | |
| 555 | if(enumRange==NULL) { |
| 556 | return; |
| 557 | } |
| 558 | if(enumValue==NULL) { |
| 559 | enumValue=enumSameValue; |
| 560 | } |
| 561 | |
| 562 | if(trie->newTrie==NULL) { |
| 563 | /* frozen trie */ |
| 564 | idx=trie->index; |
| 565 | U_ASSERT(idx!=NULL); /* the following code assumes trie->newTrie is not NULL when idx is NULL */ |
| 566 | data32=trie->data32; |
| 567 | |
| 568 | index2NullOffset=trie->index2NullOffset; |
| 569 | nullBlock=trie->dataNullOffset; |
| 570 | } else { |
| 571 | /* unfrozen, mutable trie */ |
| 572 | idx=NULL; |
| 573 | data32=trie->newTrie->data; |
| 574 | U_ASSERT(data32!=NULL); /* the following code assumes idx is not NULL when data32 is NULL */ |
| 575 | |
| 576 | index2NullOffset=trie->newTrie->index2NullOffset; |
| 577 | nullBlock=trie->newTrie->dataNullOffset; |
| 578 | } |
| 579 | |
| 580 | highStart=trie->highStart; |
| 581 | |
| 582 | /* get the enumeration value that corresponds to an initial-value trie data entry */ |
| 583 | initialValue=enumValue(context, trie->initialValue); |
| 584 | |
| 585 | /* set variables for previous range */ |
| 586 | prevI2Block=-1; |
| 587 | prevBlock=-1; |
| 588 | prev=start; |
| 589 | prevValue=0; |
| 590 | |
| 591 | /* enumerate index-2 blocks */ |
| 592 | for(c=start; c<limit && c<highStart;) { |
| 593 | /* Code point limit for iterating inside this i2Block. */ |
| 594 | UChar32 tempLimit=c+UTRIE2_CP_PER_INDEX_1_ENTRY; |
| 595 | if(limit<tempLimit) { |
| 596 | tempLimit=limit; |
| 597 | } |
| 598 | if(c<=0xffff) { |
| 599 | if(!U_IS_SURROGATE(c)) { |
| 600 | i2Block=c>>UTRIE2_SHIFT_2; |
| 601 | } else if(U_IS_SURROGATE_LEAD(c)) { |
| 602 | /* |
| 603 | * Enumerate values for lead surrogate code points, not code units: |
| 604 | * This special block has half the normal length. |
| 605 | */ |
| 606 | i2Block=UTRIE2_LSCP_INDEX_2_OFFSET; |
| 607 | tempLimit=MIN_VALUE(0xdc00, limit); |
| 608 | } else { |
| 609 | /* |
| 610 | * Switch back to the normal part of the index-2 table. |
| 611 | * Enumerate the second half of the surrogates block. |
| 612 | */ |
| 613 | i2Block=0xd800>>UTRIE2_SHIFT_2; |
| 614 | tempLimit=MIN_VALUE(0xe000, limit); |
| 615 | } |
| 616 | } else { |
| 617 | /* supplementary code points */ |
| 618 | if(idx!=NULL) { |
| 619 | i2Block=idx[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+ |
| 620 | (c>>UTRIE2_SHIFT_1)]; |
| 621 | } else { |
| 622 | i2Block=trie->newTrie->index1[c>>UTRIE2_SHIFT_1]; |
| 623 | } |
| 624 | if(i2Block==prevI2Block && (c-prev)>=UTRIE2_CP_PER_INDEX_1_ENTRY) { |
| 625 | /* |
| 626 | * The index-2 block is the same as the previous one, and filled with prevValue. |
| 627 | * Only possible for supplementary code points because the linear-BMP index-2 |
| 628 | * table creates unique i2Block values. |
| 629 | */ |
| 630 | c+=UTRIE2_CP_PER_INDEX_1_ENTRY; |
| 631 | continue; |
| 632 | } |
| 633 | } |
| 634 | prevI2Block=i2Block; |
| 635 | if(i2Block==index2NullOffset) { |
| 636 | /* this is the null index-2 block */ |
| 637 | if(prevValue!=initialValue) { |
| 638 | if(prev<c && !enumRange(context, prev, c-1, prevValue)) { |
| 639 | return; |
| 640 | } |
| 641 | prevBlock=nullBlock; |
| 642 | prev=c; |
| 643 | prevValue=initialValue; |
| 644 | } |
| 645 | c+=UTRIE2_CP_PER_INDEX_1_ENTRY; |
| 646 | } else { |
| 647 | /* enumerate data blocks for one index-2 block */ |
| 648 | int32_t i2, i2Limit; |
| 649 | i2=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; |
| 650 | if((c>>UTRIE2_SHIFT_1)==(tempLimit>>UTRIE2_SHIFT_1)) { |
| 651 | i2Limit=(tempLimit>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; |
| 652 | } else { |
| 653 | i2Limit=UTRIE2_INDEX_2_BLOCK_LENGTH; |
| 654 | } |
| 655 | for(; i2<i2Limit; ++i2) { |
| 656 | if(idx!=NULL) { |
| 657 | block=(int32_t)idx[i2Block+i2]<<UTRIE2_INDEX_SHIFT; |
| 658 | } else { |
| 659 | block=trie->newTrie->index2[i2Block+i2]; |
| 660 | } |
| 661 | if(block==prevBlock && (c-prev)>=UTRIE2_DATA_BLOCK_LENGTH) { |
| 662 | /* the block is the same as the previous one, and filled with prevValue */ |
| 663 | c+=UTRIE2_DATA_BLOCK_LENGTH; |
| 664 | continue; |
| 665 | } |
| 666 | prevBlock=block; |
| 667 | if(block==nullBlock) { |
| 668 | /* this is the null data block */ |
| 669 | if(prevValue!=initialValue) { |
| 670 | if(prev<c && !enumRange(context, prev, c-1, prevValue)) { |
| 671 | return; |
| 672 | } |
| 673 | prev=c; |
| 674 | prevValue=initialValue; |
| 675 | } |
| 676 | c+=UTRIE2_DATA_BLOCK_LENGTH; |
| 677 | } else { |
| 678 | for(j=0; j<UTRIE2_DATA_BLOCK_LENGTH; ++j) { |
| 679 | value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]); |
| 680 | if(value!=prevValue) { |
| 681 | if(prev<c && !enumRange(context, prev, c-1, prevValue)) { |
| 682 | return; |
| 683 | } |
| 684 | prev=c; |
| 685 | prevValue=value; |
| 686 | } |
| 687 | ++c; |
| 688 | } |
| 689 | } |
| 690 | } |
| 691 | } |
| 692 | } |
| 693 | |
| 694 | if(c>limit) { |
| 695 | c=limit; /* could be higher if in the index2NullOffset */ |
| 696 | } else if(c<limit) { |
| 697 | /* c==highStart<limit */ |
| 698 | uint32_t highValue; |
| 699 | if(idx!=NULL) { |
| 700 | highValue= |
| 701 | data32!=NULL ? |
| 702 | data32[trie->highValueIndex] : |
| 703 | idx[trie->highValueIndex]; |
| 704 | } else { |
| 705 | highValue=trie->newTrie->data[trie->newTrie->dataLength-UTRIE2_DATA_GRANULARITY]; |
| 706 | } |
| 707 | value=enumValue(context, highValue); |
| 708 | if(value!=prevValue) { |
| 709 | if(prev<c && !enumRange(context, prev, c-1, prevValue)) { |
| 710 | return; |
| 711 | } |
| 712 | prev=c; |
| 713 | prevValue=value; |
| 714 | } |
| 715 | c=limit; |
| 716 | } |
| 717 | |
| 718 | /* deliver last range */ |
| 719 | enumRange(context, prev, c-1, prevValue); |
| 720 | } |
| 721 | |
| 722 | U_CAPI void U_EXPORT2 |
| 723 | utrie2_enum(const UTrie2 *trie, |
| 724 | UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) { |
| 725 | enumEitherTrie(trie, 0, 0x110000, enumValue, enumRange, context); |
| 726 | } |
| 727 | |
| 728 | U_CAPI void U_EXPORT2 |
| 729 | utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead, |
| 730 | UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, |
| 731 | const void *context) { |
| 732 | if(!U16_IS_LEAD(lead)) { |
| 733 | return; |
| 734 | } |
| 735 | lead=(lead-0xd7c0)<<10; /* start code point */ |
| 736 | enumEitherTrie(trie, lead, lead+0x400, enumValue, enumRange, context); |
| 737 | } |
| 738 | |
| 739 | /* C++ convenience wrappers ------------------------------------------------- */ |
| 740 | |
| 741 | U_NAMESPACE_BEGIN |
| 742 | |
| 743 | uint16_t BackwardUTrie2StringIterator::previous16() { |
| 744 | codePointLimit=codePointStart; |
| 745 | if(start>=codePointStart) { |
| 746 | codePoint=U_SENTINEL; |
| 747 | return 0; |
| 748 | } |
| 749 | uint16_t result; |
| 750 | UTRIE2_U16_PREV16(trie, start, codePointStart, codePoint, result); |
| 751 | return result; |
| 752 | } |
| 753 | |
| 754 | uint16_t ForwardUTrie2StringIterator::next16() { |
| 755 | codePointStart=codePointLimit; |
| 756 | if(codePointLimit==limit) { |
| 757 | codePoint=U_SENTINEL; |
| 758 | return 0; |
| 759 | } |
| 760 | uint16_t result; |
| 761 | UTRIE2_U16_NEXT16(trie, codePointLimit, limit, codePoint, result); |
| 762 | return result; |
| 763 | } |
| 764 | |
| 765 | U_NAMESPACE_END |