Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1 | // © 2016 and later: Unicode, Inc. and others. |
| 2 | // License & terms of use: http://www.unicode.org/copyright.html |
| 3 | /* |
| 4 | ******************************************************************************* |
| 5 | * Copyright (C) 2010-2014, International Business Machines |
| 6 | * Corporation and others. All Rights Reserved. |
| 7 | ******************************************************************************* |
| 8 | * file name: ucharstrietest.cpp |
| 9 | * encoding: UTF-8 |
| 10 | * tab size: 8 (not used) |
| 11 | * indentation:4 |
| 12 | * |
| 13 | * created on: 2010nov16 |
| 14 | * created by: Markus W. Scherer |
| 15 | */ |
| 16 | |
| 17 | #include <string.h> |
| 18 | |
| 19 | #include "unicode/utypes.h" |
| 20 | #include "unicode/appendable.h" |
| 21 | #include "unicode/localpointer.h" |
| 22 | #include "unicode/ucharstrie.h" |
| 23 | #include "unicode/ucharstriebuilder.h" |
| 24 | #include "unicode/uniset.h" |
| 25 | #include "unicode/unistr.h" |
| 26 | #include "unicode/utf16.h" |
| 27 | #include "intltest.h" |
| 28 | #include "cmemory.h" |
| 29 | |
| 30 | struct StringAndValue { |
| 31 | const char *s; |
| 32 | int32_t value; |
| 33 | }; |
| 34 | |
| 35 | class UCharsTrieTest : public IntlTest { |
| 36 | public: |
| 37 | UCharsTrieTest(); |
| 38 | virtual ~UCharsTrieTest(); |
| 39 | |
| 40 | void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL) override; |
| 41 | void TestBuilder(); |
| 42 | void TestEmpty(); |
| 43 | void Test_a(); |
| 44 | void Test_a_ab(); |
| 45 | void TestShortestBranch(); |
| 46 | void TestBranches(); |
| 47 | void TestLongSequence(); |
| 48 | void TestLongBranch(); |
| 49 | void TestValuesForState(); |
| 50 | void TestCompact(); |
| 51 | void TestFirstForCodePoint(); |
| 52 | void TestNextForCodePoint(); |
| 53 | |
| 54 | UCharsTrie *buildLargeTrie(int32_t numUniqueFirst); |
| 55 | void TestLargeTrie(); |
| 56 | |
| 57 | UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption); |
| 58 | void TestHasUniqueValue(); |
| 59 | void TestGetNextUChars(); |
| 60 | void TestIteratorFromBranch(); |
| 61 | void TestIteratorFromLinearMatch(); |
| 62 | void TestTruncatingIteratorFromRoot(); |
| 63 | void TestTruncatingIteratorFromLinearMatchShort(); |
| 64 | void TestTruncatingIteratorFromLinearMatchLong(); |
| 65 | void TestIteratorFromUChars(); |
| 66 | |
| 67 | void checkData(const StringAndValue data[], int32_t dataLength); |
| 68 | void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption); |
| 69 | UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength, |
| 70 | UStringTrieBuildOption buildOption); |
| 71 | void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); |
| 72 | void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); |
| 73 | void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); |
| 74 | void checkNextWithState64(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); |
| 75 | void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); |
| 76 | void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); |
| 77 | void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength); |
| 78 | |
| 79 | private: |
| 80 | UCharsTrieBuilder *builder_; |
| 81 | }; |
| 82 | |
| 83 | extern IntlTest *createUCharsTrieTest() { |
| 84 | return new UCharsTrieTest(); |
| 85 | } |
| 86 | |
| 87 | UCharsTrieTest::UCharsTrieTest() : builder_(NULL) { |
| 88 | IcuTestErrorCode errorCode(*this, "UCharsTrieTest()"); |
| 89 | builder_=new UCharsTrieBuilder(errorCode); |
| 90 | } |
| 91 | |
| 92 | UCharsTrieTest::~UCharsTrieTest() { |
| 93 | delete builder_; |
| 94 | } |
| 95 | |
| 96 | void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { |
| 97 | if(exec) { |
| 98 | logln("TestSuite UCharsTrieTest: "); |
| 99 | } |
| 100 | TESTCASE_AUTO_BEGIN; |
| 101 | TESTCASE_AUTO(TestBuilder); |
| 102 | TESTCASE_AUTO(TestEmpty); |
| 103 | TESTCASE_AUTO(Test_a); |
| 104 | TESTCASE_AUTO(Test_a_ab); |
| 105 | TESTCASE_AUTO(TestShortestBranch); |
| 106 | TESTCASE_AUTO(TestBranches); |
| 107 | TESTCASE_AUTO(TestLongSequence); |
| 108 | TESTCASE_AUTO(TestLongBranch); |
| 109 | TESTCASE_AUTO(TestValuesForState); |
| 110 | TESTCASE_AUTO(TestCompact); |
| 111 | TESTCASE_AUTO(TestFirstForCodePoint); |
| 112 | TESTCASE_AUTO(TestNextForCodePoint); |
| 113 | TESTCASE_AUTO(TestLargeTrie); |
| 114 | TESTCASE_AUTO(TestHasUniqueValue); |
| 115 | TESTCASE_AUTO(TestGetNextUChars); |
| 116 | TESTCASE_AUTO(TestIteratorFromBranch); |
| 117 | TESTCASE_AUTO(TestIteratorFromLinearMatch); |
| 118 | TESTCASE_AUTO(TestTruncatingIteratorFromRoot); |
| 119 | TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort); |
| 120 | TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong); |
| 121 | TESTCASE_AUTO(TestIteratorFromUChars); |
| 122 | TESTCASE_AUTO_END; |
| 123 | } |
| 124 | |
| 125 | void UCharsTrieTest::TestBuilder() { |
| 126 | IcuTestErrorCode errorCode(*this, "TestBuilder()"); |
| 127 | delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode); |
| 128 | if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) { |
| 129 | errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR"); |
| 130 | return; |
| 131 | } |
| 132 | // TODO: remove .build(...) once add() checks for duplicates. |
| 133 | builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode); |
| 134 | if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) { |
| 135 | errln("UCharsTrieBuilder.add() did not detect duplicates"); |
| 136 | return; |
| 137 | } |
| 138 | } |
| 139 | |
| 140 | void UCharsTrieTest::TestEmpty() { |
| 141 | static const StringAndValue data[]={ |
| 142 | { "", 0 } |
| 143 | }; |
| 144 | checkData(data, UPRV_LENGTHOF(data)); |
| 145 | } |
| 146 | |
| 147 | void UCharsTrieTest::Test_a() { |
| 148 | static const StringAndValue data[]={ |
| 149 | { "a", 1 } |
| 150 | }; |
| 151 | checkData(data, UPRV_LENGTHOF(data)); |
| 152 | } |
| 153 | |
| 154 | void UCharsTrieTest::Test_a_ab() { |
| 155 | static const StringAndValue data[]={ |
| 156 | { "a", 1 }, |
| 157 | { "ab", 100 } |
| 158 | }; |
| 159 | checkData(data, UPRV_LENGTHOF(data)); |
| 160 | } |
| 161 | |
| 162 | void UCharsTrieTest::TestShortestBranch() { |
| 163 | static const StringAndValue data[]={ |
| 164 | { "a", 1000 }, |
| 165 | { "b", 2000 } |
| 166 | }; |
| 167 | checkData(data, UPRV_LENGTHOF(data)); |
| 168 | } |
| 169 | |
| 170 | void UCharsTrieTest::TestBranches() { |
| 171 | static const StringAndValue data[]={ |
| 172 | { "a", 0x10 }, |
| 173 | { "cc", 0x40 }, |
| 174 | { "e", 0x100 }, |
| 175 | { "ggg", 0x400 }, |
| 176 | { "i", 0x1000 }, |
| 177 | { "kkkk", 0x4000 }, |
| 178 | { "n", 0x10000 }, |
| 179 | { "ppppp", 0x40000 }, |
| 180 | { "r", 0x100000 }, |
| 181 | { "sss", 0x200000 }, |
| 182 | { "t", 0x400000 }, |
| 183 | { "uu", 0x800000 }, |
| 184 | { "vv", 0x7fffffff }, |
| 185 | { "zz", (int32_t)0x80000000 } |
| 186 | }; |
| 187 | for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) { |
| 188 | logln("TestBranches length=%d", (int)length); |
| 189 | checkData(data, length); |
| 190 | } |
| 191 | } |
| 192 | |
| 193 | void UCharsTrieTest::TestLongSequence() { |
| 194 | static const StringAndValue data[]={ |
| 195 | { "a", -1 }, |
| 196 | // sequence of linear-match nodes |
| 197 | { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 }, |
| 198 | // more than 256 units |
| 199 | { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" |
| 200 | "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" |
| 201 | "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" |
| 202 | "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" |
| 203 | "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" |
| 204 | "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 } |
| 205 | }; |
| 206 | checkData(data, UPRV_LENGTHOF(data)); |
| 207 | } |
| 208 | |
| 209 | void UCharsTrieTest::TestLongBranch() { |
| 210 | // Split-branch and interesting compact-integer values. |
| 211 | static const StringAndValue data[]={ |
| 212 | { "a", -2 }, |
| 213 | { "b", -1 }, |
| 214 | { "c", 0 }, |
| 215 | { "d2", 1 }, |
| 216 | { "f", 0x3f }, |
| 217 | { "g", 0x40 }, |
| 218 | { "h", 0x41 }, |
| 219 | { "j23", 0x1900 }, |
| 220 | { "j24", 0x19ff }, |
| 221 | { "j25", 0x1a00 }, |
| 222 | { "k2", 0x1a80 }, |
| 223 | { "k3", 0x1aff }, |
| 224 | { "l234567890", 0x1b00 }, |
| 225 | { "l234567890123", 0x1b01 }, |
| 226 | { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff }, |
| 227 | { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 }, |
| 228 | { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 }, |
| 229 | { "r", 0x333333 }, |
| 230 | { "s2345", 0x4444444 }, |
| 231 | { "t234567890", 0x77777777 }, |
| 232 | { "z", (int32_t)0x80000001 } |
| 233 | }; |
| 234 | checkData(data, UPRV_LENGTHOF(data)); |
| 235 | } |
| 236 | |
| 237 | void UCharsTrieTest::TestValuesForState() { |
| 238 | // Check that saveState() and resetToState() interact properly |
| 239 | // with next() and current(). |
| 240 | static const StringAndValue data[]={ |
| 241 | { "a", -1 }, |
| 242 | { "ab", -2 }, |
| 243 | { "abc", -3 }, |
| 244 | { "abcd", -4 }, |
| 245 | { "abcde", -5 }, |
| 246 | { "abcdef", -6 } |
| 247 | }; |
| 248 | checkData(data, UPRV_LENGTHOF(data)); |
| 249 | } |
| 250 | |
| 251 | void UCharsTrieTest::TestCompact() { |
| 252 | // Duplicate trailing strings and values provide opportunities for compacting. |
| 253 | static const StringAndValue data[]={ |
| 254 | { "+", 0 }, |
| 255 | { "+august", 8 }, |
| 256 | { "+december", 12 }, |
| 257 | { "+july", 7 }, |
| 258 | { "+june", 6 }, |
| 259 | { "+november", 11 }, |
| 260 | { "+october", 10 }, |
| 261 | { "+september", 9 }, |
| 262 | { "-", 0 }, |
| 263 | { "-august", 8 }, |
| 264 | { "-december", 12 }, |
| 265 | { "-july", 7 }, |
| 266 | { "-june", 6 }, |
| 267 | { "-november", 11 }, |
| 268 | { "-october", 10 }, |
| 269 | { "-september", 9 }, |
| 270 | // The l+n branch (with its sub-nodes) is a duplicate but will be written |
| 271 | // both times because each time it follows a different linear-match node. |
| 272 | { "xjuly", 7 }, |
| 273 | { "xjune", 6 } |
| 274 | }; |
| 275 | checkData(data, UPRV_LENGTHOF(data)); |
| 276 | } |
| 277 | |
| 278 | void UCharsTrieTest::TestFirstForCodePoint() { |
| 279 | static const StringAndValue data[]={ |
| 280 | { "a", 1 }, |
| 281 | { "a\\ud800", 2 }, |
| 282 | { "a\\U00010000", 3 }, |
| 283 | { "\\ud840", 4 }, |
| 284 | { "\\U00020000\\udbff", 5 }, |
| 285 | { "\\U00020000\\U0010ffff", 6 }, |
| 286 | { "\\U00020000\\U0010ffffz", 7 }, |
| 287 | { "\\U00050000xy", 8 }, |
| 288 | { "\\U00050000xyz", 9 } |
| 289 | }; |
| 290 | checkData(data, UPRV_LENGTHOF(data)); |
| 291 | } |
| 292 | |
| 293 | void UCharsTrieTest::TestNextForCodePoint() { |
| 294 | static const StringAndValue data[]={ |
| 295 | { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 }, |
| 296 | { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 }, |
| 297 | { "\\u4dff\\U000103ff", 99999 } |
| 298 | }; |
| 299 | LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); |
| 300 | if(trie.isNull()) { |
| 301 | return; // buildTrie() reported an error |
| 302 | } |
| 303 | UStringTrieResult result; |
| 304 | if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || |
| 305 | (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || |
| 306 | (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || |
| 307 | (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || |
| 308 | (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || |
| 309 | (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || |
| 310 | trie->getValue()!=2000000000 |
| 311 | ) { |
| 312 | errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s); |
| 313 | } |
| 314 | if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || |
| 315 | (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || |
| 316 | (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || |
| 317 | (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || |
| 318 | trie->getValue()!=44444 |
| 319 | ) { |
| 320 | errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s); |
| 321 | } |
| 322 | if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || |
| 323 | (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || |
| 324 | (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || |
| 325 | (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current() // no match for trail surrogate |
| 326 | ) { |
| 327 | errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222"); |
| 328 | } |
| 329 | if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || |
| 330 | (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || |
| 331 | trie->getValue()!=99999 |
| 332 | ) { |
| 333 | errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s); |
| 334 | } |
| 335 | } |
| 336 | |
| 337 | // Definitions in the anonymous namespace are invisible outside this file. |
| 338 | namespace { |
| 339 | |
| 340 | // Generate (string, value) pairs. |
| 341 | // The first string (before next()) will be empty. |
| 342 | class Generator { |
| 343 | public: |
| 344 | Generator() : value(4711), num(0) {} |
| 345 | void next() { |
| 346 | UChar c; |
| 347 | s.truncate(0); |
| 348 | s.append(c=(UChar)(value>>16)); |
| 349 | s.append((UChar)(value>>4)); |
| 350 | if(value&1) { |
| 351 | s.append((UChar)value); |
| 352 | } |
| 353 | set.add(c); |
| 354 | value+=((value>>5)&0x7ff)*3+1; |
| 355 | ++num; |
| 356 | } |
| 357 | const UnicodeString &getString() const { return s; } |
| 358 | int32_t getValue() const { return value; } |
| 359 | int32_t countUniqueFirstChars() const { return set.size(); } |
| 360 | int32_t getIndex() const { return num; } |
| 361 | |
| 362 | private: |
| 363 | UnicodeString s; |
| 364 | UnicodeSet set; |
| 365 | int32_t value; |
| 366 | int32_t num; |
| 367 | }; |
| 368 | |
| 369 | } // end namespace |
| 370 | |
| 371 | UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) { |
| 372 | IcuTestErrorCode errorCode(*this, "buildLargeTrie()"); |
| 373 | Generator gen; |
| 374 | builder_->clear(); |
| 375 | while(gen.countUniqueFirstChars()<numUniqueFirst) { |
| 376 | builder_->add(gen.getString(), gen.getValue(), errorCode); |
| 377 | gen.next(); |
| 378 | } |
| 379 | logln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex()); |
| 380 | UnicodeString trieUChars; |
| 381 | builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode); |
| 382 | logln("serialized trie size: %ld UChars\n", (long)trieUChars.length()); |
| 383 | return new UCharsTrie(trieUChars.getBuffer()); |
| 384 | } |
| 385 | |
| 386 | // Exercise a large branch node. |
| 387 | void UCharsTrieTest::TestLargeTrie() { |
| 388 | LocalPointer<UCharsTrie> trie(buildLargeTrie(1111)); |
| 389 | if(trie.isNull()) { |
| 390 | return; // buildTrie() reported an error |
| 391 | } |
| 392 | Generator gen; |
| 393 | while(gen.countUniqueFirstChars()<1111) { |
| 394 | UnicodeString x(gen.getString()); |
| 395 | int32_t value=gen.getValue(); |
| 396 | if(!x.isEmpty()) { |
| 397 | if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) { |
| 398 | errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n", |
| 399 | x[0], (long)gen.getIndex()); |
| 400 | break; |
| 401 | } |
| 402 | x.remove(0, 1); |
| 403 | } |
| 404 | UStringTrieResult result=trie->next(x.getBuffer(), x.length()); |
| 405 | if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=trie->getValue()) { |
| 406 | errln("next(%d chars U+%04X U+%04X)!=hasValue or " |
| 407 | "next()!=current() or getValue() wrong " |
| 408 | "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex()); |
| 409 | break; |
| 410 | } |
| 411 | gen.next(); |
| 412 | } |
| 413 | } |
| 414 | |
| 415 | enum { |
| 416 | u_a=0x61, |
| 417 | u_b=0x62, |
| 418 | u_c=0x63, |
| 419 | u_j=0x6a, |
| 420 | u_n=0x6e, |
| 421 | u_r=0x72, |
| 422 | u_u=0x75, |
| 423 | u_y=0x79 |
| 424 | }; |
| 425 | |
| 426 | UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) { |
| 427 | // All types of nodes leading to the same value, |
| 428 | // for code coverage of recursive functions. |
| 429 | // In particular, we need a lot of branches on some single level |
| 430 | // to exercise a split-branch node. |
| 431 | static const StringAndValue data[]={ |
| 432 | { "august", 8 }, |
| 433 | { "jan", 1 }, |
| 434 | { "jan.", 1 }, |
| 435 | { "jana", 1 }, |
| 436 | { "janbb", 1 }, |
| 437 | { "janc", 1 }, |
| 438 | { "janddd", 1 }, |
| 439 | { "janee", 1 }, |
| 440 | { "janef", 1 }, |
| 441 | { "janf", 1 }, |
| 442 | { "jangg", 1 }, |
| 443 | { "janh", 1 }, |
| 444 | { "janiiii", 1 }, |
| 445 | { "janj", 1 }, |
| 446 | { "jankk", 1 }, |
| 447 | { "jankl", 1 }, |
| 448 | { "jankmm", 1 }, |
| 449 | { "janl", 1 }, |
| 450 | { "janm", 1 }, |
| 451 | { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, |
| 452 | { "jano", 1 }, |
| 453 | { "janpp", 1 }, |
| 454 | { "janqqq", 1 }, |
| 455 | { "janr", 1 }, |
| 456 | { "januar", 1 }, |
| 457 | { "january", 1 }, |
| 458 | { "july", 7 }, |
| 459 | { "jun", 6 }, |
| 460 | { "jun.", 6 }, |
| 461 | { "june", 6 } |
| 462 | }; |
| 463 | return buildTrie(data, UPRV_LENGTHOF(data), buildOption); |
| 464 | } |
| 465 | |
| 466 | void UCharsTrieTest::TestHasUniqueValue() { |
| 467 | LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); |
| 468 | if(trie.isNull()) { |
| 469 | return; // buildTrie() reported an error |
| 470 | } |
| 471 | int32_t uniqueValue; |
| 472 | if(trie->hasUniqueValue(uniqueValue)) { |
| 473 | errln("unique value at root"); |
| 474 | } |
| 475 | trie->next(u_j); |
| 476 | trie->next(u_a); |
| 477 | trie->next(u_n); |
| 478 | // hasUniqueValue() directly after next() |
| 479 | if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) { |
| 480 | errln("not unique value 1 after \"jan\""); |
| 481 | } |
| 482 | trie->first(u_j); |
| 483 | trie->next(u_u); |
| 484 | if(trie->hasUniqueValue(uniqueValue)) { |
| 485 | errln("unique value after \"ju\""); |
| 486 | } |
| 487 | if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) { |
| 488 | errln("not normal value 6 after \"jun\""); |
| 489 | } |
| 490 | // hasUniqueValue() after getValue() |
| 491 | if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) { |
| 492 | errln("not unique value 6 after \"jun\""); |
| 493 | } |
| 494 | // hasUniqueValue() from within a linear-match node |
| 495 | trie->first(u_a); |
| 496 | trie->next(u_u); |
| 497 | if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) { |
| 498 | errln("not unique value 8 after \"au\""); |
| 499 | } |
| 500 | } |
| 501 | |
| 502 | void UCharsTrieTest::TestGetNextUChars() { |
| 503 | LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); |
| 504 | if(trie.isNull()) { |
| 505 | return; // buildTrie() reported an error |
| 506 | } |
| 507 | UnicodeString buffer; |
| 508 | UnicodeStringAppendable app(buffer); |
| 509 | int32_t count=trie->getNextUChars(app); |
| 510 | if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) { |
| 511 | errln("months getNextUChars()!=[aj] at root"); |
| 512 | } |
| 513 | trie->next(u_j); |
| 514 | trie->next(u_a); |
| 515 | trie->next(u_n); |
| 516 | // getNextUChars() directly after next() |
| 517 | buffer.remove(); |
| 518 | count=trie->getNextUChars(app); |
| 519 | if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) { |
| 520 | errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\""); |
| 521 | } |
| 522 | // getNextUChars() after getValue() |
| 523 | trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE. |
| 524 | buffer.remove(); |
| 525 | count=trie->getNextUChars(app); |
| 526 | if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) { |
| 527 | errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()"); |
| 528 | } |
| 529 | // getNextUChars() from a linear-match node |
| 530 | trie->next(u_u); |
| 531 | buffer.remove(); |
| 532 | count=trie->getNextUChars(app); |
| 533 | if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) { |
| 534 | errln("months getNextUChars()!=[a] after \"janu\""); |
| 535 | } |
| 536 | trie->next(u_a); |
| 537 | buffer.remove(); |
| 538 | count=trie->getNextUChars(app); |
| 539 | if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) { |
| 540 | errln("months getNextUChars()!=[r] after \"janua\""); |
| 541 | } |
| 542 | trie->next(u_r); |
| 543 | trie->next(u_y); |
| 544 | // getNextUChars() after a final match |
| 545 | buffer.remove(); |
| 546 | count=trie->getNextUChars(app); |
| 547 | if(count!=0 || buffer.length()!=0) { |
| 548 | errln("months getNextUChars()!=[] after \"january\""); |
| 549 | } |
| 550 | } |
| 551 | |
| 552 | void UCharsTrieTest::TestIteratorFromBranch() { |
| 553 | LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); |
| 554 | if(trie.isNull()) { |
| 555 | return; // buildTrie() reported an error |
| 556 | } |
| 557 | // Go to a branch node. |
| 558 | trie->next(u_j); |
| 559 | trie->next(u_a); |
| 560 | trie->next(u_n); |
| 561 | IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()"); |
| 562 | UCharsTrie::Iterator iter(*trie, 0, errorCode); |
| 563 | if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { |
| 564 | return; |
| 565 | } |
| 566 | // Expected data: Same as in buildMonthsTrie(), except only the suffixes |
| 567 | // following "jan". |
| 568 | static const StringAndValue data[]={ |
| 569 | { "", 1 }, |
| 570 | { ".", 1 }, |
| 571 | { "a", 1 }, |
| 572 | { "bb", 1 }, |
| 573 | { "c", 1 }, |
| 574 | { "ddd", 1 }, |
| 575 | { "ee", 1 }, |
| 576 | { "ef", 1 }, |
| 577 | { "f", 1 }, |
| 578 | { "gg", 1 }, |
| 579 | { "h", 1 }, |
| 580 | { "iiii", 1 }, |
| 581 | { "j", 1 }, |
| 582 | { "kk", 1 }, |
| 583 | { "kl", 1 }, |
| 584 | { "kmm", 1 }, |
| 585 | { "l", 1 }, |
| 586 | { "m", 1 }, |
| 587 | { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, |
| 588 | { "o", 1 }, |
| 589 | { "pp", 1 }, |
| 590 | { "qqq", 1 }, |
| 591 | { "r", 1 }, |
| 592 | { "uar", 1 }, |
| 593 | { "uary", 1 } |
| 594 | }; |
| 595 | checkIterator(iter, data, UPRV_LENGTHOF(data)); |
| 596 | // Reset, and we should get the same result. |
| 597 | logln("after iter.reset()"); |
| 598 | checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); |
| 599 | } |
| 600 | |
| 601 | void UCharsTrieTest::TestIteratorFromLinearMatch() { |
| 602 | LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); |
| 603 | if(trie.isNull()) { |
| 604 | return; // buildTrie() reported an error |
| 605 | } |
| 606 | // Go into a linear-match node. |
| 607 | trie->next(u_j); |
| 608 | trie->next(u_a); |
| 609 | trie->next(u_n); |
| 610 | trie->next(u_u); |
| 611 | trie->next(u_a); |
| 612 | IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()"); |
| 613 | UCharsTrie::Iterator iter(*trie, 0, errorCode); |
| 614 | if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { |
| 615 | return; |
| 616 | } |
| 617 | // Expected data: Same as in buildMonthsTrie(), except only the suffixes |
| 618 | // following "janua". |
| 619 | static const StringAndValue data[]={ |
| 620 | { "r", 1 }, |
| 621 | { "ry", 1 } |
| 622 | }; |
| 623 | checkIterator(iter, data, UPRV_LENGTHOF(data)); |
| 624 | // Reset, and we should get the same result. |
| 625 | logln("after iter.reset()"); |
| 626 | checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); |
| 627 | } |
| 628 | |
| 629 | void UCharsTrieTest::TestTruncatingIteratorFromRoot() { |
| 630 | LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); |
| 631 | if(trie.isNull()) { |
| 632 | return; // buildTrie() reported an error |
| 633 | } |
| 634 | IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()"); |
| 635 | UCharsTrie::Iterator iter(*trie, 4, errorCode); |
| 636 | if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { |
| 637 | return; |
| 638 | } |
| 639 | // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters |
| 640 | // of each string, and no string duplicates from the truncation. |
| 641 | static const StringAndValue data[]={ |
| 642 | { "augu", -1 }, |
| 643 | { "jan", 1 }, |
| 644 | { "jan.", 1 }, |
| 645 | { "jana", 1 }, |
| 646 | { "janb", -1 }, |
| 647 | { "janc", 1 }, |
| 648 | { "jand", -1 }, |
| 649 | { "jane", -1 }, |
| 650 | { "janf", 1 }, |
| 651 | { "jang", -1 }, |
| 652 | { "janh", 1 }, |
| 653 | { "jani", -1 }, |
| 654 | { "janj", 1 }, |
| 655 | { "jank", -1 }, |
| 656 | { "janl", 1 }, |
| 657 | { "janm", 1 }, |
| 658 | { "jann", -1 }, |
| 659 | { "jano", 1 }, |
| 660 | { "janp", -1 }, |
| 661 | { "janq", -1 }, |
| 662 | { "janr", 1 }, |
| 663 | { "janu", -1 }, |
| 664 | { "july", 7 }, |
| 665 | { "jun", 6 }, |
| 666 | { "jun.", 6 }, |
| 667 | { "june", 6 } |
| 668 | }; |
| 669 | checkIterator(iter, data, UPRV_LENGTHOF(data)); |
| 670 | // Reset, and we should get the same result. |
| 671 | logln("after iter.reset()"); |
| 672 | checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); |
| 673 | } |
| 674 | |
| 675 | void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() { |
| 676 | static const StringAndValue data[]={ |
| 677 | { "abcdef", 10 }, |
| 678 | { "abcdepq", 200 }, |
| 679 | { "abcdeyz", 3000 } |
| 680 | }; |
| 681 | LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); |
| 682 | if(trie.isNull()) { |
| 683 | return; // buildTrie() reported an error |
| 684 | } |
| 685 | // Go into a linear-match node. |
| 686 | trie->next(u_a); |
| 687 | trie->next(u_b); |
| 688 | IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()"); |
| 689 | // Truncate within the linear-match node. |
| 690 | UCharsTrie::Iterator iter(*trie, 2, errorCode); |
| 691 | if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { |
| 692 | return; |
| 693 | } |
| 694 | static const StringAndValue expected[]={ |
| 695 | { "cd", -1 } |
| 696 | }; |
| 697 | checkIterator(iter, expected, UPRV_LENGTHOF(expected)); |
| 698 | // Reset, and we should get the same result. |
| 699 | logln("after iter.reset()"); |
| 700 | checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected)); |
| 701 | } |
| 702 | |
| 703 | void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() { |
| 704 | static const StringAndValue data[]={ |
| 705 | { "abcdef", 10 }, |
| 706 | { "abcdepq", 200 }, |
| 707 | { "abcdeyz", 3000 } |
| 708 | }; |
| 709 | LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); |
| 710 | if(trie.isNull()) { |
| 711 | return; // buildTrie() reported an error |
| 712 | } |
| 713 | // Go into a linear-match node. |
| 714 | trie->next(u_a); |
| 715 | trie->next(u_b); |
| 716 | trie->next(u_c); |
| 717 | IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()"); |
| 718 | // Truncate after the linear-match node. |
| 719 | UCharsTrie::Iterator iter(*trie, 3, errorCode); |
| 720 | if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { |
| 721 | return; |
| 722 | } |
| 723 | static const StringAndValue expected[]={ |
| 724 | { "def", 10 }, |
| 725 | { "dep", -1 }, |
| 726 | { "dey", -1 } |
| 727 | }; |
| 728 | checkIterator(iter, expected, UPRV_LENGTHOF(expected)); |
| 729 | // Reset, and we should get the same result. |
| 730 | logln("after iter.reset()"); |
| 731 | checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected)); |
| 732 | } |
| 733 | |
| 734 | void UCharsTrieTest::TestIteratorFromUChars() { |
| 735 | static const StringAndValue data[]={ |
| 736 | { "mm", 3 }, |
| 737 | { "mmm", 33 }, |
| 738 | { "mmnop", 333 } |
| 739 | }; |
| 740 | builder_->clear(); |
| 741 | IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()"); |
| 742 | for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) { |
| 743 | builder_->add(data[i].s, data[i].value, errorCode); |
| 744 | } |
| 745 | UnicodeString trieUChars; |
| 746 | builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode); |
| 747 | UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode); |
| 748 | checkIterator(iter, data, UPRV_LENGTHOF(data)); |
| 749 | } |
| 750 | |
| 751 | void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) { |
| 752 | logln("checkData(dataLength=%d, fast)", (int)dataLength); |
| 753 | checkData(data, dataLength, USTRINGTRIE_BUILD_FAST); |
| 754 | logln("checkData(dataLength=%d, small)", (int)dataLength); |
| 755 | checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL); |
| 756 | } |
| 757 | |
| 758 | void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) { |
| 759 | LocalPointer<UCharsTrie> trie(buildTrie(data, dataLength, buildOption)); |
| 760 | if(trie.isNull()) { |
| 761 | return; // buildTrie() reported an error |
| 762 | } |
| 763 | checkFirst(*trie, data, dataLength); |
| 764 | checkNext(*trie, data, dataLength); |
| 765 | checkNextWithState(*trie, data, dataLength); |
| 766 | checkNextWithState64(*trie, data, dataLength); |
| 767 | checkNextString(*trie, data, dataLength); |
| 768 | checkIterator(*trie, data, dataLength); |
| 769 | } |
| 770 | |
| 771 | UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength, |
| 772 | UStringTrieBuildOption buildOption) { |
| 773 | IcuTestErrorCode errorCode(*this, "buildTrie()"); |
| 774 | // Add the items to the trie builder in an interesting (not trivial, not random) order. |
| 775 | int32_t index, step; |
| 776 | if(dataLength&1) { |
| 777 | // Odd number of items. |
| 778 | index=dataLength/2; |
| 779 | step=2; |
| 780 | } else if((dataLength%3)!=0) { |
| 781 | // Not a multiple of 3. |
| 782 | index=dataLength/5; |
| 783 | step=3; |
| 784 | } else { |
| 785 | index=dataLength-1; |
| 786 | step=-1; |
| 787 | } |
| 788 | builder_->clear(); |
| 789 | for(int32_t i=0; i<dataLength; ++i) { |
| 790 | builder_->add(UnicodeString(data[index].s, -1, US_INV).unescape(), |
| 791 | data[index].value, errorCode); |
| 792 | index=(index+step)%dataLength; |
| 793 | } |
| 794 | UnicodeString trieUChars; |
| 795 | builder_->buildUnicodeString(buildOption, trieUChars, errorCode); |
| 796 | LocalPointer<UCharsTrie> trie(builder_->build(buildOption, errorCode)); |
| 797 | if(!errorCode.errIfFailureAndReset("add()/build()")) { |
| 798 | builder_->add("zzz", 999, errorCode); |
| 799 | if(errorCode.reset()!=U_NO_WRITE_PERMISSION) { |
| 800 | errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION"); |
| 801 | } |
| 802 | } |
| 803 | logln("serialized trie size: %ld UChars\n", (long)trieUChars.length()); |
| 804 | UnicodeString trieUChars2; |
| 805 | builder_->buildUnicodeString(buildOption, trieUChars2, errorCode); |
| 806 | if(trieUChars.getBuffer()==trieUChars2.getBuffer()) { |
| 807 | errln("builder.buildUnicodeString() before & after build() returned same array"); |
| 808 | } |
| 809 | if(errorCode.isFailure()) { |
| 810 | return NULL; |
| 811 | } |
| 812 | // Tries from either build() method should be identical but |
| 813 | // UCharsTrie does not implement equals(). |
| 814 | // We just return either one. |
| 815 | if((dataLength&1)!=0) { |
| 816 | return trie.orphan(); |
| 817 | } else { |
| 818 | return new UCharsTrie(trieUChars2.getBuffer()); |
| 819 | } |
| 820 | } |
| 821 | |
| 822 | void UCharsTrieTest::checkFirst(UCharsTrie &trie, |
| 823 | const StringAndValue data[], int32_t dataLength) { |
| 824 | for(int32_t i=0; i<dataLength; ++i) { |
| 825 | if(*data[i].s==0) { |
| 826 | continue; // skip empty string |
| 827 | } |
| 828 | UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); |
| 829 | UChar32 c=expectedString[0]; |
| 830 | UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0; |
| 831 | UStringTrieResult firstResult=trie.first(c); |
| 832 | int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1; |
| 833 | UStringTrieResult nextResult=trie.next(nextCp); |
| 834 | if(firstResult!=trie.reset().next(c) || |
| 835 | firstResult!=trie.current() || |
| 836 | firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) || |
| 837 | nextResult!=trie.next(nextCp) |
| 838 | ) { |
| 839 | errln("trie.first(U+%04X)!=trie.reset().next(same) for %s", |
| 840 | c, data[i].s); |
| 841 | } |
| 842 | c=expectedString.char32At(0); |
| 843 | int32_t cLength=U16_LENGTH(c); |
| 844 | nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0; |
| 845 | firstResult=trie.firstForCodePoint(c); |
| 846 | firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1; |
| 847 | nextResult=trie.nextForCodePoint(nextCp); |
| 848 | if(firstResult!=trie.reset().nextForCodePoint(c) || |
| 849 | firstResult!=trie.current() || |
| 850 | firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) || |
| 851 | nextResult!=trie.nextForCodePoint(nextCp) |
| 852 | ) { |
| 853 | errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s", |
| 854 | c, data[i].s); |
| 855 | } |
| 856 | } |
| 857 | trie.reset(); |
| 858 | } |
| 859 | |
| 860 | void UCharsTrieTest::checkNext(UCharsTrie &trie, |
| 861 | const StringAndValue data[], int32_t dataLength) { |
| 862 | UCharsTrie::State state; |
| 863 | for(int32_t i=0; i<dataLength; ++i) { |
| 864 | UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); |
| 865 | int32_t stringLength= (i&1) ? -1 : expectedString.length(); |
| 866 | UStringTrieResult result; |
| 867 | if( !USTRINGTRIE_HAS_VALUE( |
| 868 | result=trie.next(expectedString.getTerminatedBuffer(), stringLength)) || |
| 869 | result!=trie.current() |
| 870 | ) { |
| 871 | errln("trie does not seem to contain %s", data[i].s); |
| 872 | } else if(trie.getValue()!=data[i].value) { |
| 873 | errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx", |
| 874 | data[i].s, |
| 875 | (long)trie.getValue(), (long)trie.getValue(), |
| 876 | (long)data[i].value, (long)data[i].value); |
| 877 | } else if(result!=trie.current() || trie.getValue()!=data[i].value) { |
| 878 | errln("trie value for %s changes when repeating current()/getValue()", data[i].s); |
| 879 | } |
| 880 | trie.reset(); |
| 881 | stringLength=expectedString.length(); |
| 882 | result=trie.current(); |
| 883 | for(int32_t j=0; j<stringLength; ++j) { |
| 884 | if(!USTRINGTRIE_HAS_NEXT(result)) { |
| 885 | errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j); |
| 886 | break; |
| 887 | } |
| 888 | if(result==USTRINGTRIE_INTERMEDIATE_VALUE) { |
| 889 | trie.getValue(); |
| 890 | if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) { |
| 891 | errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j); |
| 892 | break; |
| 893 | } |
| 894 | } |
| 895 | result=trie.next(expectedString[j]); |
| 896 | if(!USTRINGTRIE_MATCHES(result)) { |
| 897 | errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j); |
| 898 | break; |
| 899 | } |
| 900 | if(result!=trie.current()) { |
| 901 | errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j); |
| 902 | break; |
| 903 | } |
| 904 | } |
| 905 | if(!USTRINGTRIE_HAS_VALUE(result)) { |
| 906 | errln("trie.next()!=hasValue at the end of %s", data[i].s); |
| 907 | continue; |
| 908 | } |
| 909 | trie.getValue(); |
| 910 | if(result!=trie.current()) { |
| 911 | errln("trie.current() != current()+getValue()+current() after end of %s", |
| 912 | data[i].s); |
| 913 | } |
| 914 | // Compare the final current() with whether next() can actually continue. |
| 915 | trie.saveState(state); |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 916 | UBool nextContinues=false; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 917 | for(int32_t c=0x20; c<0xe000; ++c) { |
| 918 | if(c==0x80) { |
| 919 | c=0xd800; // Check for ASCII and surrogates but not all of the BMP. |
| 920 | } |
| 921 | if(trie.resetToState(state).next(c)) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 922 | nextContinues=true; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 923 | break; |
| 924 | } |
| 925 | } |
| 926 | if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) { |
| 927 | errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts " |
| 928 | "(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s); |
| 929 | } |
| 930 | trie.reset(); |
| 931 | } |
| 932 | } |
| 933 | |
| 934 | void UCharsTrieTest::checkNextWithState(UCharsTrie &trie, |
| 935 | const StringAndValue data[], int32_t dataLength) { |
| 936 | UCharsTrie::State noState, state; |
| 937 | for(int32_t i=0; i<dataLength; ++i) { |
| 938 | if((i&1)==0) { |
| 939 | // This should have no effect. |
| 940 | trie.resetToState(noState); |
| 941 | } |
| 942 | UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); |
| 943 | int32_t stringLength=expectedString.length(); |
| 944 | int32_t partialLength=stringLength/3; |
| 945 | for(int32_t j=0; j<partialLength; ++j) { |
| 946 | if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) { |
| 947 | errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s); |
| 948 | return; |
| 949 | } |
| 950 | } |
| 951 | trie.saveState(state); |
| 952 | UStringTrieResult resultAtState=trie.current(); |
| 953 | UStringTrieResult result; |
| 954 | int32_t valueAtState=-99; |
| 955 | if(USTRINGTRIE_HAS_VALUE(resultAtState)) { |
| 956 | valueAtState=trie.getValue(); |
| 957 | } |
| 958 | result=trie.next(0); // mismatch |
| 959 | if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) { |
| 960 | errln("trie.next(0) matched after part of %s", data[i].s); |
| 961 | } |
| 962 | if( resultAtState!=trie.resetToState(state).current() || |
| 963 | (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue()) |
| 964 | ) { |
| 965 | errln("trie.next(part of %s) changes current()/getValue() after " |
| 966 | "saveState/next(0)/resetToState", |
| 967 | data[i].s); |
| 968 | } else if(!USTRINGTRIE_HAS_VALUE( |
| 969 | result=trie.next(expectedString.getTerminatedBuffer()+partialLength, |
| 970 | stringLength-partialLength)) || |
| 971 | result!=trie.current()) { |
| 972 | errln("trie.next(rest of %s) does not seem to contain %s after " |
| 973 | "saveState/next(0)/resetToState", |
| 974 | data[i].s, data[i].s); |
| 975 | } else if(!USTRINGTRIE_HAS_VALUE( |
| 976 | result=trie.resetToState(state). |
| 977 | next(expectedString.getTerminatedBuffer()+partialLength, |
| 978 | stringLength-partialLength)) || |
| 979 | result!=trie.current()) { |
| 980 | errln("trie does not seem to contain %s after saveState/next(rest)/resetToState", |
| 981 | data[i].s); |
| 982 | } else if(trie.getValue()!=data[i].value) { |
| 983 | errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx", |
| 984 | data[i].s, |
| 985 | (long)trie.getValue(), (long)trie.getValue(), |
| 986 | (long)data[i].value, (long)data[i].value); |
| 987 | } |
| 988 | trie.reset(); |
| 989 | } |
| 990 | } |
| 991 | |
| 992 | void UCharsTrieTest::checkNextWithState64(UCharsTrie &trie, |
| 993 | const StringAndValue data[], int32_t dataLength) { |
| 994 | assertTrue("trie(initial state).getState64()!=0", trie.getState64() != 0); |
| 995 | for(int32_t i=0; i<dataLength; ++i) { |
| 996 | UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); |
| 997 | int32_t stringLength=expectedString.length(); |
| 998 | int32_t partialLength = stringLength / 3; |
| 999 | for(int32_t j=0; j<partialLength; ++j) { |
| 1000 | if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) { |
| 1001 | errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s); |
| 1002 | return; |
| 1003 | } |
| 1004 | } |
| 1005 | uint64_t state = trie.getState64(); |
| 1006 | assertTrue("trie.getState64()!=0", state != 0); |
| 1007 | UStringTrieResult resultAtState=trie.current(); |
| 1008 | UStringTrieResult result; |
| 1009 | int32_t valueAtState=-99; |
| 1010 | if(USTRINGTRIE_HAS_VALUE(resultAtState)) { |
| 1011 | valueAtState=trie.getValue(); |
| 1012 | } |
| 1013 | result=trie.next(0); // mismatch |
| 1014 | if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) { |
| 1015 | errln("trie.next(0) matched after part of %s", data[i].s); |
| 1016 | } |
| 1017 | if( resultAtState!=trie.resetToState64(state).current() || |
| 1018 | (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue()) |
| 1019 | ) { |
| 1020 | errln("trie.next(part of %s) changes current()/getValue() after " |
| 1021 | "getState64/next(0)/resetToState64", |
| 1022 | data[i].s); |
| 1023 | } else if(!USTRINGTRIE_HAS_VALUE( |
| 1024 | result=trie.next(expectedString.getTerminatedBuffer()+partialLength, |
| 1025 | stringLength-partialLength)) || |
| 1026 | result!=trie.current()) { |
| 1027 | errln("trie.next(rest of %s) does not seem to contain %s after " |
| 1028 | "getState64/next(0)/resetToState64", |
| 1029 | data[i].s, data[i].s); |
| 1030 | } else if(!USTRINGTRIE_HAS_VALUE( |
| 1031 | result=trie.resetToState64(state). |
| 1032 | next(expectedString.getTerminatedBuffer()+partialLength, |
| 1033 | stringLength-partialLength)) || |
| 1034 | result!=trie.current()) { |
| 1035 | errln("trie does not seem to contain %s after getState64/next(rest)/resetToState64", |
| 1036 | data[i].s); |
| 1037 | } else if(trie.getValue()!=data[i].value) { |
| 1038 | errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx", |
| 1039 | data[i].s, |
| 1040 | (long)trie.getValue(), (long)trie.getValue(), |
| 1041 | (long)data[i].value, (long)data[i].value); |
| 1042 | } |
| 1043 | trie.reset(); |
| 1044 | } |
| 1045 | } |
| 1046 | |
| 1047 | // next(string) is also tested in other functions, |
| 1048 | // but here we try to go partway through the string, and then beyond it. |
| 1049 | void UCharsTrieTest::checkNextString(UCharsTrie &trie, |
| 1050 | const StringAndValue data[], int32_t dataLength) { |
| 1051 | for(int32_t i=0; i<dataLength; ++i) { |
| 1052 | UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); |
| 1053 | int32_t stringLength=expectedString.length(); |
| 1054 | if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) { |
| 1055 | errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s); |
| 1056 | continue; |
| 1057 | } |
| 1058 | // Test that we stop properly at the end of the string. |
| 1059 | if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2, |
| 1060 | stringLength+1-stringLength/2)) { |
| 1061 | errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s); |
| 1062 | } |
| 1063 | trie.reset(); |
| 1064 | } |
| 1065 | } |
| 1066 | |
| 1067 | void UCharsTrieTest::checkIterator(UCharsTrie &trie, |
| 1068 | const StringAndValue data[], int32_t dataLength) { |
| 1069 | IcuTestErrorCode errorCode(*this, "checkIterator()"); |
| 1070 | UCharsTrie::Iterator iter(trie, 0, errorCode); |
| 1071 | if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) { |
| 1072 | return; |
| 1073 | } |
| 1074 | checkIterator(iter, data, dataLength); |
| 1075 | } |
| 1076 | |
| 1077 | void UCharsTrieTest::checkIterator(UCharsTrie::Iterator &iter, |
| 1078 | const StringAndValue data[], int32_t dataLength) { |
| 1079 | IcuTestErrorCode errorCode(*this, "checkIterator()"); |
| 1080 | for(int32_t i=0; i<dataLength; ++i) { |
| 1081 | if(!iter.hasNext()) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1082 | errln("trie iterator hasNext()=false for item %d: %s", (int)i, data[i].s); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1083 | break; |
| 1084 | } |
| 1085 | UBool hasNext=iter.next(errorCode); |
| 1086 | if(errorCode.errIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) { |
| 1087 | break; |
| 1088 | } |
| 1089 | if(!hasNext) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1090 | errln("trie iterator next()=false for item %d: %s", (int)i, data[i].s); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1091 | break; |
| 1092 | } |
| 1093 | UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); |
| 1094 | if(iter.getString()!=expectedString) { |
| 1095 | char buffer[1000]; |
| 1096 | UnicodeString invString(prettify(iter.getString())); |
| 1097 | invString.extract(0, invString.length(), buffer, UPRV_LENGTHOF(buffer), US_INV); |
| 1098 | errln("trie iterator next().getString()=%s but expected %s for item %d", |
| 1099 | buffer, data[i].s, (int)i); |
| 1100 | } |
| 1101 | if(iter.getValue()!=data[i].value) { |
| 1102 | errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s", |
| 1103 | (long)iter.getValue(), (long)iter.getValue(), |
| 1104 | (long)data[i].value, (long)data[i].value, |
| 1105 | (int)i, data[i].s); |
| 1106 | } |
| 1107 | } |
| 1108 | if(iter.hasNext()) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1109 | errln("trie iterator hasNext()=true after all items"); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1110 | } |
| 1111 | UBool hasNext=iter.next(errorCode); |
| 1112 | errorCode.errIfFailureAndReset("trie iterator next() after all items"); |
| 1113 | if(hasNext) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1114 | errln("trie iterator next()=true after all items"); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1115 | } |
| 1116 | } |