Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1 | // © 2016 and later: Unicode, Inc. and others. |
| 2 | // License & terms of use: http://www.unicode.org/copyright.html |
| 3 | /* |
| 4 | ********************************************************************** |
| 5 | * Copyright (C) 2005-2016, International Business Machines |
| 6 | * Corporation and others. All Rights Reserved. |
| 7 | ********************************************************************** |
| 8 | */ |
| 9 | |
| 10 | #include "unicode/utypes.h" |
| 11 | |
| 12 | #if !UCONFIG_NO_COLLATION |
| 13 | |
| 14 | #include "cmemory.h" |
| 15 | #include "cstring.h" |
| 16 | #include "usrchimp.h" |
| 17 | |
| 18 | #include "unicode/coll.h" |
| 19 | #include "unicode/tblcoll.h" |
| 20 | #include "unicode/usearch.h" |
| 21 | #include "unicode/uset.h" |
| 22 | #include "unicode/ustring.h" |
| 23 | |
| 24 | #include "unicode/coleitr.h" |
| 25 | #include "unicode/regex.h" // TODO: make conditional on regexp being built. |
| 26 | |
| 27 | #include "colldata.h" |
| 28 | #include "ssearch.h" |
| 29 | #include "xmlparser.h" |
| 30 | |
| 31 | #include <stdio.h> // for sprintf |
| 32 | |
// Identifier of the test case currently being run. searchTest() fills it in
// from the XML "id" attribute, and the assertion macros below echo it so a
// failure can be traced back to its test data.
char testId[100];

// Reports a failure through errln() when x is false; execution continues.
#define TEST_ASSERT(x) UPRV_BLOCK_MACRO_BEGIN { \
    if (!(x)) { \
        errln("Failure in file %s, line %d, test ID = \"%s\"", __FILE__, __LINE__, testId); \
    } \
} UPRV_BLOCK_MACRO_END

// Reports a failure with message m through dataerrln() when x is false and
// returns from the enclosing (void) function.
#define TEST_ASSERT_M(x, m) UPRV_BLOCK_MACRO_BEGIN { \
    if (!(x)) { \
        dataerrln("Failure in file %s, line %d.   \"%s\"", __FILE__, __LINE__, m); \
        return; \
    } \
} UPRV_BLOCK_MACRO_END

// Reports a failing UErrorCode through dataerrln(); execution continues, so
// callers that cannot proceed must test the status themselves.
#define TEST_ASSERT_SUCCESS(errcode) UPRV_BLOCK_MACRO_BEGIN { \
    if (U_FAILURE(errcode)) { \
        dataerrln("Failure in file %s, line %d, test ID \"%s\", status = \"%s\"", \
                  __FILE__, __LINE__, testId, u_errorName(errcode)); \
    } \
} UPRV_BLOCK_MACRO_END

// Allocates an uninitialized array of count elements with uprv_malloc().
// The whole expansion is parenthesized so the macro can be used safely inside
// a larger expression (a bare cast would bind more loosely than a following
// postfix operator such as []).
#define NEW_ARRAY(type, count) ((type *) uprv_malloc((count) * sizeof(type)))

// Releases an array obtained from NEW_ARRAY.
#define DELETE_ARRAY(array) uprv_free((void *) (array))
| 57 | |
| 58 | //--------------------------------------------------------------------------- |
| 59 | // |
| 60 | // Test class boilerplate |
| 61 | // |
| 62 | //--------------------------------------------------------------------------- |
| 63 | SSearchTest::SSearchTest() |
| 64 | { |
| 65 | } |
| 66 | |
| 67 | SSearchTest::~SSearchTest() |
| 68 | { |
| 69 | } |
| 70 | |
| 71 | void SSearchTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *params ) |
| 72 | { |
| 73 | if (exec) logln("TestSuite SSearchTest: "); |
| 74 | switch (index) { |
| 75 | #if !UCONFIG_NO_BREAK_ITERATION |
| 76 | case 0: name = "searchTest"; |
| 77 | if (exec) searchTest(); |
| 78 | break; |
| 79 | |
| 80 | case 1: name = "offsetTest"; |
| 81 | if (exec) offsetTest(); |
| 82 | break; |
| 83 | |
| 84 | case 2: name = "monkeyTest"; |
| 85 | if (exec) monkeyTest(params); |
| 86 | break; |
| 87 | |
| 88 | case 3: name = "sharpSTest"; |
| 89 | if (exec) sharpSTest(); |
| 90 | break; |
| 91 | |
| 92 | case 4: name = "goodSuffixTest"; |
| 93 | if (exec) goodSuffixTest(); |
| 94 | break; |
| 95 | |
| 96 | case 5: name = "searchTime"; |
| 97 | if (exec) searchTime(); |
| 98 | break; |
| 99 | #endif |
| 100 | default: name = ""; |
| 101 | break; //needed to end loop |
| 102 | } |
| 103 | } |
| 104 | |
| 105 | |
| 106 | #if !UCONFIG_NO_BREAK_ITERATION |
| 107 | |
| 108 | #define PATH_BUFFER_SIZE 2048 |
| 109 | const char *SSearchTest::getPath(char buffer[2048], const char *filename) { |
| 110 | UErrorCode status = U_ZERO_ERROR; |
| 111 | const char *testDataDirectory = IntlTest::getSourceTestData(status); |
| 112 | |
| 113 | if (U_FAILURE(status) || strlen(testDataDirectory) + strlen(filename) + 1 >= PATH_BUFFER_SIZE) { |
| 114 | errln("ERROR: getPath() failed - %s", u_errorName(status)); |
| 115 | return NULL; |
| 116 | } |
| 117 | |
| 118 | strcpy(buffer, testDataDirectory); |
| 119 | strcat(buffer, filename); |
| 120 | return buffer; |
| 121 | } |
| 122 | |
| 123 | |
| 124 | void SSearchTest::searchTest() |
| 125 | { |
| 126 | #if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FILE_IO |
| 127 | UErrorCode status = U_ZERO_ERROR; |
| 128 | char path[PATH_BUFFER_SIZE]; |
| 129 | const char *testFilePath = getPath(path, "ssearch.xml"); |
| 130 | |
| 131 | if (testFilePath == NULL) { |
| 132 | return; /* Couldn't get path: error message already output. */ |
| 133 | } |
| 134 | |
| 135 | LocalPointer<UXMLParser> parser(UXMLParser::createParser(status)); |
| 136 | TEST_ASSERT_SUCCESS(status); |
| 137 | LocalPointer<UXMLElement> root(parser->parseFile(testFilePath, status)); |
| 138 | TEST_ASSERT_SUCCESS(status); |
| 139 | if (U_FAILURE(status)) { |
| 140 | return; |
| 141 | } |
| 142 | |
| 143 | const UnicodeString *debugTestCase = root->getAttribute("debug"); |
| 144 | if (debugTestCase != NULL) { |
| 145 | // setenv("USEARCH_DEBUG", "1", 1); |
| 146 | } |
| 147 | |
| 148 | |
| 149 | const UXMLElement *testCase; |
| 150 | int32_t tc = 0; |
| 151 | |
| 152 | while((testCase = root->nextChildElement(tc)) != NULL) { |
| 153 | |
| 154 | if (testCase->getTagName().compare("test-case") != 0) { |
| 155 | errln("ssearch, unrecognized XML Element in test file"); |
| 156 | continue; |
| 157 | } |
| 158 | const UnicodeString *id = testCase->getAttribute("id"); |
| 159 | *testId = 0; |
| 160 | if (id != NULL) { |
| 161 | id->extract(0, id->length(), testId, sizeof(testId), US_INV); |
| 162 | } |
| 163 | |
| 164 | // If debugging test case has been specified and this is not it, skip to next. |
| 165 | if (id!=NULL && debugTestCase!=NULL && *id != *debugTestCase) { |
| 166 | continue; |
| 167 | } |
| 168 | // |
| 169 | // Get the requested collation strength. |
| 170 | // Default is tertiary if the XML attribute is missing from the test case. |
| 171 | // |
| 172 | const UnicodeString *strength = testCase->getAttribute("strength"); |
| 173 | UColAttributeValue collatorStrength = UCOL_PRIMARY; |
| 174 | if (strength==NULL) { collatorStrength = UCOL_TERTIARY;} |
| 175 | else if (*strength=="PRIMARY") { collatorStrength = UCOL_PRIMARY;} |
| 176 | else if (*strength=="SECONDARY") { collatorStrength = UCOL_SECONDARY;} |
| 177 | else if (*strength=="TERTIARY") { collatorStrength = UCOL_TERTIARY;} |
| 178 | else if (*strength=="QUATERNARY") { collatorStrength = UCOL_QUATERNARY;} |
| 179 | else if (*strength=="IDENTICAL") { collatorStrength = UCOL_IDENTICAL;} |
| 180 | else { |
| 181 | // Bogus value supplied for strength. Shouldn't happen, even from |
| 182 | // typos, if the XML source has been validated. |
| 183 | // This assert is a little deceiving in that strength can be |
| 184 | // any of the allowed values, not just TERTIARY, but it will |
| 185 | // do the job of getting the error output. |
| 186 | TEST_ASSERT(*strength=="TERTIARY"); |
| 187 | } |
| 188 | |
| 189 | // |
| 190 | // Get the collator normalization flag. Default is UCOL_OFF. |
| 191 | // |
| 192 | UColAttributeValue normalize = UCOL_OFF; |
| 193 | const UnicodeString *norm = testCase->getAttribute("norm"); |
| 194 | TEST_ASSERT (norm==NULL || *norm=="ON" || *norm=="OFF"); |
| 195 | if (norm!=NULL && *norm=="ON") { |
| 196 | normalize = UCOL_ON; |
| 197 | } |
| 198 | |
| 199 | // |
| 200 | // Get the alternate_handling flag. Default is UCOL_NON_IGNORABLE. |
| 201 | // |
| 202 | UColAttributeValue alternateHandling = UCOL_NON_IGNORABLE; |
| 203 | const UnicodeString *alt = testCase->getAttribute("alternate_handling"); |
| 204 | TEST_ASSERT (alt == NULL || *alt == "SHIFTED" || *alt == "NON_IGNORABLE"); |
| 205 | if (alt != NULL && *alt == "SHIFTED") { |
| 206 | alternateHandling = UCOL_SHIFTED; |
| 207 | } |
| 208 | |
| 209 | const UnicodeString defLocale("en"); |
| 210 | char clocale[100]; |
| 211 | const UnicodeString *locale = testCase->getAttribute("locale"); |
| 212 | if (locale == NULL || locale->length()==0) { |
| 213 | locale = &defLocale; |
| 214 | } |
| 215 | locale->extract(0, locale->length(), clocale, sizeof(clocale), NULL); |
| 216 | |
| 217 | |
| 218 | UnicodeString text; |
| 219 | UnicodeString target; |
| 220 | UnicodeString pattern; |
| 221 | int32_t expectedMatchStart = -1; |
| 222 | int32_t expectedMatchLimit = -1; |
| 223 | const UXMLElement *n; |
| 224 | int32_t nodeCount = 0; |
| 225 | |
| 226 | n = testCase->getChildElement("pattern"); |
| 227 | TEST_ASSERT(n != NULL); |
| 228 | if (n==NULL) { |
| 229 | continue; |
| 230 | } |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 231 | text = n->getText(false); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 232 | text = text.unescape(); |
| 233 | pattern.append(text); |
| 234 | nodeCount++; |
| 235 | |
| 236 | n = testCase->getChildElement("pre"); |
| 237 | if (n!=NULL) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 238 | text = n->getText(false); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 239 | text = text.unescape(); |
| 240 | target.append(text); |
| 241 | nodeCount++; |
| 242 | } |
| 243 | |
| 244 | n = testCase->getChildElement("m"); |
| 245 | if (n!=NULL) { |
| 246 | expectedMatchStart = target.length(); |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 247 | text = n->getText(false); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 248 | text = text.unescape(); |
| 249 | target.append(text); |
| 250 | expectedMatchLimit = target.length(); |
| 251 | nodeCount++; |
| 252 | } |
| 253 | |
| 254 | n = testCase->getChildElement("post"); |
| 255 | if (n!=NULL) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 256 | text = n->getText(false); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 257 | text = text.unescape(); |
| 258 | target.append(text); |
| 259 | nodeCount++; |
| 260 | } |
| 261 | |
| 262 | // Check that there weren't extra things in the XML |
| 263 | TEST_ASSERT(nodeCount == testCase->countChildren()); |
| 264 | |
| 265 | // Open a collator and StringSearch based on the parameters |
| 266 | // obtained from the XML. |
| 267 | // |
| 268 | status = U_ZERO_ERROR; |
| 269 | LocalUCollatorPointer collator(ucol_open(clocale, &status)); |
| 270 | ucol_setStrength(collator.getAlias(), collatorStrength); |
| 271 | ucol_setAttribute(collator.getAlias(), UCOL_NORMALIZATION_MODE, normalize, &status); |
| 272 | ucol_setAttribute(collator.getAlias(), UCOL_ALTERNATE_HANDLING, alternateHandling, &status); |
| 273 | LocalUStringSearchPointer uss(usearch_openFromCollator(pattern.getBuffer(), pattern.length(), |
| 274 | target.getBuffer(), target.length(), |
| 275 | collator.getAlias(), |
| 276 | NULL, // the break iterator |
| 277 | &status)); |
| 278 | |
| 279 | TEST_ASSERT_SUCCESS(status); |
| 280 | if (U_FAILURE(status)) { |
| 281 | continue; |
| 282 | } |
| 283 | |
| 284 | int32_t foundStart = 0; |
| 285 | int32_t foundLimit = 0; |
| 286 | UBool foundMatch; |
| 287 | |
| 288 | // |
| 289 | // Do the search, check the match result against the expected results. |
| 290 | // |
| 291 | foundMatch= usearch_search(uss.getAlias(), 0, &foundStart, &foundLimit, &status); |
| 292 | TEST_ASSERT_SUCCESS(status); |
| 293 | if ((foundMatch && expectedMatchStart<0) || |
| 294 | (foundStart != expectedMatchStart) || |
| 295 | (foundLimit != expectedMatchLimit)) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 296 | TEST_ASSERT(false); // output generic error position |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 297 | infoln("Found, expected match start = %d, %d \n" |
| 298 | "Found, expected match limit = %d, %d", |
| 299 | foundStart, expectedMatchStart, foundLimit, expectedMatchLimit); |
| 300 | } |
| 301 | |
| 302 | // In case there are other matches... |
| 303 | // (should we only do this if the test case passed?) |
| 304 | while (foundMatch) { |
| 305 | expectedMatchStart = foundStart; |
| 306 | expectedMatchLimit = foundLimit; |
| 307 | |
| 308 | foundMatch = usearch_search(uss.getAlias(), foundLimit, &foundStart, &foundLimit, &status); |
| 309 | } |
| 310 | |
| 311 | uss.adoptInstead(usearch_openFromCollator(pattern.getBuffer(), pattern.length(), |
| 312 | target.getBuffer(), target.length(), |
| 313 | collator.getAlias(), |
| 314 | NULL, |
| 315 | &status)); |
| 316 | |
| 317 | // |
| 318 | // Do the backwards search, check the match result against the expected results. |
| 319 | // |
| 320 | foundMatch= usearch_searchBackwards(uss.getAlias(), target.length(), &foundStart, &foundLimit, &status); |
| 321 | TEST_ASSERT_SUCCESS(status); |
| 322 | if ((foundMatch && expectedMatchStart<0) || |
| 323 | (foundStart != expectedMatchStart) || |
| 324 | (foundLimit != expectedMatchLimit)) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 325 | TEST_ASSERT(false); // output generic error position |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 326 | infoln("Found, expected backwards match start = %d, %d \n" |
| 327 | "Found, expected backwards match limit = %d, %d", |
| 328 | foundStart, expectedMatchStart, foundLimit, expectedMatchLimit); |
| 329 | } |
| 330 | } |
| 331 | #endif |
| 332 | } |
| 333 | |
// One collation element together with the range of source text offsets that
// the collation element iterator reported around it.
struct Order
{
    int32_t order;       // collation element value
    int32_t lowOffset;   // iterator offset before the element was read
    int32_t highOffset;  // iterator offset after the element was read
};
| 340 | |
| 341 | class OrderList |
| 342 | { |
| 343 | public: |
| 344 | OrderList(); |
| 345 | OrderList(UCollator *coll, const UnicodeString &string, int32_t stringOffset = 0); |
| 346 | ~OrderList(); |
| 347 | |
| 348 | int32_t size(void) const; |
| 349 | void add(int32_t order, int32_t low, int32_t high); |
| 350 | const Order *get(int32_t index) const; |
| 351 | int32_t getLowOffset(int32_t index) const; |
| 352 | int32_t getHighOffset(int32_t index) const; |
| 353 | int32_t getOrder(int32_t index) const; |
| 354 | void reverse(void); |
| 355 | UBool compare(const OrderList &other) const; |
| 356 | UBool matchesAt(int32_t offset, const OrderList &other) const; |
| 357 | |
| 358 | private: |
| 359 | Order *list; |
| 360 | int32_t listMax; |
| 361 | int32_t listSize; |
| 362 | }; |
| 363 | |
| 364 | OrderList::OrderList() |
| 365 | : list(NULL), listMax(16), listSize(0) |
| 366 | { |
| 367 | list = new Order[listMax]; |
| 368 | } |
| 369 | |
| 370 | OrderList::OrderList(UCollator *coll, const UnicodeString &string, int32_t stringOffset) |
| 371 | : list(NULL), listMax(16), listSize(0) |
| 372 | { |
| 373 | UErrorCode status = U_ZERO_ERROR; |
| 374 | UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), string.length(), &status); |
| 375 | uint32_t strengthMask = 0; |
| 376 | int32_t order, low, high; |
| 377 | |
| 378 | switch (ucol_getStrength(coll)) |
| 379 | { |
| 380 | default: |
| 381 | strengthMask |= UCOL_TERTIARYORDERMASK; |
| 382 | U_FALLTHROUGH; |
| 383 | case UCOL_SECONDARY: |
| 384 | strengthMask |= UCOL_SECONDARYORDERMASK; |
| 385 | U_FALLTHROUGH; |
| 386 | case UCOL_PRIMARY: |
| 387 | strengthMask |= UCOL_PRIMARYORDERMASK; |
| 388 | } |
| 389 | |
| 390 | list = new Order[listMax]; |
| 391 | |
| 392 | ucol_setOffset(elems, stringOffset, &status); |
| 393 | |
| 394 | do { |
| 395 | low = ucol_getOffset(elems); |
| 396 | order = ucol_next(elems, &status); |
| 397 | high = ucol_getOffset(elems); |
| 398 | |
| 399 | if (order != UCOL_NULLORDER) { |
| 400 | order &= strengthMask; |
| 401 | } |
| 402 | |
| 403 | if (order != UCOL_IGNORABLE) { |
| 404 | add(order, low, high); |
| 405 | } |
| 406 | } while (order != UCOL_NULLORDER); |
| 407 | |
| 408 | ucol_closeElements(elems); |
| 409 | } |
| 410 | |
| 411 | OrderList::~OrderList() |
| 412 | { |
| 413 | delete[] list; |
| 414 | } |
| 415 | |
| 416 | void OrderList::add(int32_t order, int32_t low, int32_t high) |
| 417 | { |
| 418 | if (listSize >= listMax) { |
| 419 | listMax *= 2; |
| 420 | |
| 421 | Order *newList = new Order[listMax]; |
| 422 | |
| 423 | uprv_memcpy(newList, list, listSize * sizeof(Order)); |
| 424 | delete[] list; |
| 425 | list = newList; |
| 426 | } |
| 427 | |
| 428 | list[listSize].order = order; |
| 429 | list[listSize].lowOffset = low; |
| 430 | list[listSize].highOffset = high; |
| 431 | |
| 432 | listSize += 1; |
| 433 | } |
| 434 | |
| 435 | const Order *OrderList::get(int32_t index) const |
| 436 | { |
| 437 | if (index >= listSize) { |
| 438 | return NULL; |
| 439 | } |
| 440 | |
| 441 | return &list[index]; |
| 442 | } |
| 443 | |
| 444 | int32_t OrderList::getLowOffset(int32_t index) const |
| 445 | { |
| 446 | const Order *order = get(index); |
| 447 | |
| 448 | if (order != NULL) { |
| 449 | return order->lowOffset; |
| 450 | } |
| 451 | |
| 452 | return -1; |
| 453 | } |
| 454 | |
| 455 | int32_t OrderList::getHighOffset(int32_t index) const |
| 456 | { |
| 457 | const Order *order = get(index); |
| 458 | |
| 459 | if (order != NULL) { |
| 460 | return order->highOffset; |
| 461 | } |
| 462 | |
| 463 | return -1; |
| 464 | } |
| 465 | |
| 466 | int32_t OrderList::getOrder(int32_t index) const |
| 467 | { |
| 468 | const Order *order = get(index); |
| 469 | |
| 470 | if (order != NULL) { |
| 471 | return order->order; |
| 472 | } |
| 473 | |
| 474 | return UCOL_NULLORDER; |
| 475 | } |
| 476 | |
| 477 | int32_t OrderList::size() const |
| 478 | { |
| 479 | return listSize; |
| 480 | } |
| 481 | |
| 482 | void OrderList::reverse() |
| 483 | { |
| 484 | for(int32_t f = 0, b = listSize - 1; f < b; f += 1, b -= 1) { |
| 485 | Order swap = list[b]; |
| 486 | |
| 487 | list[b] = list[f]; |
| 488 | list[f] = swap; |
| 489 | } |
| 490 | } |
| 491 | |
| 492 | UBool OrderList::compare(const OrderList &other) const |
| 493 | { |
| 494 | if (listSize != other.listSize) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 495 | return false; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 496 | } |
| 497 | |
| 498 | for(int32_t i = 0; i < listSize; i += 1) { |
| 499 | if (list[i].order != other.list[i].order || |
| 500 | list[i].lowOffset != other.list[i].lowOffset || |
| 501 | list[i].highOffset != other.list[i].highOffset) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 502 | return false; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 503 | } |
| 504 | } |
| 505 | |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 506 | return true; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 507 | } |
| 508 | |
| 509 | UBool OrderList::matchesAt(int32_t offset, const OrderList &other) const |
| 510 | { |
| 511 | // NOTE: sizes include the NULLORDER, which we don't want to compare. |
| 512 | int32_t otherSize = other.size() - 1; |
| 513 | |
| 514 | if (listSize - 1 - offset < otherSize) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 515 | return false; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 516 | } |
| 517 | |
| 518 | for (int32_t i = offset, j = 0; j < otherSize; i += 1, j += 1) { |
| 519 | if (getOrder(i) != other.getOrder(j)) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 520 | return false; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 521 | } |
| 522 | } |
| 523 | |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 524 | return true; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 525 | } |
| 526 | |
| 527 | static char *printOffsets(char *buffer, OrderList &list) |
| 528 | { |
| 529 | int32_t size = list.size(); |
| 530 | char *s = buffer; |
| 531 | |
| 532 | for(int32_t i = 0; i < size; i += 1) { |
| 533 | const Order *order = list.get(i); |
| 534 | |
| 535 | if (i != 0) { |
| 536 | s += sprintf(s, ", "); |
| 537 | } |
| 538 | |
| 539 | s += sprintf(s, "(%d, %d)", order->lowOffset, order->highOffset); |
| 540 | } |
| 541 | |
| 542 | return buffer; |
| 543 | } |
| 544 | |
| 545 | static char *printOrders(char *buffer, OrderList &list) |
| 546 | { |
| 547 | int32_t size = list.size(); |
| 548 | char *s = buffer; |
| 549 | |
| 550 | for(int32_t i = 0; i < size; i += 1) { |
| 551 | const Order *order = list.get(i); |
| 552 | |
| 553 | if (i != 0) { |
| 554 | s += sprintf(s, ", "); |
| 555 | } |
| 556 | |
| 557 | s += sprintf(s, "%8.8X", order->order); |
| 558 | } |
| 559 | |
| 560 | return buffer; |
| 561 | } |
| 562 | |
| 563 | void SSearchTest::offsetTest() |
| 564 | { |
| 565 | const char *test[] = { |
| 566 | // The sequence \u0FB3\u0F71\u0F71\u0F80 contains a discontiguous |
| 567 | // contraction (\u0FB3\u0F71\u0F80) logically followed by \u0F71. |
| 568 | "\\u1E33\\u0FB3\\u0F71\\u0F71\\u0F80\\uD835\\uDF6C\\u01B0", |
| 569 | |
| 570 | "\\ua191\\u16ef\\u2036\\u017a", |
| 571 | |
| 572 | #if 0 |
| 573 | // This results in a complex interaction between contraction, |
| 574 | // expansion and normalization that confuses the backwards offset fixups. |
| 575 | "\\u0F7F\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85", |
| 576 | #endif |
| 577 | |
| 578 | "\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85", |
| 579 | "\\u07E9\\u07EA\\u07F1\\u07F2\\u07F3", |
| 580 | |
| 581 | "\\u02FE\\u02FF" |
| 582 | "\\u0300\\u0301\\u0302\\u0303\\u0304\\u0305\\u0306\\u0307\\u0308\\u0309\\u030A\\u030B\\u030C\\u030D\\u030E\\u030F" |
| 583 | "\\u0310\\u0311\\u0312\\u0313\\u0314\\u0315\\u0316\\u0317\\u0318\\u0319\\u031A\\u031B\\u031C\\u031D\\u031E\\u031F" |
| 584 | "\\u0320\\u0321\\u0322\\u0323\\u0324\\u0325\\u0326\\u0327\\u0328\\u0329\\u032A\\u032B\\u032C\\u032D\\u032E\\u032F" |
| 585 | "\\u0330\\u0331\\u0332\\u0333\\u0334\\u0335\\u0336\\u0337\\u0338\\u0339\\u033A\\u033B\\u033C\\u033D\\u033E\\u033F" |
| 586 | "\\u0340\\u0341\\u0342\\u0343\\u0344\\u0345\\u0346\\u0347\\u0348\\u0349\\u034A\\u034B\\u034C\\u034D\\u034E", // currently not working, see #8081 |
| 587 | |
| 588 | "\\u02FE\\u02FF\\u0300\\u0301\\u0302\\u0303\\u0316\\u0317\\u0318", // currently not working, see #8081 |
| 589 | "a\\u02FF\\u0301\\u0316", // currently not working, see #8081 |
| 590 | "a\\u02FF\\u0316\\u0301", |
| 591 | "a\\u0430\\u0301\\u0316", |
| 592 | "a\\u0430\\u0316\\u0301", |
| 593 | "abc\\u0E41\\u0301\\u0316", |
| 594 | "abc\\u0E41\\u0316\\u0301", |
| 595 | "\\u0E41\\u0301\\u0316", |
| 596 | "\\u0E41\\u0316\\u0301", |
| 597 | "a\\u0301\\u0316", |
| 598 | "a\\u0316\\u0301", |
| 599 | "\\uAC52\\uAC53", |
| 600 | "\\u34CA\\u34CB", |
| 601 | "\\u11ED\\u11EE", |
| 602 | "\\u30C3\\u30D0", |
| 603 | "p\\u00E9ch\\u00E9", |
| 604 | "a\\u0301\\u0325", |
| 605 | "a\\u0300\\u0325", |
| 606 | "a\\u0325\\u0300", |
| 607 | "A\\u0323\\u0300B", |
| 608 | "A\\u0300\\u0323B", |
| 609 | "A\\u0301\\u0323B", |
| 610 | "A\\u0302\\u0301\\u0323B", |
| 611 | "abc", |
| 612 | "ab\\u0300c", |
| 613 | "ab\\u0300\\u0323c", |
| 614 | " \\uD800\\uDC00\\uDC00", |
| 615 | "a\\uD800\\uDC00\\uDC00", |
| 616 | "A\\u0301\\u0301", |
| 617 | "A\\u0301\\u0323", |
| 618 | "A\\u0301\\u0323B", |
| 619 | "B\\u0301\\u0323C", |
| 620 | "A\\u0300\\u0323B", |
| 621 | "\\u0301A\\u0301\\u0301", |
| 622 | "abcd\\r\\u0301", |
| 623 | "p\\u00EAche", |
| 624 | "pe\\u0302che", |
| 625 | }; |
| 626 | |
| 627 | int32_t testCount = UPRV_LENGTHOF(test); |
| 628 | UErrorCode status = U_ZERO_ERROR; |
| 629 | RuleBasedCollator *col = (RuleBasedCollator *) Collator::createInstance(Locale::getEnglish(), status); |
| 630 | if (U_FAILURE(status)) { |
| 631 | errcheckln(status, "Failed to create collator in offsetTest! - %s", u_errorName(status)); |
| 632 | return; |
| 633 | } |
| 634 | char buffer[4096]; // A bit of a hack... just happens to be long enough for all the test cases... |
| 635 | // We could allocate one that's the right size by (CE_count * 10) + 2 |
| 636 | // 10 chars is enough room for 8 hex digits plus ", ". 2 extra chars for "[" and "]" |
| 637 | |
| 638 | col->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); |
| 639 | |
| 640 | for(int32_t i = 0; i < testCount; i += 1) { |
| 641 | UnicodeString ts = CharsToUnicodeString(test[i]); |
| 642 | CollationElementIterator *iter = col->createCollationElementIterator(ts); |
| 643 | OrderList forwardList; |
| 644 | OrderList backwardList; |
| 645 | int32_t order, low, high; |
| 646 | |
| 647 | do { |
| 648 | low = iter->getOffset(); |
| 649 | order = iter->next(status); |
| 650 | high = iter->getOffset(); |
| 651 | |
| 652 | forwardList.add(order, low, high); |
| 653 | } while (order != CollationElementIterator::NULLORDER); |
| 654 | |
| 655 | iter->reset(); |
| 656 | iter->setOffset(ts.length(), status); |
| 657 | |
| 658 | backwardList.add(CollationElementIterator::NULLORDER, iter->getOffset(), iter->getOffset()); |
| 659 | |
| 660 | do { |
| 661 | high = iter->getOffset(); |
| 662 | order = iter->previous(status); |
| 663 | low = iter->getOffset(); |
| 664 | |
| 665 | if (order == CollationElementIterator::NULLORDER) { |
| 666 | break; |
| 667 | } |
| 668 | |
| 669 | backwardList.add(order, low, high); |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 670 | } while (true); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 671 | |
| 672 | backwardList.reverse(); |
| 673 | |
| 674 | if (forwardList.compare(backwardList)) { |
| 675 | logln("Works with \"%s\"", test[i]); |
| 676 | logln("Forward offsets: [%s]", printOffsets(buffer, forwardList)); |
| 677 | // logln("Backward offsets: [%s]", printOffsets(buffer, backwardList)); |
| 678 | |
| 679 | logln("Forward CEs: [%s]", printOrders(buffer, forwardList)); |
| 680 | // logln("Backward CEs: [%s]", printOrders(buffer, backwardList)); |
| 681 | |
| 682 | logln(); |
| 683 | } else { |
| 684 | errln("Fails with \"%s\"", test[i]); |
| 685 | infoln("Forward offsets: [%s]", printOffsets(buffer, forwardList)); |
| 686 | infoln("Backward offsets: [%s]", printOffsets(buffer, backwardList)); |
| 687 | |
| 688 | infoln("Forward CEs: [%s]", printOrders(buffer, forwardList)); |
| 689 | infoln("Backward CEs: [%s]", printOrders(buffer, backwardList)); |
| 690 | |
| 691 | infoln(); |
| 692 | } |
| 693 | delete iter; |
| 694 | } |
| 695 | delete col; |
| 696 | } |
| 697 | |
#if 0
// Appends string to buffer with backslashes doubled and every code point
// outside U+0020..U+007F written as a \uXXXX or \UXXXXXXXX escape.
// Returns buffer. (Currently compiled out.)
static UnicodeString &escape(const UnicodeString &string, UnicodeString &buffer)
{
    int32_t i = 0;

    while (i < string.length()) {
        const UChar32 ch = string.char32At(i);
        const UBool inAsciiRange = (ch >= 0x0020 && ch <= 0x007F);

        if (inAsciiRange && ch == 0x005C) {
            buffer.append("\\\\");
        } else if (inAsciiRange) {
            buffer.append(ch);
        } else {
            char cbuffer[12];

            sprintf(cbuffer, (ch <= 0xFFFFL) ? "\\u%4.4X" : "\\U%8.8X", ch);
            buffer.append(cbuffer);
        }

        // Supplementary code points occupy two UTF-16 code units.
        i += (ch >= 0x10000L) ? 2 : 1;
    }

    return buffer;
}
#endif
| 730 | |
| 731 | void SSearchTest::sharpSTest() |
| 732 | { |
| 733 | UErrorCode status = U_ZERO_ERROR; |
| 734 | UCollator *coll = NULL; |
| 735 | UnicodeString lp = "fuss"; |
| 736 | UnicodeString sp = "fu\\u00DF"; |
| 737 | UnicodeString targets[] = {"fu\\u00DF", "fu\\u00DFball", "1fu\\u00DFball", "12fu\\u00DFball", "123fu\\u00DFball", "1234fu\\u00DFball", |
| 738 | "ffu\\u00DF", "fufu\\u00DF", "fusfu\\u00DF", |
| 739 | "fuss", "ffuss", "fufuss", "fusfuss", "1fuss", "12fuss", "123fuss", "1234fuss", "fu\\u00DF", "1fu\\u00DF", "12fu\\u00DF", "123fu\\u00DF", "1234fu\\u00DF"}; |
| 740 | int32_t start = -1, end = -1; |
| 741 | |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 742 | coll = ucol_openFromShortString("LEN_S1", false, NULL, &status); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 743 | TEST_ASSERT_SUCCESS(status); |
| 744 | |
| 745 | UnicodeString lpUnescaped = lp.unescape(); |
| 746 | UnicodeString spUnescaped = sp.unescape(); |
| 747 | |
| 748 | LocalUStringSearchPointer ussLong(usearch_openFromCollator(lpUnescaped.getBuffer(), lpUnescaped.length(), |
| 749 | lpUnescaped.getBuffer(), lpUnescaped.length(), // actual test data will be set later |
| 750 | coll, |
| 751 | NULL, // the break iterator |
| 752 | &status)); |
| 753 | |
| 754 | LocalUStringSearchPointer ussShort(usearch_openFromCollator(spUnescaped.getBuffer(), spUnescaped.length(), |
| 755 | spUnescaped.getBuffer(), spUnescaped.length(), // actual test data will be set later |
| 756 | coll, |
| 757 | NULL, // the break iterator |
| 758 | &status)); |
| 759 | TEST_ASSERT_SUCCESS(status); |
| 760 | |
| 761 | for (uint32_t t = 0; t < UPRV_LENGTHOF(targets); t += 1) { |
| 762 | UBool bFound; |
| 763 | UnicodeString target = targets[t].unescape(); |
| 764 | |
| 765 | start = end = -1; |
| 766 | usearch_setText(ussLong.getAlias(), target.getBuffer(), target.length(), &status); |
| 767 | bFound = usearch_search(ussLong.getAlias(), 0, &start, &end, &status); |
| 768 | TEST_ASSERT_SUCCESS(status); |
| 769 | if (bFound) { |
| 770 | logln("Test %d: found long pattern at [%d, %d].", t, start, end); |
| 771 | } else { |
| 772 | dataerrln("Test %d: did not find long pattern.", t); |
| 773 | } |
| 774 | |
| 775 | usearch_setText(ussShort.getAlias(), target.getBuffer(), target.length(), &status); |
| 776 | bFound = usearch_search(ussShort.getAlias(), 0, &start, &end, &status); |
| 777 | TEST_ASSERT_SUCCESS(status); |
| 778 | if (bFound) { |
| 779 | logln("Test %d: found long pattern at [%d, %d].", t, start, end); |
| 780 | } else { |
| 781 | dataerrln("Test %d: did not find long pattern.", t); |
| 782 | } |
| 783 | } |
| 784 | |
| 785 | ucol_close(coll); |
| 786 | } |
| 787 | |
| 788 | void SSearchTest::goodSuffixTest() |
| 789 | { |
| 790 | UErrorCode status = U_ZERO_ERROR; |
| 791 | UCollator *coll = NULL; |
| 792 | UnicodeString pat = /*"gcagagag"*/ "fxeld"; |
| 793 | UnicodeString target = /*"gcatcgcagagagtatacagtacg"*/ "cloveldfxeld"; |
| 794 | int32_t start = -1, end = -1; |
| 795 | UBool bFound; |
| 796 | |
| 797 | coll = ucol_open(NULL, &status); |
| 798 | TEST_ASSERT_SUCCESS(status); |
| 799 | |
| 800 | LocalUStringSearchPointer ss(usearch_openFromCollator(pat.getBuffer(), pat.length(), |
| 801 | target.getBuffer(), target.length(), |
| 802 | coll, |
| 803 | NULL, // the break iterator |
| 804 | &status)); |
| 805 | TEST_ASSERT_SUCCESS(status); |
| 806 | |
| 807 | bFound = usearch_search(ss.getAlias(), 0, &start, &end, &status); |
| 808 | TEST_ASSERT_SUCCESS(status); |
| 809 | if (bFound) { |
| 810 | logln("Found pattern at [%d, %d].", start, end); |
| 811 | } else { |
| 812 | dataerrln("Did not find pattern."); |
| 813 | } |
| 814 | |
| 815 | ucol_close(coll); |
| 816 | } |
| 817 | |
| 818 | // |
| 819 | // searchTime() A quick and dirty performance test for string search. |
| 820 | // Probably doesn't really belong as part of intltest, but it |
| 821 | // does check that the search succeeds, and gets the right result, |
| 822 | // so it serves as a functionality test also. |
| 823 | // |
| 824 | // To run as a perf test, up the loop count, select by commenting |
| 825 | // and uncommenting in the code the operation to be measured, |
| 826 | // rebuild, and measure the running time of this test alone. |
| 827 | // |
| 828 | // time LD_LIBRARY_PATH=whatever ./intltest collate/SSearchTest/searchTime |
| 829 | // |
| 830 | void SSearchTest::searchTime() { |
| 831 | static const char *longishText = |
| 832 | "Whylom, as olde stories tellen us,\n" |
| 833 | "Ther was a duk that highte Theseus:\n" |
| 834 | "Of Athenes he was lord and governour,\n" |
| 835 | "And in his tyme swich a conquerour,\n" |
| 836 | "That gretter was ther noon under the sonne.\n" |
| 837 | "Ful many a riche contree hadde he wonne;\n" |
| 838 | "What with his wisdom and his chivalrye,\n" |
| 839 | "He conquered al the regne of Femenye,\n" |
| 840 | "That whylom was y-cleped Scithia;\n" |
| 841 | "And weddede the quene Ipolita,\n" |
| 842 | "And broghte hir hoom with him in his contree\n" |
| 843 | "With muchel glorie and greet solempnitee,\n" |
| 844 | "And eek hir yonge suster Emelye.\n" |
| 845 | "And thus with victorie and with melodye\n" |
| 846 | "Lete I this noble duk to Athenes ryde,\n" |
| 847 | "And al his hoost, in armes, him bisyde.\n" |
| 848 | "And certes, if it nere to long to here,\n" |
| 849 | "I wolde han told yow fully the manere,\n" |
| 850 | "How wonnen was the regne of Femenye\n" |
| 851 | "By Theseus, and by his chivalrye;\n" |
| 852 | "And of the grete bataille for the nones\n" |
| 853 | "Bitwixen Athen's and Amazones;\n" |
| 854 | "And how asseged was Ipolita,\n" |
| 855 | "The faire hardy quene of Scithia;\n" |
| 856 | "And of the feste that was at hir weddinge,\n" |
| 857 | "And of the tempest at hir hoom-cominge;\n" |
| 858 | "But al that thing I moot as now forbere.\n" |
| 859 | "I have, God woot, a large feeld to ere,\n" |
| 860 | "And wayke been the oxen in my plough.\n" |
| 861 | "The remenant of the tale is long y-nough.\n" |
| 862 | "I wol nat letten eek noon of this route;\n" |
| 863 | "Lat every felawe telle his tale aboute,\n" |
| 864 | "And lat see now who shal the soper winne;\n" |
| 865 | "And ther I lefte, I wol ageyn biginne.\n" |
| 866 | "This duk, of whom I make mencioun,\n" |
| 867 | "When he was come almost unto the toun,\n" |
| 868 | "In al his wele and in his moste pryde,\n" |
| 869 | "He was war, as he caste his eye asyde,\n" |
| 870 | "Wher that ther kneled in the hye weye\n" |
| 871 | "A companye of ladies, tweye and tweye,\n" |
| 872 | "Ech after other, clad in clothes blake; \n" |
| 873 | "But swich a cry and swich a wo they make,\n" |
| 874 | "That in this world nis creature livinge,\n" |
| 875 | "That herde swich another weymentinge;\n" |
| 876 | "And of this cry they nolde never stenten,\n" |
| 877 | "Til they the reynes of his brydel henten.\n" |
| 878 | "'What folk ben ye, that at myn hoomcominge\n" |
| 879 | "Perturben so my feste with cryinge'?\n" |
| 880 | "Quod Theseus, 'have ye so greet envye\n" |
| 881 | "Of myn honour, that thus compleyne and crye? \n" |
| 882 | "Or who hath yow misboden, or offended?\n" |
| 883 | "And telleth me if it may been amended;\n" |
| 884 | "And why that ye ben clothed thus in blak'?\n" |
| 885 | "The eldest lady of hem alle spak,\n" |
| 886 | "When she hadde swowned with a deedly chere,\n" |
| 887 | "That it was routhe for to seen and here,\n" |
| 888 | "And seyde: 'Lord, to whom Fortune hath yiven\n" |
| 889 | "Victorie, and as a conquerour to liven,\n" |
| 890 | "Noght greveth us your glorie and your honour;\n" |
| 891 | "But we biseken mercy and socour.\n" |
| 892 | "Have mercy on our wo and our distresse.\n" |
| 893 | "Som drope of pitee, thurgh thy gentilesse,\n" |
| 894 | "Up-on us wrecched wommen lat thou falle.\n" |
| 895 | "For certes, lord, ther nis noon of us alle,\n" |
| 896 | "That she nath been a duchesse or a quene;\n" |
| 897 | "Now be we caitifs, as it is wel sene:\n" |
| 898 | "Thanked be Fortune, and hir false wheel,\n" |
| 899 | "That noon estat assureth to be weel.\n" |
| 900 | "And certes, lord, t'abyden your presence,\n" |
| 901 | "Here in the temple of the goddesse Clemence\n" |
| 902 | "We han ben waytinge al this fourtenight;\n" |
| 903 | "Now help us, lord, sith it is in thy might.\n" |
| 904 | "I wrecche, which that wepe and waille thus,\n" |
| 905 | "Was whylom wyf to king Capaneus,\n" |
| 906 | "That starf at Thebes, cursed be that day!\n" |
| 907 | "And alle we, that been in this array,\n" |
| 908 | "And maken al this lamentacioun,\n" |
| 909 | "We losten alle our housbondes at that toun,\n" |
| 910 | "Whyl that the sege ther-aboute lay.\n" |
| 911 | "And yet now th'olde Creon, weylaway!\n" |
| 912 | "The lord is now of Thebes the citee, \n" |
| 913 | "Fulfild of ire and of iniquitee,\n" |
| 914 | "He, for despyt, and for his tirannye,\n" |
| 915 | "To do the dede bodyes vileinye,\n" |
| 916 | "Of alle our lordes, whiche that ben slawe,\n" |
| 917 | "Hath alle the bodyes on an heep y-drawe,\n" |
| 918 | "And wol nat suffren hem, by noon assent,\n" |
| 919 | "Neither to been y-buried nor y-brent,\n" |
| 920 | "But maketh houndes ete hem in despyt. zet'\n"; |
| 921 | |
| 922 | const char *cPattern = "maketh houndes ete hem"; |
| 923 | //const char *cPattern = "Whylom"; |
| 924 | //const char *cPattern = "zet"; |
| 925 | const char *testId = "searchTime()"; // for error macros. |
| 926 | UnicodeString target = longishText; |
| 927 | UErrorCode status = U_ZERO_ERROR; |
| 928 | |
| 929 | |
| 930 | LocalUCollatorPointer collator(ucol_open("en", &status)); |
| 931 | //ucol_setStrength(collator.getAlias(), collatorStrength); |
| 932 | //ucol_setAttribute(collator.getAlias(), UCOL_NORMALIZATION_MODE, normalize, &status); |
| 933 | UnicodeString uPattern = cPattern; |
| 934 | LocalUStringSearchPointer uss(usearch_openFromCollator(uPattern.getBuffer(), uPattern.length(), |
| 935 | target.getBuffer(), target.length(), |
| 936 | collator.getAlias(), |
| 937 | NULL, // the break iterator |
| 938 | &status)); |
| 939 | TEST_ASSERT_SUCCESS(status); |
| 940 | |
| 941 | // int32_t foundStart; |
| 942 | // int32_t foundEnd; |
| 943 | UBool found; |
| 944 | |
| 945 | // Find the match position usgin strstr |
| 946 | const char *pm = strstr(longishText, cPattern); |
| 947 | TEST_ASSERT_M(pm!=NULL, "No pattern match with strstr"); |
| 948 | int32_t refMatchPos = (int32_t)(pm - longishText); |
| 949 | int32_t icuMatchPos; |
| 950 | int32_t icuMatchEnd; |
| 951 | usearch_search(uss.getAlias(), 0, &icuMatchPos, &icuMatchEnd, &status); |
| 952 | TEST_ASSERT_SUCCESS(status); |
| 953 | TEST_ASSERT_M(refMatchPos == icuMatchPos, "strstr and icu give different match positions."); |
| 954 | |
| 955 | int32_t i; |
| 956 | // int32_t j=0; |
| 957 | |
| 958 | // Try loopcounts around 100000 to some millions, depending on the operation, |
| 959 | // to get runtimes of at least several seconds. |
| 960 | for (i=0; i<10000; i++) { |
| 961 | found = usearch_search(uss.getAlias(), 0, &icuMatchPos, &icuMatchEnd, &status); |
| 962 | (void)found; // Suppress set but not used warning. |
| 963 | //TEST_ASSERT_SUCCESS(status); |
| 964 | //TEST_ASSERT(found); |
| 965 | |
| 966 | // usearch_setOffset(uss.getAlias(), 0, &status); |
| 967 | // icuMatchPos = usearch_next(uss.getAlias(), &status); |
| 968 | |
| 969 | // The i+j stuff is to confuse the optimizer and get it to actually leave the |
| 970 | // call to strstr in place. |
| 971 | //pm = strstr(longishText+j, cPattern); |
| 972 | //j = (j + i)%5; |
| 973 | } |
| 974 | |
| 975 | //printf("%ld, %d\n", pm-longishText, j); |
| 976 | } |
| 977 | |
| 978 | //---------------------------------------------------------------------------------------- |
| 979 | // |
| 980 | // Random Numbers. Similar to standard lib rand() and srand() |
| 981 | // Not using library to |
| 982 | // 1. Get same results on all platforms. |
| 983 | // 2. Get access to current seed, to more easily reproduce failures. |
| 984 | // |
| 985 | //--------------------------------------------------------------------------------------- |
// Generator state. Tests read it to log the seed of a failing case and
// overwrite it to replay a run.
static uint32_t m_seed = 1;

// Advances the linear congruential generator one step and returns
// bits 16..30 of the new state, i.e. a value in the range [0, 32767].
static uint32_t m_rand()
{
    const uint32_t next = m_seed * 1103515245 + 12345;

    m_seed = next;
    return (next >> 16) & 0x7FFFu;
}
| 993 | |
| 994 | class Monkey |
| 995 | { |
| 996 | public: |
| 997 | virtual void append(UnicodeString &test, UnicodeString &alternate) = 0; |
| 998 | |
| 999 | protected: |
| 1000 | Monkey(); |
| 1001 | virtual ~Monkey(); |
| 1002 | }; |
| 1003 | |
| 1004 | Monkey::Monkey() |
| 1005 | { |
| 1006 | // ook? |
| 1007 | } |
| 1008 | |
| 1009 | Monkey::~Monkey() |
| 1010 | { |
| 1011 | // ook? |
| 1012 | } |
| 1013 | |
| 1014 | class SetMonkey : public Monkey |
| 1015 | { |
| 1016 | public: |
| 1017 | SetMonkey(const USet *theSet); |
| 1018 | ~SetMonkey(); |
| 1019 | |
| 1020 | virtual void append(UnicodeString &test, UnicodeString &alternate) override; |
| 1021 | |
| 1022 | private: |
| 1023 | const USet *set; |
| 1024 | }; |
| 1025 | |
| 1026 | SetMonkey::SetMonkey(const USet *theSet) |
| 1027 | : Monkey(), set(theSet) |
| 1028 | { |
| 1029 | // ook? |
| 1030 | } |
| 1031 | |
| 1032 | SetMonkey::~SetMonkey() |
| 1033 | { |
| 1034 | //ook... |
| 1035 | } |
| 1036 | |
| 1037 | void SetMonkey::append(UnicodeString &test, UnicodeString &alternate) |
| 1038 | { |
| 1039 | int32_t size = uset_size(set); |
| 1040 | int32_t index = m_rand() % size; |
| 1041 | UChar32 ch = uset_charAt(set, index); |
| 1042 | UnicodeString str(ch); |
| 1043 | |
| 1044 | test.append(str); |
| 1045 | alternate.append(str); // flip case, or some junk? |
| 1046 | } |
| 1047 | |
| 1048 | class StringSetMonkey : public Monkey |
| 1049 | { |
| 1050 | public: |
| 1051 | StringSetMonkey(const USet *theSet, UCollator *theCollator, CollData *theCollData); |
| 1052 | ~StringSetMonkey(); |
| 1053 | |
| 1054 | void append(UnicodeString &testCase, UnicodeString &alternate) override; |
| 1055 | |
| 1056 | private: |
| 1057 | UnicodeString &generateAlternative(const UnicodeString &testCase, UnicodeString &alternate); |
| 1058 | |
| 1059 | const USet *set; |
| 1060 | UCollator *coll; |
| 1061 | CollData *collData; |
| 1062 | }; |
| 1063 | |
| 1064 | StringSetMonkey::StringSetMonkey(const USet *theSet, UCollator *theCollator, CollData *theCollData) |
| 1065 | : Monkey(), set(theSet), coll(theCollator), collData(theCollData) |
| 1066 | { |
| 1067 | // ook. |
| 1068 | } |
| 1069 | |
| 1070 | StringSetMonkey::~StringSetMonkey() |
| 1071 | { |
| 1072 | // ook? |
| 1073 | } |
| 1074 | |
| 1075 | void StringSetMonkey::append(UnicodeString &testCase, UnicodeString &alternate) |
| 1076 | { |
| 1077 | int32_t itemCount = uset_getItemCount(set), len = 0; |
| 1078 | int32_t index = m_rand() % itemCount; |
| 1079 | UChar32 rangeStart = 0, rangeEnd = 0; |
| 1080 | UChar buffer[16]; |
| 1081 | UErrorCode err = U_ZERO_ERROR; |
| 1082 | |
| 1083 | len = uset_getItem(set, index, &rangeStart, &rangeEnd, buffer, 16, &err); |
| 1084 | |
| 1085 | if (len == 0) { |
| 1086 | int32_t offset = m_rand() % (rangeEnd - rangeStart + 1); |
| 1087 | UChar32 ch = rangeStart + offset; |
| 1088 | UnicodeString str(ch); |
| 1089 | |
| 1090 | testCase.append(str); |
| 1091 | generateAlternative(str, alternate); |
| 1092 | } else if (len > 0) { |
| 1093 | // should check that len < 16... |
| 1094 | UnicodeString str(buffer, len); |
| 1095 | |
| 1096 | testCase.append(str); |
| 1097 | generateAlternative(str, alternate); |
| 1098 | } else { |
| 1099 | // shouldn't happen... |
| 1100 | } |
| 1101 | } |
| 1102 | |
| 1103 | UnicodeString &StringSetMonkey::generateAlternative(const UnicodeString &testCase, UnicodeString &alternate) |
| 1104 | { |
| 1105 | // find out shortest string for the longest sequence of ces. |
| 1106 | // needs to be refined to use dynamic programming, but will be roughly right |
| 1107 | UErrorCode status = U_ZERO_ERROR; |
| 1108 | CEList ceList(coll, testCase, status); |
| 1109 | UnicodeString alt; |
| 1110 | int32_t offset = 0; |
| 1111 | |
| 1112 | if (ceList.size() == 0) { |
| 1113 | return alternate.append(testCase); |
| 1114 | } |
| 1115 | |
| 1116 | while (offset < ceList.size()) { |
| 1117 | int32_t ce = ceList.get(offset); |
| 1118 | const StringList *strings = collData->getStringList(ce); |
| 1119 | |
| 1120 | if (strings == NULL) { |
| 1121 | return alternate.append(testCase); |
| 1122 | } |
| 1123 | |
| 1124 | int32_t stringCount = strings->size(); |
| 1125 | int32_t tries = 0; |
| 1126 | |
| 1127 | // find random string that generates the same CEList |
| 1128 | const CEList *ceList2 = NULL; |
| 1129 | const UnicodeString *string = NULL; |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1130 | UBool matches = false; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1131 | |
| 1132 | do { |
| 1133 | int32_t s = m_rand() % stringCount; |
| 1134 | |
| 1135 | if (tries++ > stringCount) { |
| 1136 | alternate.append(testCase); |
| 1137 | return alternate; |
| 1138 | } |
| 1139 | |
| 1140 | string = strings->get(s); |
| 1141 | ceList2 = collData->getCEList(string); |
| 1142 | matches = ceList.matchesAt(offset, ceList2); |
| 1143 | |
| 1144 | if (! matches) { |
| 1145 | collData->freeCEList((CEList *) ceList2); |
| 1146 | } |
| 1147 | } while (! matches); |
| 1148 | |
| 1149 | alt.append(*string); |
| 1150 | offset += ceList2->size(); |
| 1151 | collData->freeCEList(ceList2); |
| 1152 | } |
| 1153 | |
| 1154 | const CEList altCEs(coll, alt, status); |
| 1155 | |
| 1156 | if (ceList.matchesAt(0, &altCEs)) { |
| 1157 | return alternate.append(alt); |
| 1158 | } |
| 1159 | |
| 1160 | return alternate.append(testCase); |
| 1161 | } |
| 1162 | |
| 1163 | static void generateTestCase(UCollator *coll, Monkey *monkeys[], int32_t monkeyCount, UnicodeString &testCase, UnicodeString &alternate) |
| 1164 | { |
| 1165 | int32_t pieces = (m_rand() % 4) + 1; |
| 1166 | UErrorCode status = U_ZERO_ERROR; |
| 1167 | UBool matches; |
| 1168 | |
| 1169 | do { |
| 1170 | testCase.remove(); |
| 1171 | alternate.remove(); |
| 1172 | monkeys[0]->append(testCase, alternate); |
| 1173 | |
| 1174 | for(int32_t piece = 0; piece < pieces; piece += 1) { |
| 1175 | int32_t monkey = m_rand() % monkeyCount; |
| 1176 | |
| 1177 | monkeys[monkey]->append(testCase, alternate); |
| 1178 | } |
| 1179 | |
| 1180 | const CEList ceTest(coll, testCase, status); |
| 1181 | const CEList ceAlt(coll, alternate, status); |
| 1182 | |
| 1183 | matches = ceTest.matchesAt(0, &ceAlt); |
| 1184 | } while (! matches); |
| 1185 | } |
| 1186 | |
| 1187 | static UBool simpleSearch(UCollator *coll, const UnicodeString &target, int32_t offset, const UnicodeString &pattern, int32_t &matchStart, int32_t &matchEnd) |
| 1188 | { |
| 1189 | UErrorCode status = U_ZERO_ERROR; |
| 1190 | OrderList targetOrders(coll, target, offset); |
| 1191 | OrderList patternOrders(coll, pattern); |
| 1192 | int32_t targetSize = targetOrders.size() - 1; |
| 1193 | int32_t patternSize = patternOrders.size() - 1; |
| 1194 | UBreakIterator *charBreakIterator = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &status), |
| 1195 | target.getBuffer(), target.length(), &status); |
| 1196 | |
| 1197 | if (patternSize == 0) { |
| 1198 | // Searching for an empty pattern always fails |
| 1199 | matchStart = matchEnd = -1; |
| 1200 | ubrk_close(charBreakIterator); |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1201 | return false; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1202 | } |
| 1203 | |
| 1204 | matchStart = matchEnd = -1; |
| 1205 | |
| 1206 | for(int32_t i = 0; i < targetSize; i += 1) { |
| 1207 | if (targetOrders.matchesAt(i, patternOrders)) { |
| 1208 | int32_t start = targetOrders.getLowOffset(i); |
| 1209 | int32_t maxLimit = targetOrders.getLowOffset(i + patternSize); |
| 1210 | int32_t minLimit = targetOrders.getLowOffset(i + patternSize - 1); |
| 1211 | |
| 1212 | // if the low and high offsets of the first CE in |
| 1213 | // the match are the same, it means that the match |
| 1214 | // starts in the middle of an expansion - all but |
| 1215 | // the first CE of the expansion will have the offset |
| 1216 | // of the following character. |
| 1217 | if (start == targetOrders.getHighOffset(i)) { |
| 1218 | continue; |
| 1219 | } |
| 1220 | |
| 1221 | // Make sure match starts on a grapheme boundary |
| 1222 | if (! ubrk_isBoundary(charBreakIterator, start)) { |
| 1223 | continue; |
| 1224 | } |
| 1225 | |
| 1226 | // If the low and high offsets of the CE after the match |
| 1227 | // are the same, it means that the match ends in the middle |
| 1228 | // of an expansion sequence. |
| 1229 | if (maxLimit == targetOrders.getHighOffset(i + patternSize) && |
| 1230 | targetOrders.getOrder(i + patternSize) != UCOL_NULLORDER) { |
| 1231 | continue; |
| 1232 | } |
| 1233 | |
| 1234 | int32_t mend = maxLimit; |
| 1235 | |
| 1236 | // Find the first grapheme break after the character index |
| 1237 | // of the last CE in the match. If it's after character index |
| 1238 | // that's after the last CE in the match, use that index |
| 1239 | // as the end of the match. |
| 1240 | if (minLimit < maxLimit) { |
| 1241 | // When the last CE's low index is same with its high index, the CE is likely |
| 1242 | // a part of expansion. In this case, the index is located just after the |
| 1243 | // character corresponding to the CEs compared above. If the index is right |
| 1244 | // at the break boundary, move the position to the next boundary will result |
| 1245 | // incorrect match length when there are ignorable characters exist between |
| 1246 | // the position and the next character produces CE(s). See ticket#8482. |
| 1247 | if (minLimit == targetOrders.getHighOffset(i + patternSize - 1) && ubrk_isBoundary(charBreakIterator, minLimit)) { |
| 1248 | mend = minLimit; |
| 1249 | } else { |
| 1250 | int32_t nba = ubrk_following(charBreakIterator, minLimit); |
| 1251 | |
| 1252 | if (nba >= targetOrders.getHighOffset(i + patternSize - 1)) { |
| 1253 | mend = nba; |
| 1254 | } |
| 1255 | } |
| 1256 | } |
| 1257 | |
| 1258 | if (mend > maxLimit) { |
| 1259 | continue; |
| 1260 | } |
| 1261 | |
| 1262 | if (! ubrk_isBoundary(charBreakIterator, mend)) { |
| 1263 | continue; |
| 1264 | } |
| 1265 | |
| 1266 | matchStart = start; |
| 1267 | matchEnd = mend; |
| 1268 | |
| 1269 | ubrk_close(charBreakIterator); |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1270 | return true; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1271 | } |
| 1272 | } |
| 1273 | |
| 1274 | ubrk_close(charBreakIterator); |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1275 | return false; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1276 | } |
| 1277 | |
| 1278 | #if !UCONFIG_NO_REGULAR_EXPRESSIONS |
| 1279 | static int32_t getIntParam(UnicodeString name, UnicodeString ¶ms, int32_t defaultVal) { |
| 1280 | int32_t val = defaultVal; |
| 1281 | |
| 1282 | name.append(" *= *(-?\\d+)"); |
| 1283 | |
| 1284 | UErrorCode status = U_ZERO_ERROR; |
| 1285 | RegexMatcher m(name, params, 0, status); |
| 1286 | |
| 1287 | if (m.find()) { |
| 1288 | // The param exists. Convert the string to an int. |
| 1289 | char valString[100]; |
| 1290 | int32_t paramLength = m.end(1, status) - m.start(1, status); |
| 1291 | |
| 1292 | if (paramLength >= (int32_t)(sizeof(valString)-1)) { |
| 1293 | paramLength = (int32_t)(sizeof(valString)-2); |
| 1294 | } |
| 1295 | |
| 1296 | params.extract(m.start(1, status), paramLength, valString, sizeof(valString)); |
| 1297 | val = uprv_strtol(valString, NULL, 10); |
| 1298 | |
| 1299 | // Delete this parameter from the params string. |
| 1300 | m.reset(); |
| 1301 | params = m.replaceFirst("", status); |
| 1302 | } |
| 1303 | |
| 1304 | //U_ASSERT(U_SUCCESS(status)); |
| 1305 | if (! U_SUCCESS(status)) { |
| 1306 | val = defaultVal; |
| 1307 | } |
| 1308 | |
| 1309 | return val; |
| 1310 | } |
| 1311 | #endif |
| 1312 | |
| 1313 | #if !UCONFIG_NO_COLLATION |
| 1314 | int32_t SSearchTest::monkeyTestCase(UCollator *coll, const UnicodeString &testCase, const UnicodeString &pattern, const UnicodeString &altPattern, |
| 1315 | const char *name, const char *strength, uint32_t seed) |
| 1316 | { |
| 1317 | UErrorCode status = U_ZERO_ERROR; |
| 1318 | int32_t actualStart = -1, actualEnd = -1; |
| 1319 | //int32_t expectedStart = prefix.length(), expectedEnd = prefix.length() + altPattern.length(); |
| 1320 | int32_t expectedStart = -1, expectedEnd = -1; |
| 1321 | int32_t notFoundCount = 0; |
| 1322 | LocalUStringSearchPointer uss(usearch_openFromCollator(pattern.getBuffer(), pattern.length(), |
| 1323 | testCase.getBuffer(), testCase.length(), |
| 1324 | coll, |
| 1325 | NULL, // the break iterator |
| 1326 | &status)); |
| 1327 | |
| 1328 | // **** TODO: find *all* matches, not just first one **** |
| 1329 | simpleSearch(coll, testCase, 0, pattern, expectedStart, expectedEnd); |
| 1330 | |
| 1331 | usearch_search(uss.getAlias(), 0, &actualStart, &actualEnd, &status); |
| 1332 | |
| 1333 | if (expectedStart >= 0 && (actualStart != expectedStart || actualEnd != expectedEnd)) { |
| 1334 | errln("Search for <pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n" |
| 1335 | " strength=%s seed=%d", |
| 1336 | name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed); |
| 1337 | } |
| 1338 | |
| 1339 | if (expectedStart == -1 && actualStart == -1) { |
| 1340 | notFoundCount += 1; |
| 1341 | } |
| 1342 | |
| 1343 | // **** TODO: find *all* matches, not just first one **** |
| 1344 | simpleSearch(coll, testCase, 0, altPattern, expectedStart, expectedEnd); |
| 1345 | |
| 1346 | usearch_setPattern(uss.getAlias(), altPattern.getBuffer(), altPattern.length(), &status); |
| 1347 | |
| 1348 | usearch_search(uss.getAlias(), 0, &actualStart, &actualEnd, &status); |
| 1349 | |
| 1350 | if (expectedStart >= 0 && (actualStart != expectedStart || actualEnd != expectedEnd)) { |
| 1351 | errln("Search for <alt_pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n" |
| 1352 | " strength=%s seed=%d", |
| 1353 | name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed); |
| 1354 | } |
| 1355 | |
| 1356 | if (expectedStart == -1 && actualStart == -1) { |
| 1357 | notFoundCount += 1; |
| 1358 | } |
| 1359 | |
| 1360 | return notFoundCount; |
| 1361 | } |
| 1362 | #endif |
| 1363 | |
| 1364 | void SSearchTest::monkeyTest(char *params) |
| 1365 | { |
| 1366 | // ook! |
| 1367 | UErrorCode status = U_ZERO_ERROR; |
| 1368 | //UCollator *coll = ucol_open(NULL, &status); |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1369 | UCollator *coll = ucol_openFromShortString("S1", false, NULL, &status); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1370 | |
| 1371 | if (U_FAILURE(status)) { |
| 1372 | errcheckln(status, "Failed to create collator in MonkeyTest! - %s", u_errorName(status)); |
| 1373 | return; |
| 1374 | } |
| 1375 | |
| 1376 | CollData *monkeyData = new CollData(coll, status); |
| 1377 | |
| 1378 | USet *expansions = uset_openEmpty(); |
| 1379 | USet *contractions = uset_openEmpty(); |
| 1380 | |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1381 | ucol_getContractionsAndExpansions(coll, contractions, expansions, false, &status); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1382 | |
| 1383 | U_STRING_DECL(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39); |
| 1384 | U_STRING_INIT(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39); |
| 1385 | USet *letters = uset_openPattern(letter_pattern, 39, &status); |
| 1386 | SetMonkey letterMonkey(letters); |
| 1387 | StringSetMonkey contractionMonkey(contractions, coll, monkeyData); |
| 1388 | StringSetMonkey expansionMonkey(expansions, coll, monkeyData); |
| 1389 | UnicodeString testCase; |
| 1390 | UnicodeString alternate; |
| 1391 | UnicodeString pattern, altPattern; |
| 1392 | UnicodeString prefix, altPrefix; |
| 1393 | UnicodeString suffix, altSuffix; |
| 1394 | |
| 1395 | Monkey *monkeys[] = { |
| 1396 | &letterMonkey, |
| 1397 | &contractionMonkey, |
| 1398 | &expansionMonkey, |
| 1399 | &contractionMonkey, |
| 1400 | &expansionMonkey, |
| 1401 | &contractionMonkey, |
| 1402 | &expansionMonkey, |
| 1403 | &contractionMonkey, |
| 1404 | &expansionMonkey}; |
| 1405 | int32_t monkeyCount = UPRV_LENGTHOF(monkeys); |
| 1406 | // int32_t nonMatchCount = 0; |
| 1407 | |
| 1408 | UCollationStrength strengths[] = {UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY}; |
| 1409 | const char *strengthNames[] = {"primary", "secondary", "tertiary"}; |
| 1410 | int32_t strengthCount = UPRV_LENGTHOF(strengths); |
| 1411 | int32_t loopCount = quick? 1000 : 10000; |
| 1412 | int32_t firstStrength = 0; |
| 1413 | int32_t lastStrength = strengthCount - 1; //*/ 0; |
| 1414 | |
| 1415 | if (params != NULL) { |
| 1416 | #if !UCONFIG_NO_REGULAR_EXPRESSIONS |
| 1417 | UnicodeString p(params); |
| 1418 | |
| 1419 | loopCount = getIntParam("loop", p, loopCount); |
| 1420 | m_seed = getIntParam("seed", p, m_seed); |
| 1421 | |
| 1422 | RegexMatcher m(" *strength *= *(primary|secondary|tertiary) *", p, 0, status); |
| 1423 | if (m.find()) { |
| 1424 | UnicodeString breakType = m.group(1, status); |
| 1425 | |
| 1426 | for (int32_t s = 0; s < strengthCount; s += 1) { |
| 1427 | if (breakType == strengthNames[s]) { |
| 1428 | firstStrength = lastStrength = s; |
| 1429 | break; |
| 1430 | } |
| 1431 | } |
| 1432 | |
| 1433 | m.reset(); |
| 1434 | p = m.replaceFirst("", status); |
| 1435 | } |
| 1436 | |
| 1437 | if (RegexMatcher("\\S", p, 0, status).find()) { |
| 1438 | // Each option is stripped out of the option string as it is processed. |
| 1439 | // All options have been checked. The option string should have been completely emptied.. |
| 1440 | char buf[100]; |
| 1441 | p.extract(buf, sizeof(buf), NULL, status); |
| 1442 | buf[sizeof(buf)-1] = 0; |
| 1443 | errln("Unrecognized or extra parameter: %s\n", buf); |
| 1444 | return; |
| 1445 | } |
| 1446 | #else |
| 1447 | infoln("SSearchTest built with UCONFIG_NO_REGULAR_EXPRESSIONS: ignoring parameters."); |
| 1448 | #endif |
| 1449 | } |
| 1450 | |
| 1451 | for(int32_t s = firstStrength; s <= lastStrength; s += 1) { |
| 1452 | int32_t notFoundCount = 0; |
| 1453 | |
| 1454 | logln("Setting strength to %s.", strengthNames[s]); |
| 1455 | ucol_setStrength(coll, strengths[s]); |
| 1456 | |
| 1457 | // TODO: try alternate prefix and suffix too? |
| 1458 | // TODO: alternates are only equal at primary strength. Is this OK? |
| 1459 | for(int32_t t = 0; t < loopCount; t += 1) { |
| 1460 | uint32_t seed = m_seed; |
| 1461 | // int32_t nmc = 0; |
| 1462 | |
| 1463 | generateTestCase(coll, monkeys, monkeyCount, pattern, altPattern); |
| 1464 | generateTestCase(coll, monkeys, monkeyCount, prefix, altPrefix); |
| 1465 | generateTestCase(coll, monkeys, monkeyCount, suffix, altSuffix); |
| 1466 | |
| 1467 | // pattern |
| 1468 | notFoundCount += monkeyTestCase(coll, pattern, pattern, altPattern, "pattern", strengthNames[s], seed); |
| 1469 | |
| 1470 | testCase.remove(); |
| 1471 | testCase.append(prefix); |
| 1472 | testCase.append(/*alt*/pattern); |
| 1473 | |
| 1474 | // prefix + pattern |
| 1475 | notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "prefix + pattern", strengthNames[s], seed); |
| 1476 | |
| 1477 | testCase.append(suffix); |
| 1478 | |
| 1479 | // prefix + pattern + suffix |
| 1480 | notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "prefix + pattern + suffix", strengthNames[s], seed); |
| 1481 | |
| 1482 | testCase.remove(); |
| 1483 | testCase.append(pattern); |
| 1484 | testCase.append(suffix); |
| 1485 | |
| 1486 | // pattern + suffix |
| 1487 | notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "pattern + suffix", strengthNames[s], seed); |
| 1488 | } |
| 1489 | |
| 1490 | logln("For strength %s the not found count is %d.", strengthNames[s], notFoundCount); |
| 1491 | } |
| 1492 | |
| 1493 | uset_close(contractions); |
| 1494 | uset_close(expansions); |
| 1495 | uset_close(letters); |
| 1496 | delete monkeyData; |
| 1497 | |
| 1498 | ucol_close(coll); |
| 1499 | } |
| 1500 | |
| 1501 | #endif |
| 1502 | |
| 1503 | #endif |