Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1 | // © 2016 and later: Unicode, Inc. and others. |
| 2 | // License & terms of use: http://www.unicode.org/copyright.html |
| 3 | /* |
| 4 | ******************************************************************************* |
| 5 | * |
| 6 | * Copyright (C) 2003-2014, International Business Machines |
| 7 | * Corporation and others. All Rights Reserved. |
| 8 | * |
| 9 | ******************************************************************************* |
| 10 | * file name: convtest.cpp |
| 11 | * encoding: UTF-8 |
| 12 | * tab size: 8 (not used) |
| 13 | * indentation:4 |
| 14 | * |
| 15 | * created on: 2003jul15 |
| 16 | * created by: Markus W. Scherer |
| 17 | * |
| 18 | * Test file for data-driven conversion tests. |
| 19 | */ |
| 20 | |
| 21 | #include "unicode/utypes.h" |
| 22 | |
| 23 | #if !UCONFIG_NO_LEGACY_CONVERSION |
| 24 | /* |
| 25 | * Note: Turning off all of convtest.cpp if UCONFIG_NO_LEGACY_CONVERSION |
| 26 | * is slightly unnecessary - it removes tests for Unicode charsets |
| 27 | * like UTF-8 that should work. |
| 28 | * However, there is no easy way for the test to detect whether a test case |
| 29 | * is for a Unicode charset, so it would be difficult to only exclude those. |
| 30 | * Also, regular testing of ICU is done with all modules on, therefore |
| 31 | * not testing conversion for a custom configuration like this should be ok. |
| 32 | */ |
| 33 | |
| 34 | #include "unicode/ucnv.h" |
| 35 | #include "unicode/unistr.h" |
| 36 | #include "unicode/parsepos.h" |
| 37 | #include "unicode/uniset.h" |
| 38 | #include "unicode/usetiter.h" |
| 39 | #include "unicode/ustring.h" |
| 40 | #include "unicode/ures.h" |
| 41 | #include "unicode/utf16.h" |
| 42 | #include "convtest.h" |
| 43 | #include "cmemory.h" |
| 44 | #include "unicode/tstdtmod.h" |
| 45 | #include <string.h> |
| 46 | #include <stdlib.h> |
| 47 | |
// Characters used in the test data to select a converter callback.
// TestToUnicode() and TestFromUnicode() switch on the first character of a
// test case's "callback" string and compare it against these values.
enum {
    SUB_CB = '?',   // selects the ..._CALLBACK_SUBSTITUTE callback
    SKIP_CB = '0',  // selects the ..._CALLBACK_SKIP callback
    STOP_CB = '.',  // selects the ..._CALLBACK_STOP callback
    ESC_CB = '&'    // selects the ..._CALLBACK_ESCAPE callback
};
| 55 | |
| 56 | ConversionTest::ConversionTest() { |
| 57 | UErrorCode errorCode=U_ZERO_ERROR; |
| 58 | utf8Cnv=ucnv_open("UTF-8", &errorCode); |
| 59 | ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); |
| 60 | if(U_FAILURE(errorCode)) { |
| 61 | errln("unable to open UTF-8 converter"); |
| 62 | } |
| 63 | } |
| 64 | |
| 65 | ConversionTest::~ConversionTest() { |
| 66 | ucnv_close(utf8Cnv); |
| 67 | } |
| 68 | |
| 69 | void |
| 70 | ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { |
| 71 | if (exec) logln("TestSuite ConversionTest: "); |
| 72 | TESTCASE_AUTO_BEGIN; |
| 73 | #if !UCONFIG_NO_FILE_IO |
| 74 | TESTCASE_AUTO(TestToUnicode); |
| 75 | TESTCASE_AUTO(TestFromUnicode); |
| 76 | TESTCASE_AUTO(TestGetUnicodeSet); |
| 77 | #endif |
| 78 | TESTCASE_AUTO(TestGetUnicodeSet2); |
| 79 | TESTCASE_AUTO(TestDefaultIgnorableCallback); |
| 80 | TESTCASE_AUTO(TestUTF8ToUTF8Overflow); |
| 81 | TESTCASE_AUTO(TestUTF8ToUTF8Streaming); |
| 82 | TESTCASE_AUTO_END; |
| 83 | } |
| 84 | |
| 85 | // test data interface ----------------------------------------------------- *** |
| 86 | |
| 87 | void |
| 88 | ConversionTest::TestToUnicode() { |
| 89 | ConversionCase cc; |
| 90 | char charset[100], cbopt[4]; |
| 91 | const char *option; |
| 92 | UnicodeString s, unicode; |
| 93 | int32_t offsetsLength; |
| 94 | UConverterToUCallback callback; |
| 95 | |
| 96 | TestDataModule *dataModule; |
| 97 | TestData *testData; |
| 98 | const DataMap *testCase; |
| 99 | UErrorCode errorCode; |
| 100 | int32_t i; |
| 101 | |
| 102 | errorCode=U_ZERO_ERROR; |
| 103 | dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode); |
| 104 | if(U_SUCCESS(errorCode)) { |
| 105 | testData=dataModule->createTestData("toUnicode", errorCode); |
| 106 | if(U_SUCCESS(errorCode)) { |
| 107 | for(i=0; testData->nextCase(testCase, errorCode); ++i) { |
| 108 | if(U_FAILURE(errorCode)) { |
| 109 | errln("error retrieving conversion/toUnicode test case %d - %s", |
| 110 | i, u_errorName(errorCode)); |
| 111 | errorCode=U_ZERO_ERROR; |
| 112 | continue; |
| 113 | } |
| 114 | |
| 115 | cc.caseNr=i; |
| 116 | |
| 117 | s=testCase->getString("charset", errorCode); |
| 118 | s.extract(0, 0x7fffffff, charset, sizeof(charset), ""); |
| 119 | cc.charset=charset; |
| 120 | |
| 121 | cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode); |
| 122 | unicode=testCase->getString("unicode", errorCode); |
| 123 | cc.unicode=unicode.getBuffer(); |
| 124 | cc.unicodeLength=unicode.length(); |
| 125 | |
| 126 | offsetsLength=0; |
| 127 | cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode); |
| 128 | if(offsetsLength==0) { |
| 129 | cc.offsets=NULL; |
| 130 | } else if(offsetsLength!=unicode.length()) { |
| 131 | errln("toUnicode[%d] unicode[%d] and offsets[%d] must have the same length", |
| 132 | i, unicode.length(), offsetsLength); |
| 133 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 134 | } |
| 135 | |
| 136 | cc.finalFlush= 0!=testCase->getInt28("flush", errorCode); |
| 137 | cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode); |
| 138 | |
| 139 | s=testCase->getString("errorCode", errorCode); |
| 140 | if(s==UNICODE_STRING("invalid", 7)) { |
| 141 | cc.outErrorCode=U_INVALID_CHAR_FOUND; |
| 142 | } else if(s==UNICODE_STRING("illegal", 7)) { |
| 143 | cc.outErrorCode=U_ILLEGAL_CHAR_FOUND; |
| 144 | } else if(s==UNICODE_STRING("truncated", 9)) { |
| 145 | cc.outErrorCode=U_TRUNCATED_CHAR_FOUND; |
| 146 | } else if(s==UNICODE_STRING("illesc", 6)) { |
| 147 | cc.outErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE; |
| 148 | } else if(s==UNICODE_STRING("unsuppesc", 9)) { |
| 149 | cc.outErrorCode=U_UNSUPPORTED_ESCAPE_SEQUENCE; |
| 150 | } else { |
| 151 | cc.outErrorCode=U_ZERO_ERROR; |
| 152 | } |
| 153 | |
| 154 | s=testCase->getString("callback", errorCode); |
| 155 | s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), ""); |
| 156 | cc.cbopt=cbopt; |
| 157 | switch(cbopt[0]) { |
| 158 | case SUB_CB: |
| 159 | callback=UCNV_TO_U_CALLBACK_SUBSTITUTE; |
| 160 | break; |
| 161 | case SKIP_CB: |
| 162 | callback=UCNV_TO_U_CALLBACK_SKIP; |
| 163 | break; |
| 164 | case STOP_CB: |
| 165 | callback=UCNV_TO_U_CALLBACK_STOP; |
| 166 | break; |
| 167 | case ESC_CB: |
| 168 | callback=UCNV_TO_U_CALLBACK_ESCAPE; |
| 169 | break; |
| 170 | default: |
| 171 | callback=NULL; |
| 172 | break; |
| 173 | } |
| 174 | option=callback==NULL ? cbopt : cbopt+1; |
| 175 | if(*option==0) { |
| 176 | option=NULL; |
| 177 | } |
| 178 | |
| 179 | cc.invalidChars=testCase->getBinary(cc.invalidLength, "invalidChars", errorCode); |
| 180 | |
| 181 | if(U_FAILURE(errorCode)) { |
| 182 | errln("error parsing conversion/toUnicode test case %d - %s", |
| 183 | i, u_errorName(errorCode)); |
| 184 | errorCode=U_ZERO_ERROR; |
| 185 | } else { |
| 186 | logln("TestToUnicode[%d] %s", i, charset); |
| 187 | ToUnicodeCase(cc, callback, option); |
| 188 | } |
| 189 | } |
| 190 | delete testData; |
| 191 | } |
| 192 | delete dataModule; |
| 193 | } |
| 194 | else { |
| 195 | dataerrln("Could not load test conversion data"); |
| 196 | } |
| 197 | } |
| 198 | |
| 199 | void |
| 200 | ConversionTest::TestFromUnicode() { |
| 201 | ConversionCase cc; |
| 202 | char charset[100], cbopt[4]; |
| 203 | const char *option; |
| 204 | UnicodeString s, unicode, invalidUChars; |
| 205 | int32_t offsetsLength, index; |
| 206 | UConverterFromUCallback callback; |
| 207 | |
| 208 | TestDataModule *dataModule; |
| 209 | TestData *testData; |
| 210 | const DataMap *testCase; |
| 211 | const UChar *p; |
| 212 | UErrorCode errorCode; |
| 213 | int32_t i, length; |
| 214 | |
| 215 | errorCode=U_ZERO_ERROR; |
| 216 | dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode); |
| 217 | if(U_SUCCESS(errorCode)) { |
| 218 | testData=dataModule->createTestData("fromUnicode", errorCode); |
| 219 | if(U_SUCCESS(errorCode)) { |
| 220 | for(i=0; testData->nextCase(testCase, errorCode); ++i) { |
| 221 | if(U_FAILURE(errorCode)) { |
| 222 | errln("error retrieving conversion/fromUnicode test case %d - %s", |
| 223 | i, u_errorName(errorCode)); |
| 224 | errorCode=U_ZERO_ERROR; |
| 225 | continue; |
| 226 | } |
| 227 | |
| 228 | cc.caseNr=i; |
| 229 | |
| 230 | s=testCase->getString("charset", errorCode); |
| 231 | s.extract(0, 0x7fffffff, charset, sizeof(charset), ""); |
| 232 | cc.charset=charset; |
| 233 | |
| 234 | unicode=testCase->getString("unicode", errorCode); |
| 235 | cc.unicode=unicode.getBuffer(); |
| 236 | cc.unicodeLength=unicode.length(); |
| 237 | cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode); |
| 238 | |
| 239 | offsetsLength=0; |
| 240 | cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode); |
| 241 | if(offsetsLength==0) { |
| 242 | cc.offsets=NULL; |
| 243 | } else if(offsetsLength!=cc.bytesLength) { |
| 244 | errln("fromUnicode[%d] bytes[%d] and offsets[%d] must have the same length", |
| 245 | i, cc.bytesLength, offsetsLength); |
| 246 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 247 | } |
| 248 | |
| 249 | cc.finalFlush= 0!=testCase->getInt28("flush", errorCode); |
| 250 | cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode); |
| 251 | |
| 252 | s=testCase->getString("errorCode", errorCode); |
| 253 | if(s==UNICODE_STRING("invalid", 7)) { |
| 254 | cc.outErrorCode=U_INVALID_CHAR_FOUND; |
| 255 | } else if(s==UNICODE_STRING("illegal", 7)) { |
| 256 | cc.outErrorCode=U_ILLEGAL_CHAR_FOUND; |
| 257 | } else if(s==UNICODE_STRING("truncated", 9)) { |
| 258 | cc.outErrorCode=U_TRUNCATED_CHAR_FOUND; |
| 259 | } else { |
| 260 | cc.outErrorCode=U_ZERO_ERROR; |
| 261 | } |
| 262 | |
| 263 | s=testCase->getString("callback", errorCode); |
| 264 | cc.setSub=0; // default: no subchar |
| 265 | |
| 266 | if((index=s.indexOf((UChar)0))>0) { |
| 267 | // read NUL-separated subchar first, if any |
| 268 | // copy the subchar from Latin-1 characters |
| 269 | // start after the NUL |
| 270 | p=s.getTerminatedBuffer(); |
| 271 | length=index+1; |
| 272 | p+=length; |
| 273 | length=s.length()-length; |
| 274 | if(length<=0 || length>=(int32_t)sizeof(cc.subchar)) { |
| 275 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 276 | } else { |
| 277 | int32_t j; |
| 278 | |
| 279 | for(j=0; j<length; ++j) { |
| 280 | cc.subchar[j]=(char)p[j]; |
| 281 | } |
| 282 | // NUL-terminate the subchar |
| 283 | cc.subchar[j]=0; |
| 284 | cc.setSub=1; |
| 285 | } |
| 286 | |
| 287 | // remove the NUL and subchar from s |
| 288 | s.truncate(index); |
| 289 | } else if((index=s.indexOf((UChar)0x3d))>0) /* '=' */ { |
| 290 | // read a substitution string, separated by an equal sign |
| 291 | p=s.getBuffer()+index+1; |
| 292 | length=s.length()-(index+1); |
| 293 | if(length<0 || length>=UPRV_LENGTHOF(cc.subString)) { |
| 294 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 295 | } else { |
| 296 | u_memcpy(cc.subString, p, length); |
| 297 | // NUL-terminate the subString |
| 298 | cc.subString[length]=0; |
| 299 | cc.setSub=-1; |
| 300 | } |
| 301 | |
| 302 | // remove the equal sign and subString from s |
| 303 | s.truncate(index); |
| 304 | } |
| 305 | |
| 306 | s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), ""); |
| 307 | cc.cbopt=cbopt; |
| 308 | switch(cbopt[0]) { |
| 309 | case SUB_CB: |
| 310 | callback=UCNV_FROM_U_CALLBACK_SUBSTITUTE; |
| 311 | break; |
| 312 | case SKIP_CB: |
| 313 | callback=UCNV_FROM_U_CALLBACK_SKIP; |
| 314 | break; |
| 315 | case STOP_CB: |
| 316 | callback=UCNV_FROM_U_CALLBACK_STOP; |
| 317 | break; |
| 318 | case ESC_CB: |
| 319 | callback=UCNV_FROM_U_CALLBACK_ESCAPE; |
| 320 | break; |
| 321 | default: |
| 322 | callback=NULL; |
| 323 | break; |
| 324 | } |
| 325 | option=callback==NULL ? cbopt : cbopt+1; |
| 326 | if(*option==0) { |
| 327 | option=NULL; |
| 328 | } |
| 329 | |
| 330 | invalidUChars=testCase->getString("invalidUChars", errorCode); |
| 331 | cc.invalidUChars=invalidUChars.getBuffer(); |
| 332 | cc.invalidLength=invalidUChars.length(); |
| 333 | |
| 334 | if(U_FAILURE(errorCode)) { |
| 335 | errln("error parsing conversion/fromUnicode test case %d - %s", |
| 336 | i, u_errorName(errorCode)); |
| 337 | errorCode=U_ZERO_ERROR; |
| 338 | } else { |
| 339 | logln("TestFromUnicode[%d] %s", i, charset); |
| 340 | FromUnicodeCase(cc, callback, option); |
| 341 | } |
| 342 | } |
| 343 | delete testData; |
| 344 | } |
| 345 | delete dataModule; |
| 346 | } |
| 347 | else { |
| 348 | dataerrln("Could not load test conversion data"); |
| 349 | } |
| 350 | } |
| 351 | |
| 352 | static const UChar ellipsis[]={ 0x2e, 0x2e, 0x2e }; |
| 353 | |
| 354 | void |
| 355 | ConversionTest::TestGetUnicodeSet() { |
| 356 | char charset[100]; |
| 357 | UnicodeString s, map, mapnot; |
| 358 | int32_t which; |
| 359 | |
| 360 | ParsePosition pos; |
| 361 | UnicodeSet cnvSet, mapSet, mapnotSet, diffSet; |
| 362 | UnicodeSet *cnvSetPtr = &cnvSet; |
| 363 | LocalUConverterPointer cnv; |
| 364 | |
| 365 | TestDataModule *dataModule; |
| 366 | TestData *testData; |
| 367 | const DataMap *testCase; |
| 368 | UErrorCode errorCode; |
| 369 | int32_t i; |
| 370 | |
| 371 | errorCode=U_ZERO_ERROR; |
| 372 | dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode); |
| 373 | if(U_SUCCESS(errorCode)) { |
| 374 | testData=dataModule->createTestData("getUnicodeSet", errorCode); |
| 375 | if(U_SUCCESS(errorCode)) { |
| 376 | for(i=0; testData->nextCase(testCase, errorCode); ++i) { |
| 377 | if(U_FAILURE(errorCode)) { |
| 378 | errln("error retrieving conversion/getUnicodeSet test case %d - %s", |
| 379 | i, u_errorName(errorCode)); |
| 380 | errorCode=U_ZERO_ERROR; |
| 381 | continue; |
| 382 | } |
| 383 | |
| 384 | s=testCase->getString("charset", errorCode); |
| 385 | s.extract(0, 0x7fffffff, charset, sizeof(charset), ""); |
| 386 | |
| 387 | map=testCase->getString("map", errorCode); |
| 388 | mapnot=testCase->getString("mapnot", errorCode); |
| 389 | |
| 390 | which=testCase->getInt28("which", errorCode); |
| 391 | |
| 392 | if(U_FAILURE(errorCode)) { |
| 393 | errln("error parsing conversion/getUnicodeSet test case %d - %s", |
| 394 | i, u_errorName(errorCode)); |
| 395 | errorCode=U_ZERO_ERROR; |
| 396 | continue; |
| 397 | } |
| 398 | |
| 399 | // test this test case |
| 400 | mapSet.clear(); |
| 401 | mapnotSet.clear(); |
| 402 | |
| 403 | pos.setIndex(0); |
| 404 | mapSet.applyPattern(map, pos, 0, NULL, errorCode); |
| 405 | if(U_FAILURE(errorCode) || pos.getIndex()!=map.length()) { |
| 406 | errln("error creating the map set for conversion/getUnicodeSet test case %d - %s\n" |
| 407 | " error index %d index %d U+%04x", |
| 408 | i, u_errorName(errorCode), pos.getErrorIndex(), pos.getIndex(), map.char32At(pos.getIndex())); |
| 409 | errorCode=U_ZERO_ERROR; |
| 410 | continue; |
| 411 | } |
| 412 | |
| 413 | pos.setIndex(0); |
| 414 | mapnotSet.applyPattern(mapnot, pos, 0, NULL, errorCode); |
| 415 | if(U_FAILURE(errorCode) || pos.getIndex()!=mapnot.length()) { |
| 416 | errln("error creating the mapnot set for conversion/getUnicodeSet test case %d - %s\n" |
| 417 | " error index %d index %d U+%04x", |
| 418 | i, u_errorName(errorCode), pos.getErrorIndex(), pos.getIndex(), mapnot.char32At(pos.getIndex())); |
| 419 | errorCode=U_ZERO_ERROR; |
| 420 | continue; |
| 421 | } |
| 422 | |
| 423 | logln("TestGetUnicodeSet[%d] %s", i, charset); |
| 424 | |
| 425 | cnv.adoptInstead(cnv_open(charset, errorCode)); |
| 426 | if(U_FAILURE(errorCode)) { |
| 427 | errcheckln(errorCode, "error opening \"%s\" for conversion/getUnicodeSet test case %d - %s", |
| 428 | charset, i, u_errorName(errorCode)); |
| 429 | errorCode=U_ZERO_ERROR; |
| 430 | continue; |
| 431 | } |
| 432 | |
| 433 | ucnv_getUnicodeSet(cnv.getAlias(), cnvSetPtr->toUSet(), (UConverterUnicodeSet)which, &errorCode); |
| 434 | |
| 435 | if(U_FAILURE(errorCode)) { |
| 436 | errln("error in ucnv_getUnicodeSet(\"%s\") for conversion/getUnicodeSet test case %d - %s", |
| 437 | charset, i, u_errorName(errorCode)); |
| 438 | errorCode=U_ZERO_ERROR; |
| 439 | continue; |
| 440 | } |
| 441 | |
| 442 | // are there items that must be in cnvSet but are not? |
| 443 | (diffSet=mapSet).removeAll(cnvSet); |
| 444 | if(!diffSet.isEmpty()) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 445 | diffSet.toPattern(s, true); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 446 | if(s.length()>100) { |
| 447 | s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis)); |
| 448 | } |
| 449 | errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - conversion/getUnicodeSet test case %d", |
| 450 | charset, i); |
| 451 | errln(s); |
| 452 | } |
| 453 | |
| 454 | // are there items that must not be in cnvSet but are? |
| 455 | (diffSet=mapnotSet).retainAll(cnvSet); |
| 456 | if(!diffSet.isEmpty()) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 457 | diffSet.toPattern(s, true); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 458 | if(s.length()>100) { |
| 459 | s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis)); |
| 460 | } |
| 461 | errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - conversion/getUnicodeSet test case %d", |
| 462 | charset, i); |
| 463 | errln(s); |
| 464 | } |
| 465 | } |
| 466 | delete testData; |
| 467 | } |
| 468 | delete dataModule; |
| 469 | } |
| 470 | else { |
| 471 | dataerrln("Could not load test conversion data"); |
| 472 | } |
| 473 | } |
| 474 | |
| 475 | U_CDECL_BEGIN |
| 476 | static void U_CALLCONV |
| 477 | getUnicodeSetCallback(const void *context, |
| 478 | UConverterFromUnicodeArgs * /*fromUArgs*/, |
| 479 | const UChar* /*codeUnits*/, |
| 480 | int32_t /*length*/, |
| 481 | UChar32 codePoint, |
| 482 | UConverterCallbackReason reason, |
| 483 | UErrorCode *pErrorCode) { |
| 484 | if(reason<=UCNV_IRREGULAR) { |
| 485 | ((UnicodeSet *)context)->remove(codePoint); // the converter cannot convert this code point |
| 486 | *pErrorCode=U_ZERO_ERROR; // skip |
| 487 | } // else ignore the reset, close and clone calls. |
| 488 | } |
| 489 | U_CDECL_END |
| 490 | |
| 491 | // Compare ucnv_getUnicodeSet() with the set of characters that can be converted. |
| 492 | void |
| 493 | ConversionTest::TestGetUnicodeSet2() { |
| 494 | // Build a string with all code points. |
| 495 | UChar32 cpLimit; |
| 496 | int32_t s0Length; |
| 497 | if(quick) { |
| 498 | cpLimit=s0Length=0x10000; // BMP only |
| 499 | } else { |
| 500 | cpLimit=0x110000; |
| 501 | s0Length=0x10000+0x200000; // BMP + surrogate pairs |
| 502 | } |
| 503 | UChar *s0=new UChar[s0Length]; |
| 504 | if(s0==NULL) { |
| 505 | return; |
| 506 | } |
| 507 | UChar *s=s0; |
| 508 | UChar32 c; |
| 509 | UChar c2; |
| 510 | // low BMP |
| 511 | for(c=0; c<=0xd7ff; ++c) { |
| 512 | *s++=(UChar)c; |
| 513 | } |
| 514 | // trail surrogates |
| 515 | for(c=0xdc00; c<=0xdfff; ++c) { |
| 516 | *s++=(UChar)c; |
| 517 | } |
| 518 | // lead surrogates |
| 519 | // (after trails so that there is not even one surrogate pair in between) |
| 520 | for(c=0xd800; c<=0xdbff; ++c) { |
| 521 | *s++=(UChar)c; |
| 522 | } |
| 523 | // high BMP |
| 524 | for(c=0xe000; c<=0xffff; ++c) { |
| 525 | *s++=(UChar)c; |
| 526 | } |
| 527 | // supplementary code points = surrogate pairs |
| 528 | if(cpLimit==0x110000) { |
| 529 | for(c=0xd800; c<=0xdbff; ++c) { |
| 530 | for(c2=0xdc00; c2<=0xdfff; ++c2) { |
| 531 | *s++=(UChar)c; |
| 532 | *s++=c2; |
| 533 | } |
| 534 | } |
| 535 | } |
| 536 | |
| 537 | static const char *const cnvNames[]={ |
| 538 | "UTF-8", |
| 539 | "UTF-7", |
| 540 | "UTF-16", |
| 541 | "US-ASCII", |
| 542 | "ISO-8859-1", |
| 543 | "windows-1252", |
| 544 | "Shift-JIS", |
| 545 | "ibm-1390", // EBCDIC_STATEFUL table |
| 546 | "ibm-16684", // DBCS-only extension table based on EBCDIC_STATEFUL table |
| 547 | "HZ", |
| 548 | "ISO-2022-JP", |
| 549 | "JIS7", |
| 550 | "ISO-2022-CN", |
| 551 | "ISO-2022-CN-EXT", |
| 552 | "LMBCS" |
| 553 | }; |
| 554 | LocalUConverterPointer cnv; |
| 555 | char buffer[1024]; |
| 556 | int32_t i; |
| 557 | for(i=0; i<UPRV_LENGTHOF(cnvNames); ++i) { |
| 558 | UErrorCode errorCode=U_ZERO_ERROR; |
| 559 | cnv.adoptInstead(cnv_open(cnvNames[i], errorCode)); |
| 560 | if(U_FAILURE(errorCode)) { |
| 561 | errcheckln(errorCode, "failed to open converter %s - %s", cnvNames[i], u_errorName(errorCode)); |
| 562 | continue; |
| 563 | } |
| 564 | UnicodeSet expected; |
| 565 | ucnv_setFromUCallBack(cnv.getAlias(), getUnicodeSetCallback, &expected, NULL, NULL, &errorCode); |
| 566 | if(U_FAILURE(errorCode)) { |
| 567 | errln("failed to set the callback on converter %s - %s", cnvNames[i], u_errorName(errorCode)); |
| 568 | continue; |
| 569 | } |
| 570 | UConverterUnicodeSet which; |
| 571 | for(which=UCNV_ROUNDTRIP_SET; which<UCNV_SET_COUNT; which=(UConverterUnicodeSet)((int)which+1)) { |
| 572 | if(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 573 | ucnv_setFallback(cnv.getAlias(), true); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 574 | } |
| 575 | expected.add(0, cpLimit-1); |
| 576 | s=s0; |
| 577 | UBool flush; |
| 578 | do { |
| 579 | char *t=buffer; |
| 580 | flush=(UBool)(s==s0+s0Length); |
| 581 | ucnv_fromUnicode(cnv.getAlias(), &t, buffer+sizeof(buffer), (const UChar **)&s, s0+s0Length, NULL, flush, &errorCode); |
| 582 | if(U_FAILURE(errorCode)) { |
| 583 | if(errorCode==U_BUFFER_OVERFLOW_ERROR) { |
| 584 | errorCode=U_ZERO_ERROR; |
| 585 | continue; |
| 586 | } else { |
| 587 | break; // unexpected error, should not occur |
| 588 | } |
| 589 | } |
| 590 | } while(!flush); |
| 591 | UnicodeSet set; |
| 592 | ucnv_getUnicodeSet(cnv.getAlias(), set.toUSet(), which, &errorCode); |
| 593 | if(cpLimit<0x110000) { |
| 594 | set.remove(cpLimit, 0x10ffff); |
| 595 | } |
| 596 | if(which==UCNV_ROUNDTRIP_SET) { |
| 597 | // ignore PUA code points because they will be converted even if they |
| 598 | // are fallbacks and when other fallbacks are turned off, |
| 599 | // but ucnv_getUnicodeSet(UCNV_ROUNDTRIP_SET) delivers true roundtrips |
| 600 | expected.remove(0xe000, 0xf8ff); |
| 601 | expected.remove(0xf0000, 0xffffd); |
| 602 | expected.remove(0x100000, 0x10fffd); |
| 603 | set.remove(0xe000, 0xf8ff); |
| 604 | set.remove(0xf0000, 0xffffd); |
| 605 | set.remove(0x100000, 0x10fffd); |
| 606 | } |
| 607 | if(set!=expected) { |
| 608 | // First try to see if we have different sets because ucnv_getUnicodeSet() |
| 609 | // added strings: The above conversion method does not tell us what strings might be convertible. |
| 610 | // Remove strings from the set and compare again. |
| 611 | set.removeAllStrings(); |
| 612 | } |
| 613 | if(set!=expected) { |
| 614 | UnicodeSet diffSet; |
| 615 | UnicodeString out; |
| 616 | |
| 617 | // are there items that must be in the set but are not? |
| 618 | (diffSet=expected).removeAll(set); |
| 619 | if(!diffSet.isEmpty()) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 620 | diffSet.toPattern(out, true); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 621 | if(out.length()>100) { |
| 622 | out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis)); |
| 623 | } |
| 624 | errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - which set: %d", |
| 625 | cnvNames[i], which); |
| 626 | errln(out); |
| 627 | } |
| 628 | |
| 629 | // are there items that must not be in the set but are? |
| 630 | (diffSet=set).removeAll(expected); |
| 631 | if(!diffSet.isEmpty()) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 632 | diffSet.toPattern(out, true); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 633 | if(out.length()>100) { |
| 634 | out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis)); |
| 635 | } |
| 636 | errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - which set: %d", |
| 637 | cnvNames[i], which); |
| 638 | errln(out); |
| 639 | } |
| 640 | } |
| 641 | } |
| 642 | } |
| 643 | |
| 644 | delete [] s0; |
| 645 | } |
| 646 | |
| 647 | // Test that all code points which have the default ignorable Unicode property |
| 648 | // are ignored if they have no mapping. |
| 649 | // If there are any failures, the hard coded list (IS_DEFAULT_IGNORABLE_CODE_POINT) |
| 650 | // in ucnv_err.cpp should be updated. |
| 651 | void |
| 652 | ConversionTest::TestDefaultIgnorableCallback() { |
| 653 | UErrorCode status = U_ZERO_ERROR; |
| 654 | const char *cnv_name = "euc-jp-2007"; |
| 655 | const char *pattern_ignorable = "[:Default_Ignorable_Code_Point:]"; |
| 656 | const char *pattern_not_ignorable = |
| 657 | "[[:^Default_Ignorable_Code_Point:]" |
| 658 | // For test performance, skip large ranges that will likely remain unassigned |
| 659 | // for a long time, and private use code points. |
| 660 | "-[\\U00040000-\\U000DFFFF]-[:Co:]" |
| 661 | "]"; |
| 662 | |
| 663 | LocalPointer<UnicodeSet> set_ignorable(new UnicodeSet(pattern_ignorable, status)); |
| 664 | if (U_FAILURE(status)) { |
| 665 | dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_ignorable, u_errorName(status)); |
| 666 | return; |
| 667 | } |
| 668 | |
| 669 | LocalPointer<UnicodeSet> set_not_ignorable(new UnicodeSet(pattern_not_ignorable, status)); |
| 670 | if (U_FAILURE(status)) { |
| 671 | dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_not_ignorable, u_errorName(status)); |
| 672 | return; |
| 673 | } |
| 674 | |
| 675 | LocalUConverterPointer cnv(cnv_open(cnv_name, status)); |
| 676 | if (U_FAILURE(status)) { |
| 677 | dataerrln("Unable to open converter: %s - %s\n", cnv_name, u_errorName(status)); |
| 678 | return; |
| 679 | } |
| 680 | |
| 681 | // set callback for the converter |
| 682 | ucnv_setFromUCallBack(cnv.getAlias(), UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &status); |
| 683 | |
| 684 | UChar32 input[1]; |
| 685 | char output[10]; |
| 686 | int32_t outputLength; |
| 687 | |
| 688 | // test default ignorables are ignored |
| 689 | UnicodeSetIterator iter(*set_ignorable); |
| 690 | while (iter.next()) { |
| 691 | status = U_ZERO_ERROR; |
| 692 | outputLength= 0; |
| 693 | |
| 694 | input[0] = iter.getCodepoint(); |
| 695 | |
| 696 | outputLength = ucnv_fromUChars(cnv.getAlias(), output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status); |
| 697 | if (U_FAILURE(status) || outputLength != 0) { |
| 698 | errln("Ignorable code point: U+%04X not skipped as expected - %s", input[0], u_errorName(status)); |
| 699 | } |
| 700 | } |
| 701 | |
| 702 | // test non-ignorables are not ignored |
| 703 | iter.reset(*set_not_ignorable); |
| 704 | while (iter.next()) { |
| 705 | status = U_ZERO_ERROR; |
| 706 | outputLength= 0; |
| 707 | |
| 708 | input[0] = iter.getCodepoint(); |
| 709 | |
| 710 | if (input[0] == 0) { |
| 711 | continue; |
| 712 | } |
| 713 | |
| 714 | outputLength = ucnv_fromUChars(cnv.getAlias(), output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status); |
| 715 | if (U_FAILURE(status) || outputLength <= 0) { |
| 716 | errln("Non-ignorable code point: U+%04X skipped unexpectedly - %s", input[0], u_errorName(status)); |
| 717 | } |
| 718 | } |
| 719 | } |
| 720 | |
| 721 | void |
| 722 | ConversionTest::TestUTF8ToUTF8Overflow() { |
| 723 | IcuTestErrorCode errorCode(*this, "TestUTF8ToUTF8Overflow"); |
| 724 | LocalUConverterPointer cnv1(ucnv_open("UTF-8", errorCode)); |
| 725 | LocalUConverterPointer cnv2(ucnv_open("UTF-8", errorCode)); |
| 726 | static const char *text = "aä"; // ä: 2 bytes |
| 727 | const char *source = text; |
| 728 | const char *sourceLimit = text + strlen(text); |
| 729 | char result[20]; |
| 730 | char *target = result; |
| 731 | const char *targetLimit = result + sizeof(result); |
| 732 | UChar buffer16[20]; |
| 733 | UChar *pivotSource = buffer16; |
| 734 | UChar *pivotTarget = buffer16; |
| 735 | const UChar *pivotLimit = buffer16 + UPRV_LENGTHOF(buffer16); |
| 736 | int32_t length; |
| 737 | |
| 738 | // Convert with insufficient target capacity. |
| 739 | result[2] = 5; |
| 740 | ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(), |
| 741 | &target, result + 2, &source, sourceLimit, |
| 742 | buffer16, &pivotSource, &pivotTarget, pivotLimit, |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 743 | false, false, errorCode); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 744 | assertEquals("overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset()); |
| 745 | length = (int32_t)(target - result); |
| 746 | assertEquals("number of bytes written", 2, length); |
| 747 | assertEquals("next byte not clobbered", 5, result[2]); |
| 748 | |
| 749 | // Convert the rest and flush. |
| 750 | ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(), |
| 751 | &target, targetLimit, &source, sourceLimit, |
| 752 | buffer16, &pivotSource, &pivotTarget, pivotLimit, |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 753 | false, true, errorCode); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 754 | |
| 755 | assertSuccess("UTF-8->UTF-8", errorCode); |
| 756 | length = (int32_t)(target - result); |
| 757 | assertEquals("3 bytes", 3, length); |
| 758 | if (length == 3) { |
| 759 | assertTrue("result same as input", memcmp(text, result, length) == 0); |
| 760 | } |
| 761 | |
| 762 | ucnv_reset(cnv1.getAlias()); |
| 763 | ucnv_reset(cnv2.getAlias()); |
| 764 | memset(result, 0, sizeof(result)); |
| 765 | static const char *text2 = "a🚲"; // U+1F6B2 bicycle: 4 bytes |
| 766 | source = text2; |
| 767 | sourceLimit = text2 + strlen(text2); |
| 768 | target = result; |
| 769 | pivotSource = pivotTarget = buffer16; |
| 770 | |
| 771 | // Convert with insufficient target capacity. |
| 772 | result[3] = 5; |
| 773 | ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(), |
| 774 | &target, result + 3, &source, sourceLimit, |
| 775 | buffer16, &pivotSource, &pivotTarget, pivotLimit, |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 776 | false, false, errorCode); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 777 | assertEquals("text2 overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset()); |
| 778 | length = (int32_t)(target - result); |
| 779 | assertEquals("text2 number of bytes written", 3, length); |
| 780 | assertEquals("text2 next byte not clobbered", 5, result[3]); |
| 781 | |
| 782 | // Convert the rest and flush. |
| 783 | ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(), |
| 784 | &target, targetLimit, &source, sourceLimit, |
| 785 | buffer16, &pivotSource, &pivotTarget, pivotLimit, |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 786 | false, true, errorCode); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 787 | |
| 788 | assertSuccess("text2 UTF-8->UTF-8", errorCode); |
| 789 | length = (int32_t)(target - result); |
| 790 | assertEquals("text2 5 bytes", 5, length); |
| 791 | if (length == 5) { |
| 792 | assertTrue("text2 result same as input", memcmp(text2, result, length) == 0); |
| 793 | } |
| 794 | |
| 795 | ucnv_reset(cnv1.getAlias()); |
| 796 | ucnv_reset(cnv2.getAlias()); |
| 797 | memset(result, 0, sizeof(result)); |
| 798 | static const char *illFormed = "\xf1\x91\x93\x96\x91\x94"; // U+514D6 + two more trail bytes |
| 799 | source = illFormed; |
| 800 | sourceLimit = illFormed + strlen(illFormed); |
| 801 | target = result; |
| 802 | pivotSource = pivotTarget = buffer16; |
| 803 | |
| 804 | ucnv_setToUCallBack(cnv1.getAlias(), UCNV_TO_U_CALLBACK_STOP, nullptr, nullptr, nullptr, errorCode); |
| 805 | |
| 806 | // Convert only two bytes and flush (but expect failure). |
| 807 | char errorBytes[10]; |
| 808 | int8_t errorLength; |
| 809 | result[0] = 5; |
| 810 | ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(), |
| 811 | &target, targetLimit, &source, source + 2, |
| 812 | buffer16, &pivotSource, &pivotTarget, pivotLimit, |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 813 | false, true, errorCode); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 814 | assertEquals("illFormed truncated", U_TRUNCATED_CHAR_FOUND, errorCode.reset()); |
| 815 | length = (int32_t)(target - result); |
| 816 | assertEquals("illFormed number of bytes written", 0, length); |
| 817 | errorLength = UPRV_LENGTHOF(errorBytes); |
| 818 | ucnv_getInvalidChars(cnv1.getAlias(), errorBytes, &errorLength, errorCode); |
| 819 | assertEquals("illFormed truncated errorLength", 2, (int32_t)errorLength); |
| 820 | if (errorLength == 2) { |
| 821 | assertEquals("illFormed truncated errorBytes", 0xf191, |
| 822 | ((int32_t)(uint8_t)errorBytes[0] << 8) | (uint8_t)errorBytes[1]); |
| 823 | } |
| 824 | |
| 825 | // Continue conversion starting with a trail byte. |
| 826 | ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(), |
| 827 | &target, targetLimit, &source, sourceLimit, |
| 828 | buffer16, &pivotSource, &pivotTarget, pivotLimit, |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 829 | false, true, errorCode); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 830 | |
| 831 | assertEquals("illFormed trail byte", U_ILLEGAL_CHAR_FOUND, errorCode.reset()); |
| 832 | length = (int32_t)(target - result); |
| 833 | assertEquals("illFormed trail byte number of bytes written", 0, length); |
| 834 | errorLength = UPRV_LENGTHOF(errorBytes); |
| 835 | ucnv_getInvalidChars(cnv1.getAlias(), errorBytes, &errorLength, errorCode); |
| 836 | assertEquals("illFormed trail byte errorLength", 1, (int32_t)errorLength); |
| 837 | if (errorLength == 1) { |
| 838 | assertEquals("illFormed trail byte errorBytes", 0x93, (int32_t)(uint8_t)errorBytes[0]); |
| 839 | } |
| 840 | } |
| 841 | |
| 842 | void |
| 843 | ConversionTest::TestUTF8ToUTF8Streaming() { |
| 844 | IcuTestErrorCode errorCode(*this, "TestUTF8ToUTF8Streaming"); |
| 845 | LocalUConverterPointer cnv1(ucnv_open("UTF-8", errorCode)); |
| 846 | LocalUConverterPointer cnv2(ucnv_open("UTF-8", errorCode)); |
| 847 | |
| 848 | // UTF8 encoded cyrillic part of 'Lorem ipsum' |
| 849 | static const char* text = |
| 850 | "\xd0\xb5\xd1\x82\x20\xd1\x81\xd1\x86\xd0\xb0\xd0\xb5\xd0\xb2\xd0" |
| 851 | "\xbe\xd0\xbb\xd0\xb0\x20\xd1\x81\xd0\xb0\xd0\xb4\xd0\xb8\xd0\xbf" |
| 852 | "\xd1\x81\xd1\x86\xd0\xb8\xd0\xbd\xd0\xb3\x20\xd0\xb0\xd1\x86\xd1" |
| 853 | "\x86\xd0\xbe\xd0\xbc\xd0\xbc\xd0\xbe\xd0\xb4\xd0\xb0\xd1\x80\xd0" |
| 854 | "\xb5\x20\xd1\x85\xd0\xb0\xd1\x81"; |
| 855 | |
| 856 | int32_t chunk1 = 25; // partial lead at the end: 0xd0 |
| 857 | int32_t chunk2 = 47; // partial tail at the beginning: 0xb0 |
| 858 | |
| 859 | char result[128]; |
| 860 | |
| 861 | int32_t sourceLen = (int32_t)strlen(text); |
| 862 | const char* source = text; |
| 863 | const char* sourceLimit = text + chunk1; |
| 864 | |
| 865 | int32_t targetLen = sizeof(result); |
| 866 | char* target = result; |
| 867 | const char* targetLimit = result + targetLen; |
| 868 | |
| 869 | UChar buffer16[20]; |
| 870 | UChar* pivotSource = buffer16; |
| 871 | UChar* pivotTarget = buffer16; |
| 872 | const UChar* pivotLimit = buffer16 + UPRV_LENGTHOF(buffer16); |
| 873 | |
| 874 | int32_t length; |
| 875 | ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(), |
| 876 | &target, result + targetLen, &source, sourceLimit, |
| 877 | buffer16, &pivotSource, &pivotTarget, pivotLimit, |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 878 | false, false, errorCode); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 879 | |
| 880 | length = (int32_t)(target - result); |
| 881 | targetLen -= length; |
| 882 | assertEquals("First chunk -1 doesn't match converted length", chunk1 - 1, length); |
| 883 | |
| 884 | source = text + chunk1; |
| 885 | sourceLimit = source + chunk2; |
| 886 | |
| 887 | // Convert the rest and flush. |
| 888 | ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(), |
| 889 | &target, targetLimit, &source, sourceLimit, |
| 890 | buffer16, &pivotSource, &pivotTarget, pivotLimit, |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 891 | false, true, errorCode); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 892 | |
| 893 | length = (int32_t)(target - result - length); |
| 894 | targetLen -= length; |
| 895 | assertEquals("Second chunk + 2 doesn't match converted length", chunk2 + 1, length); |
| 896 | |
| 897 | assertEquals("Full text length match", sourceLen, sizeof(result) - targetLen); |
| 898 | assertSuccess("UTF-8->UTF-8", errorCode); |
| 899 | } |
| 900 | |
| 901 | // open testdata or ICU data converter ------------------------------------- *** |
| 902 | |
| 903 | UConverter * |
| 904 | ConversionTest::cnv_open(const char *name, UErrorCode &errorCode) { |
| 905 | if(name!=NULL && *name=='+') { |
| 906 | // Converter names that start with '+' are ignored in ICU4J tests. |
| 907 | ++name; |
| 908 | } |
| 909 | if(name!=NULL && *name=='*') { |
| 910 | /* loadTestData(): set the data directory */ |
| 911 | return ucnv_openPackage(loadTestData(errorCode), name+1, &errorCode); |
| 912 | } else { |
| 913 | return ucnv_open(name, &errorCode); |
| 914 | } |
| 915 | } |
| 916 | |
| 917 | // output helpers ---------------------------------------------------------- *** |
| 918 | |
// Maps a nibble value to a lowercase hex digit: 0..9 -> '0'..'9', 10 and up -> 'a' onward.
static inline char
hexDigit(uint8_t digit) {
    if(digit<=9) {
        return (char)('0'+digit);
    }
    return (char)('a'+(digit-10));
}
| 923 | |
| 924 | static char * |
| 925 | printBytes(const uint8_t *bytes, int32_t length, char *out) { |
| 926 | uint8_t b; |
| 927 | |
| 928 | if(length>0) { |
| 929 | b=*bytes++; |
| 930 | --length; |
| 931 | *out++=hexDigit((uint8_t)(b>>4)); |
| 932 | *out++=hexDigit((uint8_t)(b&0xf)); |
| 933 | } |
| 934 | |
| 935 | while(length>0) { |
| 936 | b=*bytes++; |
| 937 | --length; |
| 938 | *out++=' '; |
| 939 | *out++=hexDigit((uint8_t)(b>>4)); |
| 940 | *out++=hexDigit((uint8_t)(b&0xf)); |
| 941 | } |
| 942 | *out++=0; |
| 943 | return out; |
| 944 | } |
| 945 | |
| 946 | static char * |
| 947 | printUnicode(const UChar *unicode, int32_t length, char *out) { |
| 948 | UChar32 c; |
| 949 | int32_t i; |
| 950 | |
| 951 | for(i=0; i<length;) { |
| 952 | if(i>0) { |
| 953 | *out++=' '; |
| 954 | } |
| 955 | U16_NEXT(unicode, i, length, c); |
| 956 | // write 4..6 digits |
| 957 | if(c>=0x100000) { |
| 958 | *out++='1'; |
| 959 | } |
| 960 | if(c>=0x10000) { |
| 961 | *out++=hexDigit((uint8_t)((c>>16)&0xf)); |
| 962 | } |
| 963 | *out++=hexDigit((uint8_t)((c>>12)&0xf)); |
| 964 | *out++=hexDigit((uint8_t)((c>>8)&0xf)); |
| 965 | *out++=hexDigit((uint8_t)((c>>4)&0xf)); |
| 966 | *out++=hexDigit((uint8_t)(c&0xf)); |
| 967 | } |
| 968 | *out++=0; |
| 969 | return out; |
| 970 | } |
| 971 | |
// Writes each offset as exactly two characters, separated by single spaces and
// followed by a NUL. Returns the position just after the NUL.
// A NULL offsets array is treated as empty.
static char *
printOffsets(const int32_t *offsets, int32_t length, char *out) {
    const int32_t count= offsets==NULL ? 0 : length;

    for(int32_t idx=0; idx<count; ++idx) {
        if(idx!=0) {
            *out++=' ';
        }

        // print all offsets with 2 characters each (-x, -9..99, xx)
        const int32_t value=offsets[idx];
        char first, second;
        if(value>99) {
            first='x';
            second='x';
        } else if(value>=0) {
            const int32_t tens=value/10;
            first= tens==0 ? ' ' : (char)('0'+tens);
            second=(char)('0'+value%10);
        } else if(value>=-9) {
            first='-';
            second=(char)('0'-value);
        } else /* value<-9 */ {
            first='-';
            second='x';
        }
        *out++=first;
        *out++=second;
    }
    *out++=0;
    return out;
}
| 1004 | |
| 1005 | // toUnicode test worker functions ----------------------------------------- *** |
| 1006 | |
| 1007 | static int32_t |
| 1008 | stepToUnicode(ConversionCase &cc, UConverter *cnv, |
| 1009 | UChar *result, int32_t resultCapacity, |
| 1010 | int32_t *resultOffsets, /* also resultCapacity */ |
| 1011 | int32_t step, |
| 1012 | UErrorCode *pErrorCode) { |
| 1013 | const char *source, *sourceLimit, *bytesLimit; |
| 1014 | UChar *target, *targetLimit, *resultLimit; |
| 1015 | UBool flush; |
| 1016 | |
| 1017 | source=(const char *)cc.bytes; |
| 1018 | target=result; |
| 1019 | bytesLimit=source+cc.bytesLength; |
| 1020 | resultLimit=result+resultCapacity; |
| 1021 | |
| 1022 | if(step>=0) { |
| 1023 | // call ucnv_toUnicode() with in/out buffers no larger than (step) at a time |
| 1024 | // move only one buffer (in vs. out) at a time to be extra mean |
| 1025 | // step==0 performs bulk conversion and generates offsets |
| 1026 | |
| 1027 | // initialize the partial limits for the loop |
| 1028 | if(step==0) { |
| 1029 | // use the entire buffers |
| 1030 | sourceLimit=bytesLimit; |
| 1031 | targetLimit=resultLimit; |
| 1032 | flush=cc.finalFlush; |
| 1033 | } else { |
| 1034 | // start with empty partial buffers |
| 1035 | sourceLimit=source; |
| 1036 | targetLimit=target; |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1037 | flush=false; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1038 | |
| 1039 | // output offsets only for bulk conversion |
| 1040 | resultOffsets=NULL; |
| 1041 | } |
| 1042 | |
| 1043 | for(;;) { |
| 1044 | // resetting the opposite conversion direction must not affect this one |
| 1045 | ucnv_resetFromUnicode(cnv); |
| 1046 | |
| 1047 | // convert |
| 1048 | ucnv_toUnicode(cnv, |
| 1049 | &target, targetLimit, |
| 1050 | &source, sourceLimit, |
| 1051 | resultOffsets, |
| 1052 | flush, pErrorCode); |
| 1053 | |
| 1054 | // check pointers and errors |
| 1055 | if(source>sourceLimit || target>targetLimit) { |
| 1056 | *pErrorCode=U_INTERNAL_PROGRAM_ERROR; |
| 1057 | break; |
| 1058 | } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { |
| 1059 | if(target!=targetLimit) { |
| 1060 | // buffer overflow must only be set when the target is filled |
| 1061 | *pErrorCode=U_INTERNAL_PROGRAM_ERROR; |
| 1062 | break; |
| 1063 | } else if(targetLimit==resultLimit) { |
| 1064 | // not just a partial overflow |
| 1065 | break; |
| 1066 | } |
| 1067 | |
| 1068 | // the partial target is filled, set a new limit, reset the error and continue |
| 1069 | targetLimit=(resultLimit-target)>=step ? target+step : resultLimit; |
| 1070 | *pErrorCode=U_ZERO_ERROR; |
| 1071 | } else if(U_FAILURE(*pErrorCode)) { |
| 1072 | // some other error occurred, done |
| 1073 | break; |
| 1074 | } else { |
| 1075 | if(source!=sourceLimit) { |
| 1076 | // when no error occurs, then the input must be consumed |
| 1077 | *pErrorCode=U_INTERNAL_PROGRAM_ERROR; |
| 1078 | break; |
| 1079 | } |
| 1080 | |
| 1081 | if(sourceLimit==bytesLimit) { |
| 1082 | // we are done |
| 1083 | break; |
| 1084 | } |
| 1085 | |
| 1086 | // the partial conversion succeeded, set a new limit and continue |
| 1087 | sourceLimit=(bytesLimit-source)>=step ? source+step : bytesLimit; |
| 1088 | flush=(UBool)(cc.finalFlush && sourceLimit==bytesLimit); |
| 1089 | } |
| 1090 | } |
| 1091 | } else /* step<0 */ { |
| 1092 | /* |
| 1093 | * step==-1: call only ucnv_getNextUChar() |
| 1094 | * otherwise alternate between ucnv_toUnicode() and ucnv_getNextUChar() |
| 1095 | * if step==-2 or -3, then give ucnv_toUnicode() the whole remaining input, |
| 1096 | * else give it at most (-step-2)/2 bytes |
| 1097 | */ |
| 1098 | UChar32 c; |
| 1099 | |
| 1100 | // end the loop by getting an index out of bounds error |
| 1101 | for(;;) { |
| 1102 | // resetting the opposite conversion direction must not affect this one |
| 1103 | ucnv_resetFromUnicode(cnv); |
| 1104 | |
| 1105 | // convert |
| 1106 | if((step&1)!=0 /* odd: -1, -3, -5, ... */) { |
| 1107 | sourceLimit=source; // use sourceLimit not as a real limit |
| 1108 | // but to remember the pre-getNextUChar source pointer |
| 1109 | c=ucnv_getNextUChar(cnv, &source, bytesLimit, pErrorCode); |
| 1110 | |
| 1111 | // check pointers and errors |
| 1112 | if(*pErrorCode==U_INDEX_OUTOFBOUNDS_ERROR) { |
| 1113 | if(source!=bytesLimit) { |
| 1114 | *pErrorCode=U_INTERNAL_PROGRAM_ERROR; |
| 1115 | } else { |
| 1116 | *pErrorCode=U_ZERO_ERROR; |
| 1117 | } |
| 1118 | break; |
| 1119 | } else if(U_FAILURE(*pErrorCode)) { |
| 1120 | break; |
| 1121 | } |
| 1122 | // source may not move if c is from previous overflow |
| 1123 | |
| 1124 | if(target==resultLimit) { |
| 1125 | *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
| 1126 | break; |
| 1127 | } |
| 1128 | if(c<=0xffff) { |
| 1129 | *target++=(UChar)c; |
| 1130 | } else { |
| 1131 | *target++=U16_LEAD(c); |
| 1132 | if(target==resultLimit) { |
| 1133 | *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
| 1134 | break; |
| 1135 | } |
| 1136 | *target++=U16_TRAIL(c); |
| 1137 | } |
| 1138 | |
| 1139 | // alternate between -n-1 and -n but leave -1 alone |
| 1140 | if(step<-1) { |
| 1141 | ++step; |
| 1142 | } |
| 1143 | } else /* step is even */ { |
| 1144 | // allow only one UChar output |
| 1145 | targetLimit=target<resultLimit ? target+1 : resultLimit; |
| 1146 | |
| 1147 | // as with ucnv_getNextUChar(), we always flush (if we go to bytesLimit) |
| 1148 | // and never output offsets |
| 1149 | if(step==-2) { |
| 1150 | sourceLimit=bytesLimit; |
| 1151 | } else { |
| 1152 | sourceLimit=source+(-step-2)/2; |
| 1153 | if(sourceLimit>bytesLimit) { |
| 1154 | sourceLimit=bytesLimit; |
| 1155 | } |
| 1156 | } |
| 1157 | |
| 1158 | ucnv_toUnicode(cnv, |
| 1159 | &target, targetLimit, |
| 1160 | &source, sourceLimit, |
| 1161 | NULL, (UBool)(sourceLimit==bytesLimit), pErrorCode); |
| 1162 | |
| 1163 | // check pointers and errors |
| 1164 | if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { |
| 1165 | if(target!=targetLimit) { |
| 1166 | // buffer overflow must only be set when the target is filled |
| 1167 | *pErrorCode=U_INTERNAL_PROGRAM_ERROR; |
| 1168 | break; |
| 1169 | } else if(targetLimit==resultLimit) { |
| 1170 | // not just a partial overflow |
| 1171 | break; |
| 1172 | } |
| 1173 | |
| 1174 | // the partial target is filled, set a new limit and continue |
| 1175 | *pErrorCode=U_ZERO_ERROR; |
| 1176 | } else if(U_FAILURE(*pErrorCode)) { |
| 1177 | // some other error occurred, done |
| 1178 | break; |
| 1179 | } else { |
| 1180 | if(source!=sourceLimit) { |
| 1181 | // when no error occurs, then the input must be consumed |
| 1182 | *pErrorCode=U_INTERNAL_PROGRAM_ERROR; |
| 1183 | break; |
| 1184 | } |
| 1185 | |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1186 | // we are done (flush==true) but we continue, to get the index out of bounds error above |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1187 | } |
| 1188 | |
| 1189 | --step; |
| 1190 | } |
| 1191 | } |
| 1192 | } |
| 1193 | |
| 1194 | return (int32_t)(target-result); |
| 1195 | } |
| 1196 | |
| 1197 | UBool |
| 1198 | ConversionTest::ToUnicodeCase(ConversionCase &cc, UConverterToUCallback callback, const char *option) { |
| 1199 | // open the converter |
| 1200 | IcuTestErrorCode errorCode(*this, "ToUnicodeCase"); |
| 1201 | LocalUConverterPointer cnv(cnv_open(cc.charset, errorCode)); |
| 1202 | // with no data, the above crashes with "pointer being freed was not allocated" for charset "x11-compound-text", see #13078 |
| 1203 | if(errorCode.isFailure()) { |
| 1204 | errcheckln(errorCode, "toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s", |
| 1205 | cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, errorCode.errorName()); |
| 1206 | errorCode.reset(); |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1207 | return false; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1208 | } |
| 1209 | |
| 1210 | // set the callback |
| 1211 | if(callback!=NULL) { |
| 1212 | ucnv_setToUCallBack(cnv.getAlias(), callback, option, NULL, NULL, errorCode); |
| 1213 | if(U_FAILURE(errorCode)) { |
| 1214 | errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setToUCallBack() failed - %s", |
| 1215 | cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode)); |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1216 | return false; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1217 | } |
| 1218 | } |
| 1219 | |
| 1220 | int32_t resultOffsets[256]; |
| 1221 | UChar result[256]; |
| 1222 | int32_t resultLength; |
| 1223 | UBool ok; |
| 1224 | |
| 1225 | static const struct { |
| 1226 | int32_t step; |
| 1227 | const char *name; |
| 1228 | } steps[]={ |
| 1229 | { 0, "bulk" }, // must be first for offsets to be checked |
| 1230 | { 1, "step=1" }, |
| 1231 | { 3, "step=3" }, |
| 1232 | { 7, "step=7" }, |
| 1233 | { -1, "getNext" }, |
| 1234 | { -2, "toU(bulk)+getNext" }, |
| 1235 | { -3, "getNext+toU(bulk)" }, |
| 1236 | { -4, "toU(1)+getNext" }, |
| 1237 | { -5, "getNext+toU(1)" }, |
| 1238 | { -12, "toU(5)+getNext" }, |
| 1239 | { -13, "getNext+toU(5)" }, |
| 1240 | }; |
| 1241 | int32_t i, step; |
| 1242 | |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1243 | ok=true; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1244 | for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) { |
| 1245 | step=steps[i].step; |
| 1246 | if(step<0 && !cc.finalFlush) { |
| 1247 | // skip ucnv_getNextUChar() if !finalFlush because |
| 1248 | // ucnv_getNextUChar() always implies flush |
| 1249 | continue; |
| 1250 | } |
| 1251 | if(step!=0) { |
| 1252 | // bulk test is first, then offsets are not checked any more |
| 1253 | cc.offsets=NULL; |
| 1254 | } |
| 1255 | else { |
| 1256 | for (int32_t i = 0; i < UPRV_LENGTHOF(resultOffsets); i++) { |
| 1257 | resultOffsets[i] = -1; |
| 1258 | } |
| 1259 | } |
| 1260 | for (int32_t i = 0; i < UPRV_LENGTHOF(result); i++) { |
| 1261 | result[i] = -1; |
| 1262 | } |
| 1263 | errorCode.reset(); |
| 1264 | resultLength=stepToUnicode(cc, cnv.getAlias(), |
| 1265 | result, UPRV_LENGTHOF(result), |
| 1266 | step==0 ? resultOffsets : NULL, |
| 1267 | step, errorCode); |
| 1268 | ok=checkToUnicode( |
| 1269 | cc, cnv.getAlias(), steps[i].name, |
| 1270 | result, resultLength, |
| 1271 | cc.offsets!=NULL ? resultOffsets : NULL, |
| 1272 | errorCode); |
| 1273 | if(errorCode.isFailure() || !cc.finalFlush) { |
| 1274 | // reset if an error occurred or we did not flush |
| 1275 | // otherwise do nothing to make sure that flushing resets |
| 1276 | ucnv_resetToUnicode(cnv.getAlias()); |
| 1277 | } |
| 1278 | if (cc.offsets != NULL && resultOffsets[resultLength] != -1) { |
| 1279 | errln("toUnicode[%d](%s) Conversion wrote too much to offsets at index %d", |
| 1280 | cc.caseNr, cc.charset, resultLength); |
| 1281 | } |
| 1282 | if (result[resultLength] != (UChar)-1) { |
| 1283 | errln("toUnicode[%d](%s) Conversion wrote too much to result at index %d", |
| 1284 | cc.caseNr, cc.charset, resultLength); |
| 1285 | } |
| 1286 | } |
| 1287 | |
| 1288 | // not a real loop, just a convenience for breaking out of the block |
| 1289 | while(ok && cc.finalFlush) { |
| 1290 | // test ucnv_toUChars() |
| 1291 | memset(result, 0, sizeof(result)); |
| 1292 | |
| 1293 | errorCode.reset(); |
| 1294 | resultLength=ucnv_toUChars(cnv.getAlias(), |
| 1295 | result, UPRV_LENGTHOF(result), |
| 1296 | (const char *)cc.bytes, cc.bytesLength, |
| 1297 | errorCode); |
| 1298 | ok=checkToUnicode( |
| 1299 | cc, cnv.getAlias(), "toUChars", |
| 1300 | result, resultLength, |
| 1301 | NULL, |
| 1302 | errorCode); |
| 1303 | if(!ok) { |
| 1304 | break; |
| 1305 | } |
| 1306 | |
| 1307 | // test preflighting |
| 1308 | // keep the correct result for simple checking |
| 1309 | errorCode.reset(); |
| 1310 | resultLength=ucnv_toUChars(cnv.getAlias(), |
| 1311 | NULL, 0, |
| 1312 | (const char *)cc.bytes, cc.bytesLength, |
| 1313 | errorCode); |
| 1314 | if(errorCode.get()==U_STRING_NOT_TERMINATED_WARNING || errorCode.get()==U_BUFFER_OVERFLOW_ERROR) { |
| 1315 | errorCode.reset(); |
| 1316 | } |
| 1317 | ok=checkToUnicode( |
| 1318 | cc, cnv.getAlias(), "preflight toUChars", |
| 1319 | result, resultLength, |
| 1320 | NULL, |
| 1321 | errorCode); |
| 1322 | break; |
| 1323 | } |
| 1324 | |
| 1325 | errorCode.reset(); // all errors have already been reported |
| 1326 | return ok; |
| 1327 | } |
| 1328 | |
| 1329 | UBool |
| 1330 | ConversionTest::checkToUnicode(ConversionCase &cc, UConverter *cnv, const char *name, |
| 1331 | const UChar *result, int32_t resultLength, |
| 1332 | const int32_t *resultOffsets, |
| 1333 | UErrorCode resultErrorCode) { |
| 1334 | char resultInvalidChars[8]; |
| 1335 | int8_t resultInvalidLength; |
| 1336 | UErrorCode errorCode; |
| 1337 | |
| 1338 | const char *msg; |
| 1339 | |
| 1340 | // reset the message; NULL will mean "ok" |
| 1341 | msg=NULL; |
| 1342 | |
| 1343 | errorCode=U_ZERO_ERROR; |
| 1344 | resultInvalidLength=sizeof(resultInvalidChars); |
| 1345 | ucnv_getInvalidChars(cnv, resultInvalidChars, &resultInvalidLength, &errorCode); |
| 1346 | if(U_FAILURE(errorCode)) { |
| 1347 | errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidChars() failed - %s", |
| 1348 | cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, u_errorName(errorCode)); |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1349 | return false; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1350 | } |
| 1351 | |
| 1352 | // check everything that might have gone wrong |
| 1353 | if(cc.unicodeLength!=resultLength) { |
| 1354 | msg="wrong result length"; |
| 1355 | } else if(0!=u_memcmp(cc.unicode, result, cc.unicodeLength)) { |
| 1356 | msg="wrong result string"; |
| 1357 | } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.unicodeLength*sizeof(*cc.offsets))) { |
| 1358 | msg="wrong offsets"; |
| 1359 | } else if(cc.outErrorCode!=resultErrorCode) { |
| 1360 | msg="wrong error code"; |
| 1361 | } else if(cc.invalidLength!=resultInvalidLength) { |
| 1362 | msg="wrong length of last invalid input"; |
| 1363 | } else if(0!=memcmp(cc.invalidChars, resultInvalidChars, cc.invalidLength)) { |
| 1364 | msg="wrong last invalid input"; |
| 1365 | } |
| 1366 | |
| 1367 | if(msg==NULL) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1368 | return true; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1369 | } else { |
| 1370 | char buffer[2000]; // one buffer for all strings |
| 1371 | char *s, *bytesString, *unicodeString, *resultString, |
| 1372 | *offsetsString, *resultOffsetsString, |
| 1373 | *invalidCharsString, *resultInvalidCharsString; |
| 1374 | |
| 1375 | bytesString=s=buffer; |
| 1376 | s=printBytes(cc.bytes, cc.bytesLength, bytesString); |
| 1377 | s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString=s); |
| 1378 | s=printUnicode(result, resultLength, resultString=s); |
| 1379 | s=printOffsets(cc.offsets, cc.unicodeLength, offsetsString=s); |
| 1380 | s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s); |
| 1381 | s=printBytes(cc.invalidChars, cc.invalidLength, invalidCharsString=s); |
| 1382 | s=printBytes((uint8_t *)resultInvalidChars, resultInvalidLength, resultInvalidCharsString=s); |
| 1383 | |
| 1384 | if((s-buffer)>(int32_t)sizeof(buffer)) { |
| 1385 | errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkToUnicode() test output buffer overflow writing %d chars\n", |
| 1386 | cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer)); |
| 1387 | exit(1); |
| 1388 | } |
| 1389 | |
| 1390 | errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n" |
| 1391 | " bytes <%s>[%d]\n" |
| 1392 | " expected <%s>[%d]\n" |
| 1393 | " result <%s>[%d]\n" |
| 1394 | " offsets <%s>\n" |
| 1395 | " result offsets <%s>\n" |
| 1396 | " error code expected %s got %s\n" |
| 1397 | " invalidChars expected <%s> got <%s>\n", |
| 1398 | cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, msg, |
| 1399 | bytesString, cc.bytesLength, |
| 1400 | unicodeString, cc.unicodeLength, |
| 1401 | resultString, resultLength, |
| 1402 | offsetsString, |
| 1403 | resultOffsetsString, |
| 1404 | u_errorName(cc.outErrorCode), u_errorName(resultErrorCode), |
| 1405 | invalidCharsString, resultInvalidCharsString); |
| 1406 | |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1407 | return false; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1408 | } |
| 1409 | } |
| 1410 | |
| 1411 | // fromUnicode test worker functions --------------------------------------- *** |
| 1412 | |
| 1413 | static int32_t |
| 1414 | stepFromUTF8(ConversionCase &cc, |
| 1415 | UConverter *utf8Cnv, UConverter *cnv, |
| 1416 | char *result, int32_t resultCapacity, |
| 1417 | int32_t step, |
| 1418 | UErrorCode *pErrorCode) { |
| 1419 | const char *source, *sourceLimit, *utf8Limit; |
| 1420 | UChar pivotBuffer[32]; |
| 1421 | UChar *pivotSource, *pivotTarget, *pivotLimit; |
| 1422 | char *target, *targetLimit, *resultLimit; |
| 1423 | UBool flush; |
| 1424 | |
| 1425 | source=cc.utf8; |
| 1426 | pivotSource=pivotTarget=pivotBuffer; |
| 1427 | target=result; |
| 1428 | utf8Limit=source+cc.utf8Length; |
| 1429 | resultLimit=result+resultCapacity; |
| 1430 | |
| 1431 | // call ucnv_convertEx() with in/out buffers no larger than (step) at a time |
| 1432 | // move only one buffer (in vs. out) at a time to be extra mean |
| 1433 | // step==0 performs bulk conversion |
| 1434 | |
| 1435 | // initialize the partial limits for the loop |
| 1436 | if(step==0) { |
| 1437 | // use the entire buffers |
| 1438 | sourceLimit=utf8Limit; |
| 1439 | targetLimit=resultLimit; |
| 1440 | flush=cc.finalFlush; |
| 1441 | |
| 1442 | pivotLimit=pivotBuffer+UPRV_LENGTHOF(pivotBuffer); |
| 1443 | } else { |
| 1444 | // start with empty partial buffers |
| 1445 | sourceLimit=source; |
| 1446 | targetLimit=target; |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1447 | flush=false; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1448 | |
| 1449 | // empty pivot is not allowed, make it of length step |
| 1450 | pivotLimit=pivotBuffer+step; |
| 1451 | } |
| 1452 | |
| 1453 | for(;;) { |
| 1454 | // resetting the opposite conversion direction must not affect this one |
| 1455 | ucnv_resetFromUnicode(utf8Cnv); |
| 1456 | ucnv_resetToUnicode(cnv); |
| 1457 | |
| 1458 | // convert |
| 1459 | ucnv_convertEx(cnv, utf8Cnv, |
| 1460 | &target, targetLimit, |
| 1461 | &source, sourceLimit, |
| 1462 | pivotBuffer, &pivotSource, &pivotTarget, pivotLimit, |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1463 | false, flush, pErrorCode); |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1464 | |
| 1465 | // check pointers and errors |
| 1466 | if(source>sourceLimit || target>targetLimit) { |
| 1467 | *pErrorCode=U_INTERNAL_PROGRAM_ERROR; |
| 1468 | break; |
| 1469 | } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { |
| 1470 | if(target!=targetLimit) { |
| 1471 | // buffer overflow must only be set when the target is filled |
| 1472 | *pErrorCode=U_INTERNAL_PROGRAM_ERROR; |
| 1473 | break; |
| 1474 | } else if(targetLimit==resultLimit) { |
| 1475 | // not just a partial overflow |
| 1476 | break; |
| 1477 | } |
| 1478 | |
| 1479 | // the partial target is filled, set a new limit, reset the error and continue |
| 1480 | targetLimit=(resultLimit-target)>=step ? target+step : resultLimit; |
| 1481 | *pErrorCode=U_ZERO_ERROR; |
| 1482 | } else if(U_FAILURE(*pErrorCode)) { |
| 1483 | if(pivotSource==pivotBuffer) { |
| 1484 | // toUnicode error, should not occur |
| 1485 | // toUnicode errors are tested in cintltst TestConvertExFromUTF8() |
| 1486 | break; |
| 1487 | } else { |
| 1488 | // fromUnicode error |
| 1489 | // some other error occurred, done |
| 1490 | break; |
| 1491 | } |
| 1492 | } else { |
| 1493 | if(source!=sourceLimit) { |
| 1494 | // when no error occurs, then the input must be consumed |
| 1495 | *pErrorCode=U_INTERNAL_PROGRAM_ERROR; |
| 1496 | break; |
| 1497 | } |
| 1498 | |
| 1499 | if(sourceLimit==utf8Limit) { |
| 1500 | // we are done |
| 1501 | if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { |
| 1502 | // ucnv_convertEx() warns about not terminating the output |
| 1503 | // but ucnv_fromUnicode() does not and so |
| 1504 | // checkFromUnicode() does not expect it |
| 1505 | *pErrorCode=U_ZERO_ERROR; |
| 1506 | } |
| 1507 | break; |
| 1508 | } |
| 1509 | |
| 1510 | // the partial conversion succeeded, set a new limit and continue |
| 1511 | sourceLimit=(utf8Limit-source)>=step ? source+step : utf8Limit; |
| 1512 | flush=(UBool)(cc.finalFlush && sourceLimit==utf8Limit); |
| 1513 | } |
| 1514 | } |
| 1515 | |
| 1516 | return (int32_t)(target-result); |
| 1517 | } |
| 1518 | |
| 1519 | static int32_t |
| 1520 | stepFromUnicode(ConversionCase &cc, UConverter *cnv, |
| 1521 | char *result, int32_t resultCapacity, |
| 1522 | int32_t *resultOffsets, /* also resultCapacity */ |
| 1523 | int32_t step, |
| 1524 | UErrorCode *pErrorCode) { |
| 1525 | const UChar *source, *sourceLimit, *unicodeLimit; |
| 1526 | char *target, *targetLimit, *resultLimit; |
| 1527 | UBool flush; |
| 1528 | |
| 1529 | source=cc.unicode; |
| 1530 | target=result; |
| 1531 | unicodeLimit=source+cc.unicodeLength; |
| 1532 | resultLimit=result+resultCapacity; |
| 1533 | |
| 1534 | // call ucnv_fromUnicode() with in/out buffers no larger than (step) at a time |
| 1535 | // move only one buffer (in vs. out) at a time to be extra mean |
| 1536 | // step==0 performs bulk conversion and generates offsets |
| 1537 | |
| 1538 | // initialize the partial limits for the loop |
| 1539 | if(step==0) { |
| 1540 | // use the entire buffers |
| 1541 | sourceLimit=unicodeLimit; |
| 1542 | targetLimit=resultLimit; |
| 1543 | flush=cc.finalFlush; |
| 1544 | } else { |
| 1545 | // start with empty partial buffers |
| 1546 | sourceLimit=source; |
| 1547 | targetLimit=target; |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1548 | flush=false; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1549 | |
| 1550 | // output offsets only for bulk conversion |
| 1551 | resultOffsets=NULL; |
| 1552 | } |
| 1553 | |
| 1554 | for(;;) { |
| 1555 | // resetting the opposite conversion direction must not affect this one |
| 1556 | ucnv_resetToUnicode(cnv); |
| 1557 | |
| 1558 | // convert |
| 1559 | ucnv_fromUnicode(cnv, |
| 1560 | &target, targetLimit, |
| 1561 | &source, sourceLimit, |
| 1562 | resultOffsets, |
| 1563 | flush, pErrorCode); |
| 1564 | |
| 1565 | // check pointers and errors |
| 1566 | if(source>sourceLimit || target>targetLimit) { |
| 1567 | *pErrorCode=U_INTERNAL_PROGRAM_ERROR; |
| 1568 | break; |
| 1569 | } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { |
| 1570 | if(target!=targetLimit) { |
| 1571 | // buffer overflow must only be set when the target is filled |
| 1572 | *pErrorCode=U_INTERNAL_PROGRAM_ERROR; |
| 1573 | break; |
| 1574 | } else if(targetLimit==resultLimit) { |
| 1575 | // not just a partial overflow |
| 1576 | break; |
| 1577 | } |
| 1578 | |
| 1579 | // the partial target is filled, set a new limit, reset the error and continue |
| 1580 | targetLimit=(resultLimit-target)>=step ? target+step : resultLimit; |
| 1581 | *pErrorCode=U_ZERO_ERROR; |
| 1582 | } else if(U_FAILURE(*pErrorCode)) { |
| 1583 | // some other error occurred, done |
| 1584 | break; |
| 1585 | } else { |
| 1586 | if(source!=sourceLimit) { |
| 1587 | // when no error occurs, then the input must be consumed |
| 1588 | *pErrorCode=U_INTERNAL_PROGRAM_ERROR; |
| 1589 | break; |
| 1590 | } |
| 1591 | |
| 1592 | if(sourceLimit==unicodeLimit) { |
| 1593 | // we are done |
| 1594 | break; |
| 1595 | } |
| 1596 | |
| 1597 | // the partial conversion succeeded, set a new limit and continue |
| 1598 | sourceLimit=(unicodeLimit-source)>=step ? source+step : unicodeLimit; |
| 1599 | flush=(UBool)(cc.finalFlush && sourceLimit==unicodeLimit); |
| 1600 | } |
| 1601 | } |
| 1602 | |
| 1603 | return (int32_t)(target-result); |
| 1604 | } |
| 1605 | |
| 1606 | UBool |
| 1607 | ConversionTest::FromUnicodeCase(ConversionCase &cc, UConverterFromUCallback callback, const char *option) { |
| 1608 | UConverter *cnv; |
| 1609 | UErrorCode errorCode; |
| 1610 | |
| 1611 | // open the converter |
| 1612 | errorCode=U_ZERO_ERROR; |
| 1613 | cnv=cnv_open(cc.charset, errorCode); |
| 1614 | if(U_FAILURE(errorCode)) { |
| 1615 | errcheckln(errorCode, "fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s", |
| 1616 | cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode)); |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1617 | return false; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1618 | } |
| 1619 | ucnv_resetToUnicode(utf8Cnv); |
| 1620 | |
| 1621 | // set the callback |
| 1622 | if(callback!=NULL) { |
| 1623 | ucnv_setFromUCallBack(cnv, callback, option, NULL, NULL, &errorCode); |
| 1624 | if(U_FAILURE(errorCode)) { |
| 1625 | errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setFromUCallBack() failed - %s", |
| 1626 | cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode)); |
| 1627 | ucnv_close(cnv); |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1628 | return false; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1629 | } |
| 1630 | } |
| 1631 | |
| 1632 | // set the fallbacks flag |
| 1633 | // TODO change with Jitterbug 2401, then add a similar call for toUnicode too |
| 1634 | ucnv_setFallback(cnv, cc.fallbacks); |
| 1635 | |
| 1636 | // set the subchar |
| 1637 | int32_t length; |
| 1638 | |
| 1639 | if(cc.setSub>0) { |
| 1640 | length=(int32_t)strlen(cc.subchar); |
| 1641 | ucnv_setSubstChars(cnv, cc.subchar, (int8_t)length, &errorCode); |
| 1642 | if(U_FAILURE(errorCode)) { |
| 1643 | errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstChars() failed - %s", |
| 1644 | cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode)); |
| 1645 | ucnv_close(cnv); |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1646 | return false; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1647 | } |
| 1648 | } else if(cc.setSub<0) { |
| 1649 | ucnv_setSubstString(cnv, cc.subString, -1, &errorCode); |
| 1650 | if(U_FAILURE(errorCode)) { |
| 1651 | errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstString() failed - %s", |
| 1652 | cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode)); |
| 1653 | ucnv_close(cnv); |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1654 | return false; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1655 | } |
| 1656 | } |
| 1657 | |
| 1658 | // convert unicode to utf8 |
| 1659 | char utf8[256]; |
| 1660 | cc.utf8=utf8; |
| 1661 | u_strToUTF8(utf8, UPRV_LENGTHOF(utf8), &cc.utf8Length, |
| 1662 | cc.unicode, cc.unicodeLength, |
| 1663 | &errorCode); |
| 1664 | if(U_FAILURE(errorCode)) { |
| 1665 | // skip UTF-8 testing of a string with an unpaired surrogate, |
| 1666 | // or of one that's too long |
| 1667 | // toUnicode errors are tested in cintltst TestConvertExFromUTF8() |
| 1668 | cc.utf8Length=-1; |
| 1669 | } |
| 1670 | |
| 1671 | int32_t resultOffsets[256]; |
| 1672 | char result[256]; |
| 1673 | int32_t resultLength; |
| 1674 | UBool ok; |
| 1675 | |
| 1676 | static const struct { |
| 1677 | int32_t step; |
| 1678 | const char *name, *utf8Name; |
| 1679 | } steps[]={ |
| 1680 | { 0, "bulk", "utf8" }, // must be first for offsets to be checked |
| 1681 | { 1, "step=1", "utf8 step=1" }, |
| 1682 | { 3, "step=3", "utf8 step=3" }, |
| 1683 | { 7, "step=7", "utf8 step=7" } |
| 1684 | }; |
| 1685 | int32_t i, step; |
| 1686 | |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1687 | ok=true; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1688 | for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) { |
| 1689 | step=steps[i].step; |
| 1690 | for (int32_t i = 0; i < UPRV_LENGTHOF(resultOffsets); i++) { |
| 1691 | resultOffsets[i] = -1; |
| 1692 | } |
| 1693 | for (int32_t i = 0; i < UPRV_LENGTHOF(result); i++) { |
| 1694 | result[i] = -1; |
| 1695 | } |
| 1696 | errorCode=U_ZERO_ERROR; |
| 1697 | resultLength=stepFromUnicode(cc, cnv, |
| 1698 | result, UPRV_LENGTHOF(result), |
| 1699 | step==0 ? resultOffsets : NULL, |
| 1700 | step, &errorCode); |
| 1701 | ok=checkFromUnicode( |
| 1702 | cc, cnv, steps[i].name, |
| 1703 | (uint8_t *)result, resultLength, |
| 1704 | cc.offsets!=NULL ? resultOffsets : NULL, |
| 1705 | errorCode); |
| 1706 | if(U_FAILURE(errorCode) || !cc.finalFlush) { |
| 1707 | // reset if an error occurred or we did not flush |
| 1708 | // otherwise do nothing to make sure that flushing resets |
| 1709 | ucnv_resetFromUnicode(cnv); |
| 1710 | } |
| 1711 | if (resultOffsets[resultLength] != -1) { |
| 1712 | errln("fromUnicode[%d](%s) Conversion wrote too much to offsets at index %d", |
| 1713 | cc.caseNr, cc.charset, resultLength); |
| 1714 | } |
| 1715 | if (result[resultLength] != (char)-1) { |
| 1716 | errln("fromUnicode[%d](%s) Conversion wrote too much to result at index %d", |
| 1717 | cc.caseNr, cc.charset, resultLength); |
| 1718 | } |
| 1719 | |
| 1720 | // bulk test is first, then offsets are not checked any more |
| 1721 | cc.offsets=NULL; |
| 1722 | |
| 1723 | // test direct conversion from UTF-8 |
| 1724 | if(cc.utf8Length>=0) { |
| 1725 | errorCode=U_ZERO_ERROR; |
| 1726 | resultLength=stepFromUTF8(cc, utf8Cnv, cnv, |
| 1727 | result, UPRV_LENGTHOF(result), |
| 1728 | step, &errorCode); |
| 1729 | ok=checkFromUnicode( |
| 1730 | cc, cnv, steps[i].utf8Name, |
| 1731 | (uint8_t *)result, resultLength, |
| 1732 | NULL, |
| 1733 | errorCode); |
| 1734 | if(U_FAILURE(errorCode) || !cc.finalFlush) { |
| 1735 | // reset if an error occurred or we did not flush |
| 1736 | // otherwise do nothing to make sure that flushing resets |
| 1737 | ucnv_resetToUnicode(utf8Cnv); |
| 1738 | ucnv_resetFromUnicode(cnv); |
| 1739 | } |
| 1740 | } |
| 1741 | } |
| 1742 | |
| 1743 | // not a real loop, just a convenience for breaking out of the block |
| 1744 | while(ok && cc.finalFlush) { |
| 1745 | // test ucnv_fromUChars() |
| 1746 | memset(result, 0, sizeof(result)); |
| 1747 | |
| 1748 | errorCode=U_ZERO_ERROR; |
| 1749 | resultLength=ucnv_fromUChars(cnv, |
| 1750 | result, UPRV_LENGTHOF(result), |
| 1751 | cc.unicode, cc.unicodeLength, |
| 1752 | &errorCode); |
| 1753 | ok=checkFromUnicode( |
| 1754 | cc, cnv, "fromUChars", |
| 1755 | (uint8_t *)result, resultLength, |
| 1756 | NULL, |
| 1757 | errorCode); |
| 1758 | if(!ok) { |
| 1759 | break; |
| 1760 | } |
| 1761 | |
| 1762 | // test preflighting |
| 1763 | // keep the correct result for simple checking |
| 1764 | errorCode=U_ZERO_ERROR; |
| 1765 | resultLength=ucnv_fromUChars(cnv, |
| 1766 | NULL, 0, |
| 1767 | cc.unicode, cc.unicodeLength, |
| 1768 | &errorCode); |
| 1769 | if(errorCode==U_STRING_NOT_TERMINATED_WARNING || errorCode==U_BUFFER_OVERFLOW_ERROR) { |
| 1770 | errorCode=U_ZERO_ERROR; |
| 1771 | } |
| 1772 | ok=checkFromUnicode( |
| 1773 | cc, cnv, "preflight fromUChars", |
| 1774 | (uint8_t *)result, resultLength, |
| 1775 | NULL, |
| 1776 | errorCode); |
| 1777 | break; |
| 1778 | } |
| 1779 | |
| 1780 | ucnv_close(cnv); |
| 1781 | return ok; |
| 1782 | } |
| 1783 | |
| 1784 | UBool |
| 1785 | ConversionTest::checkFromUnicode(ConversionCase &cc, UConverter *cnv, const char *name, |
| 1786 | const uint8_t *result, int32_t resultLength, |
| 1787 | const int32_t *resultOffsets, |
| 1788 | UErrorCode resultErrorCode) { |
| 1789 | UChar resultInvalidUChars[8]; |
| 1790 | int8_t resultInvalidLength; |
| 1791 | UErrorCode errorCode; |
| 1792 | |
| 1793 | const char *msg; |
| 1794 | |
| 1795 | // reset the message; NULL will mean "ok" |
| 1796 | msg=NULL; |
| 1797 | |
| 1798 | errorCode=U_ZERO_ERROR; |
| 1799 | resultInvalidLength=UPRV_LENGTHOF(resultInvalidUChars); |
| 1800 | ucnv_getInvalidUChars(cnv, resultInvalidUChars, &resultInvalidLength, &errorCode); |
| 1801 | if(U_FAILURE(errorCode)) { |
| 1802 | errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUChars() failed - %s", |
| 1803 | cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, u_errorName(errorCode)); |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1804 | return false; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1805 | } |
| 1806 | |
| 1807 | // check everything that might have gone wrong |
| 1808 | if(cc.bytesLength!=resultLength) { |
| 1809 | msg="wrong result length"; |
| 1810 | } else if(0!=memcmp(cc.bytes, result, cc.bytesLength)) { |
| 1811 | msg="wrong result string"; |
| 1812 | } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.bytesLength*sizeof(*cc.offsets))) { |
| 1813 | msg="wrong offsets"; |
| 1814 | } else if(cc.outErrorCode!=resultErrorCode) { |
| 1815 | msg="wrong error code"; |
| 1816 | } else if(cc.invalidLength!=resultInvalidLength) { |
| 1817 | msg="wrong length of last invalid input"; |
| 1818 | } else if(0!=u_memcmp(cc.invalidUChars, resultInvalidUChars, cc.invalidLength)) { |
| 1819 | msg="wrong last invalid input"; |
| 1820 | } |
| 1821 | |
| 1822 | if(msg==NULL) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1823 | return true; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1824 | } else { |
| 1825 | char buffer[2000]; // one buffer for all strings |
| 1826 | char *s, *unicodeString, *bytesString, *resultString, |
| 1827 | *offsetsString, *resultOffsetsString, |
| 1828 | *invalidCharsString, *resultInvalidUCharsString; |
| 1829 | |
| 1830 | unicodeString=s=buffer; |
| 1831 | s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString); |
| 1832 | s=printBytes(cc.bytes, cc.bytesLength, bytesString=s); |
| 1833 | s=printBytes(result, resultLength, resultString=s); |
| 1834 | s=printOffsets(cc.offsets, cc.bytesLength, offsetsString=s); |
| 1835 | s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s); |
| 1836 | s=printUnicode(cc.invalidUChars, cc.invalidLength, invalidCharsString=s); |
| 1837 | s=printUnicode(resultInvalidUChars, resultInvalidLength, resultInvalidUCharsString=s); |
| 1838 | |
| 1839 | if((s-buffer)>(int32_t)sizeof(buffer)) { |
| 1840 | errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkFromUnicode() test output buffer overflow writing %d chars\n", |
| 1841 | cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer)); |
| 1842 | exit(1); |
| 1843 | } |
| 1844 | |
| 1845 | errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n" |
| 1846 | " unicode <%s>[%d]\n" |
| 1847 | " expected <%s>[%d]\n" |
| 1848 | " result <%s>[%d]\n" |
| 1849 | " offsets <%s>\n" |
| 1850 | " result offsets <%s>\n" |
| 1851 | " error code expected %s got %s\n" |
| 1852 | " invalidChars expected <%s> got <%s>\n", |
| 1853 | cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, msg, |
| 1854 | unicodeString, cc.unicodeLength, |
| 1855 | bytesString, cc.bytesLength, |
| 1856 | resultString, resultLength, |
| 1857 | offsetsString, |
| 1858 | resultOffsetsString, |
| 1859 | u_errorName(cc.outErrorCode), u_errorName(resultErrorCode), |
| 1860 | invalidCharsString, resultInvalidUCharsString); |
| 1861 | |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 1862 | return false; |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 1863 | } |
| 1864 | } |
| 1865 | |
| 1866 | #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |