// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2002-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  strcase.cpp
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2002mar12
*   created by: Markus W. Scherer
*
*   Test file for string casing C++ API functions.
*/
20
21#include "unicode/std_string.h"
22#include "unicode/brkiter.h"
23#include "unicode/casemap.h"
24#include "unicode/edits.h"
25#include "unicode/uchar.h"
26#include "unicode/ures.h"
27#include "unicode/uloc.h"
28#include "unicode/locid.h"
29#include "unicode/ubrk.h"
30#include "unicode/unistr.h"
31#include "unicode/ucasemap.h"
32#include "unicode/ustring.h"
33#include "ucase.h"
34#include "ustrtest.h"
35#include "unicode/tstdtmod.h"
36#include "cmemory.h"
37#include "testutil.h"
38
39class StringCaseTest: public IntlTest {
40public:
41 StringCaseTest();
42 virtual ~StringCaseTest();
43
44 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0) override;
45
46 void TestCaseConversion();
47
48 void TestCasingImpl(const UnicodeString &input,
49 const UnicodeString &output,
50 int32_t whichCase,
51 void *iter, const char *localeID, uint32_t options);
52 void TestCasing();
53 void TestTitleOptions();
Frank Tangd2858cb2022-04-08 20:34:12 -070054 void TestDutchTitle();
Frank Tang3e05d9d2021-11-08 14:04:04 -080055 void TestFullCaseFoldingIterator();
56 void TestGreekUpper();
57 void TestArmenian();
58 void TestLongUpper();
59 void TestMalformedUTF8();
60 void TestBufferOverflow();
61 void TestEdits();
62 void TestCopyMoveEdits();
63 void TestEditsFindFwdBwd();
64 void TestMergeEdits();
65 void TestCaseMapWithEdits();
66 void TestCaseMapUTF8WithEdits();
67 void TestCaseMapToString();
68 void TestCaseMapUTF8ToString();
69 void TestLongUnicodeString();
70 void TestBug13127();
71 void TestInPlaceTitle();
72 void TestCaseMapEditsIteratorDocs();
73 void TestCaseMapGreekExtended();
74
75private:
76 void assertGreekUpper(const char16_t *s, const char16_t *expected);
77
78 Locale GREEK_LOCALE_;
79};
80
81StringCaseTest::StringCaseTest() : GREEK_LOCALE_("el") {}
82
83StringCaseTest::~StringCaseTest() {}
84
85extern IntlTest *createStringCaseTest() {
86 return new StringCaseTest();
87}
88
89void
90StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
91 if(exec) {
92 logln("TestSuite StringCaseTest: ");
93 }
94 TESTCASE_AUTO_BEGIN;
95 TESTCASE_AUTO(TestCaseConversion);
96#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
97 TESTCASE_AUTO(TestCasing);
98 TESTCASE_AUTO(TestTitleOptions);
Frank Tangd2858cb2022-04-08 20:34:12 -070099 TESTCASE_AUTO(TestDutchTitle);
Frank Tang3e05d9d2021-11-08 14:04:04 -0800100#endif
101 TESTCASE_AUTO(TestFullCaseFoldingIterator);
102 TESTCASE_AUTO(TestGreekUpper);
103 TESTCASE_AUTO(TestArmenian);
104 TESTCASE_AUTO(TestLongUpper);
105 TESTCASE_AUTO(TestMalformedUTF8);
106 TESTCASE_AUTO(TestBufferOverflow);
107 TESTCASE_AUTO(TestEdits);
108 TESTCASE_AUTO(TestCopyMoveEdits);
109 TESTCASE_AUTO(TestEditsFindFwdBwd);
110 TESTCASE_AUTO(TestMergeEdits);
111 TESTCASE_AUTO(TestCaseMapWithEdits);
112 TESTCASE_AUTO(TestCaseMapUTF8WithEdits);
113 TESTCASE_AUTO(TestCaseMapToString);
114 TESTCASE_AUTO(TestCaseMapUTF8ToString);
115 TESTCASE_AUTO(TestLongUnicodeString);
116#if !UCONFIG_NO_BREAK_ITERATION
117 TESTCASE_AUTO(TestBug13127);
118 TESTCASE_AUTO(TestInPlaceTitle);
119#endif
120 TESTCASE_AUTO(TestCaseMapEditsIteratorDocs);
121 TESTCASE_AUTO(TestCaseMapGreekExtended);
122 TESTCASE_AUTO_END;
123}
124
125void
126StringCaseTest::TestCaseConversion()
127{
128 static const UChar uppercaseGreek[] =
129 { 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4,
130 0x39f, 0x3a3, 0 };
131 // "IESUS CHRISTOS"
132
133 static const UChar lowercaseGreek[] =
134 { 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4,
135 0x3bf, 0x3c2, 0 };
136 // "iesus christos"
137
138 static const UChar lowercaseTurkish[] =
139 { 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x63, 0x6f,
140 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65, 0x21, 0 };
141
142 static const UChar uppercaseTurkish[] =
143 { 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45, 0x2c, 0x20,
144 0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 };
145
146 UnicodeString expectedResult;
147 UnicodeString test3;
148
149 test3 += (UChar32)0x0130;
150 test3 += "STANBUL, NOT CONSTANTINOPLE!";
151
152 UnicodeString test4(test3);
153 test4.toLower(Locale(""));
154 expectedResult = UnicodeString("i\\u0307stanbul, not constantinople!", "").unescape();
155 if (test4 != expectedResult)
156 errln("1. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
157
158 test4 = test3;
159 test4.toLower(Locale("tr", "TR"));
160 expectedResult = lowercaseTurkish;
161 if (test4 != expectedResult)
162 errln("2. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
163
164 test3 = "topkap";
165 test3 += (UChar32)0x0131;
166 test3 += " palace, istanbul";
167 test4 = test3;
168
169 test4.toUpper(Locale(""));
170 expectedResult = "TOPKAPI PALACE, ISTANBUL";
171 if (test4 != expectedResult)
172 errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
173
174 test4 = test3;
175 test4.toUpper(Locale("tr", "TR"));
176 expectedResult = uppercaseTurkish;
177 if (test4 != expectedResult)
178 errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
179
180 test3 = CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe");
181
182 test3.toUpper(Locale("de", "DE"));
183 expectedResult = CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE");
184 if (test3 != expectedResult)
185 errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test3 + "\".");
186
187 test4.replace(0, test4.length(), uppercaseGreek);
188
189 test4.toLower(Locale("el", "GR"));
190 expectedResult = lowercaseGreek;
191 if (test4 != expectedResult)
192 errln("toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
193
194 test4.replace(0, test4.length(), lowercaseGreek);
195
196 test4.toUpper();
197 expectedResult = uppercaseGreek;
198 if (test4 != expectedResult)
199 errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
200
201 // more string case mapping tests with the new implementation
202 {
203 static const UChar
204
205 beforeLower[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff },
206 lowerRoot[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
207 lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
208
209 beforeUpper[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xfb03, 0xfb03, 0xd93f, 0xdfff },
210 upperRoot[]= { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
211 upperTurkish[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
212
213 beforeMiniUpper[]= { 0xdf, 0x61 },
214 miniUpper[]= { 0x53, 0x53, 0x41 };
215
216 UnicodeString s;
217
218 /* lowercase with root locale */
Frank Tang1f164ee2022-11-08 12:31:27 -0800219 s=UnicodeString(false, beforeLower, UPRV_LENGTHOF(beforeLower));
Frank Tang3e05d9d2021-11-08 14:04:04 -0800220 s.toLower("");
221 if( s.length()!=UPRV_LENGTHOF(lowerRoot) ||
Frank Tang1f164ee2022-11-08 12:31:27 -0800222 s!=UnicodeString(false, lowerRoot, s.length())
Frank Tang3e05d9d2021-11-08 14:04:04 -0800223 ) {
Frank Tang1f164ee2022-11-08 12:31:27 -0800224 errln("error in toLower(root locale)=\"" + s + "\" expected \"" + UnicodeString(false, lowerRoot, UPRV_LENGTHOF(lowerRoot)) + "\"");
Frank Tang3e05d9d2021-11-08 14:04:04 -0800225 }
226
227 /* lowercase with turkish locale */
Frank Tang1f164ee2022-11-08 12:31:27 -0800228 s=UnicodeString(false, beforeLower, UPRV_LENGTHOF(beforeLower));
Frank Tang3e05d9d2021-11-08 14:04:04 -0800229 s.setCharAt(0, beforeLower[0]).toLower(Locale("tr"));
230 if( s.length()!=UPRV_LENGTHOF(lowerTurkish) ||
Frank Tang1f164ee2022-11-08 12:31:27 -0800231 s!=UnicodeString(false, lowerTurkish, s.length())
Frank Tang3e05d9d2021-11-08 14:04:04 -0800232 ) {
Frank Tang1f164ee2022-11-08 12:31:27 -0800233 errln("error in toLower(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(false, lowerTurkish, UPRV_LENGTHOF(lowerTurkish)) + "\"");
Frank Tang3e05d9d2021-11-08 14:04:04 -0800234 }
235
236 /* uppercase with root locale */
Frank Tang1f164ee2022-11-08 12:31:27 -0800237 s=UnicodeString(false, beforeUpper, UPRV_LENGTHOF(beforeUpper));
Frank Tang3e05d9d2021-11-08 14:04:04 -0800238 s.setCharAt(0, beforeUpper[0]).toUpper(Locale(""));
239 if( s.length()!=UPRV_LENGTHOF(upperRoot) ||
Frank Tang1f164ee2022-11-08 12:31:27 -0800240 s!=UnicodeString(false, upperRoot, s.length())
Frank Tang3e05d9d2021-11-08 14:04:04 -0800241 ) {
Frank Tang1f164ee2022-11-08 12:31:27 -0800242 errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(false, upperRoot, UPRV_LENGTHOF(upperRoot)) + "\"");
Frank Tang3e05d9d2021-11-08 14:04:04 -0800243 }
244
245 /* uppercase with turkish locale */
Frank Tang1f164ee2022-11-08 12:31:27 -0800246 s=UnicodeString(false, beforeUpper, UPRV_LENGTHOF(beforeUpper));
Frank Tang3e05d9d2021-11-08 14:04:04 -0800247 s.toUpper(Locale("tr"));
248 if( s.length()!=UPRV_LENGTHOF(upperTurkish) ||
Frank Tang1f164ee2022-11-08 12:31:27 -0800249 s!=UnicodeString(false, upperTurkish, s.length())
Frank Tang3e05d9d2021-11-08 14:04:04 -0800250 ) {
Frank Tang1f164ee2022-11-08 12:31:27 -0800251 errln("error in toUpper(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(false, upperTurkish, UPRV_LENGTHOF(upperTurkish)) + "\"");
Frank Tang3e05d9d2021-11-08 14:04:04 -0800252 }
253
254 /* uppercase a short string with root locale */
Frank Tang1f164ee2022-11-08 12:31:27 -0800255 s=UnicodeString(false, beforeMiniUpper, UPRV_LENGTHOF(beforeMiniUpper));
Frank Tang3e05d9d2021-11-08 14:04:04 -0800256 s.setCharAt(0, beforeMiniUpper[0]).toUpper("");
257 if( s.length()!=UPRV_LENGTHOF(miniUpper) ||
Frank Tang1f164ee2022-11-08 12:31:27 -0800258 s!=UnicodeString(false, miniUpper, s.length())
Frank Tang3e05d9d2021-11-08 14:04:04 -0800259 ) {
Frank Tang1f164ee2022-11-08 12:31:27 -0800260 errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(false, miniUpper, UPRV_LENGTHOF(miniUpper)) + "\"");
Frank Tang3e05d9d2021-11-08 14:04:04 -0800261 }
262 }
263
264 // test some supplementary characters (>= Unicode 3.1)
265 {
266 UnicodeString t;
267
268 UnicodeString
269 deseretInput=UnicodeString("\\U0001043C\\U00010414", "").unescape(),
270 deseretLower=UnicodeString("\\U0001043C\\U0001043C", "").unescape(),
271 deseretUpper=UnicodeString("\\U00010414\\U00010414", "").unescape();
272 (t=deseretInput).toLower();
273 if(t!=deseretLower) {
274 errln("error lowercasing Deseret (plane 1) characters");
275 }
276 (t=deseretInput).toUpper();
277 if(t!=deseretUpper) {
278 errln("error uppercasing Deseret (plane 1) characters");
279 }
280 }
281
282 // test some more cases that looked like problems
283 {
284 UnicodeString t;
285
286 UnicodeString
287 ljInput=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(),
288 ljLower=UnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(),
289 ljUpper=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape();
290 (t=ljInput).toLower("en");
291 if(t!=ljLower) {
292 errln("error lowercasing LJ characters");
293 }
294 (t=ljInput).toUpper("en");
295 if(t!=ljUpper) {
296 errln("error uppercasing LJ characters");
297 }
298 }
299
300#if !UCONFIG_NO_NORMALIZATION
301 // some context-sensitive casing depends on normalization data being present
302
303 // Unicode 3.1.1 SpecialCasing tests
304 {
305 UnicodeString t;
306
307 // sigmas preceded and/or followed by cased letters
308 UnicodeString
309 sigmas=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(),
310 sigmasLower=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(),
311 sigmasUpper=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape();
312
313 (t=sigmas).toLower();
314 if(t!=sigmasLower) {
315 errln("error in sigmas.toLower()=\"" + t + "\" expected \"" + sigmasLower + "\"");
316 }
317
318 (t=sigmas).toUpper(Locale(""));
319 if(t!=sigmasUpper) {
320 errln("error in sigmas.toUpper()=\"" + t + "\" expected \"" + sigmasUpper + "\"");
321 }
322
323 // turkish & azerbaijani dotless i & dotted I
324 // remove dot above if there was a capital I before and there are no more accents above
325 UnicodeString
326 dots=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(),
327 dotsTurkish=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0301", "").unescape(),
328 dotsDefault=UnicodeString("i i\\u0307 i\\u0307 i\\u0327\\u0307 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape();
329
330 (t=dots).toLower("tr");
331 if(t!=dotsTurkish) {
332 errln("error in dots.toLower(tr)=\"" + t + "\" expected \"" + dotsTurkish + "\"");
333 }
334
335 (t=dots).toLower("de");
336 if(t!=dotsDefault) {
337 errln("error in dots.toLower(de)=\"" + t + "\" expected \"" + dotsDefault + "\"");
338 }
339 }
340
341 // more Unicode 3.1.1 tests
342 {
343 UnicodeString t;
344
345 // lithuanian dot above in uppercasing
346 UnicodeString
347 dots=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(),
348 dotsLithuanian=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(),
349 dotsDefault=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape();
350
351 (t=dots).toUpper("lt");
352 if(t!=dotsLithuanian) {
353 errln("error in dots.toUpper(lt)=\"" + t + "\" expected \"" + dotsLithuanian + "\"");
354 }
355
356 (t=dots).toUpper("de");
357 if(t!=dotsDefault) {
358 errln("error in dots.toUpper(de)=\"" + t + "\" expected \"" + dotsDefault + "\"");
359 }
360
361 // lithuanian adds dot above to i in lowercasing if there are more above accents
362 UnicodeString
363 i=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(),
364 iLithuanian=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(),
365 iDefault=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape();
366
367 (t=i).toLower("lt");
368 if(t!=iLithuanian) {
369 errln("error in i.toLower(lt)=\"" + t + "\" expected \"" + iLithuanian + "\"");
370 }
371
372 (t=i).toLower("de");
373 if(t!=iDefault) {
374 errln("error in i.toLower(de)=\"" + t + "\" expected \"" + iDefault + "\"");
375 }
376 }
377
378#endif
379
380 // test case folding
381 {
382 UnicodeString
383 s=UnicodeString("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0130\\u0131", "").unescape(),
384 f=UnicodeString("ass\\u03bcffi\\U00010434i\\u0307\\u0131", "").unescape(),
385 g=UnicodeString("ass\\u03bcffi\\U00010434i\\u0131", "").unescape(),
386 t;
387
388 (t=s).foldCase();
389 if(f!=t) {
390 errln("error in foldCase(\"" + s + "\", default)=\"" + t + "\" but expected \"" + f + "\"");
391 }
392
393 // alternate handling for dotted I/dotless i (U+0130, U+0131)
394 (t=s).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I);
395 if(g!=t) {
396 errln("error in foldCase(\"" + s + "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t + "\" but expected \"" + g + "\"");
397 }
398 }
399}
400
// data-driven case mapping tests ------------------------------------------ ***

// Selects the case-mapping operation under test. The values double as
// indexes into dataNames[]; TEST_COUNT is the number of operations.
enum {
    TEST_LOWER = 0,
    TEST_UPPER,
    TEST_TITLE,
    TEST_FOLD,
    TEST_COUNT
};
410
411// names of TestData children in casing.txt
412static const char *const dataNames[TEST_COUNT+1]={
413 "lowercasing",
414 "uppercasing",
415 "titlecasing",
416 "casefolding",
417 ""
418};
419
420void
421StringCaseTest::TestCasingImpl(const UnicodeString &input,
422 const UnicodeString &output,
423 int32_t whichCase,
424 void *iter, const char *localeID, uint32_t options) {
425 // UnicodeString
426 UnicodeString result;
427 const char *name;
428 Locale locale(localeID);
429
430 result=input;
431 switch(whichCase) {
432 case TEST_LOWER:
433 name="toLower";
434 result.toLower(locale);
435 break;
436 case TEST_UPPER:
437 name="toUpper";
438 result.toUpper(locale);
439 break;
440#if !UCONFIG_NO_BREAK_ITERATION
441 case TEST_TITLE:
442 name="toTitle";
443 result.toTitle((BreakIterator *)iter, locale, options);
444 break;
445#endif
446 case TEST_FOLD:
447 name="foldCase";
448 result.foldCase(options);
449 break;
450 default:
451 name="";
452 break; // won't happen
453 }
454 if(result!=output) {
455 dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name);
Frank Tangd2858cb2022-04-08 20:34:12 -0700456 dataerrln(UnicodeString("input = [") + input + "], expected = [" + output + "], actual = [" + result + "]");
Frank Tang3e05d9d2021-11-08 14:04:04 -0800457 }
458#if !UCONFIG_NO_BREAK_ITERATION
459 if(whichCase==TEST_TITLE && options==0) {
460 result=input;
461 result.toTitle((BreakIterator *)iter, locale);
462 if(result!=output) {
463 dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res");
464 }
465 }
466#endif
467
468 // UTF-8
469 char utf8In[100], utf8Out[100];
470 int32_t utf8InLength, utf8OutLength, resultLength;
471 UChar *buffer;
472
473 IcuTestErrorCode errorCode(*this, "TestCasingImpl");
474 LocalUCaseMapPointer csm(ucasemap_open(localeID, options, errorCode));
475#if !UCONFIG_NO_BREAK_ITERATION
476 if(iter!=NULL) {
477 // Clone the break iterator so that the UCaseMap can safely adopt it.
478 UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, NULL, errorCode);
479 ucasemap_setBreakIterator(csm.getAlias(), clone, errorCode);
480 }
481#endif
482
483 u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), errorCode);
484 switch(whichCase) {
485 case TEST_LOWER:
486 name="ucasemap_utf8ToLower";
487 utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(),
488 utf8Out, (int32_t)sizeof(utf8Out),
489 utf8In, utf8InLength, errorCode);
490 break;
491 case TEST_UPPER:
492 name="ucasemap_utf8ToUpper";
493 utf8OutLength=ucasemap_utf8ToUpper(csm.getAlias(),
494 utf8Out, (int32_t)sizeof(utf8Out),
495 utf8In, utf8InLength, errorCode);
496 break;
497#if !UCONFIG_NO_BREAK_ITERATION
498 case TEST_TITLE:
499 name="ucasemap_utf8ToTitle";
500 utf8OutLength=ucasemap_utf8ToTitle(csm.getAlias(),
501 utf8Out, (int32_t)sizeof(utf8Out),
502 utf8In, utf8InLength, errorCode);
503 break;
504#endif
505 case TEST_FOLD:
506 name="ucasemap_utf8FoldCase";
507 utf8OutLength=ucasemap_utf8FoldCase(csm.getAlias(),
508 utf8Out, (int32_t)sizeof(utf8Out),
509 utf8In, utf8InLength, errorCode);
510 break;
511 default:
512 name="";
513 utf8OutLength=0;
514 break; // won't happen
515 }
516 buffer=result.getBuffer(utf8OutLength);
517 u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, errorCode);
518 result.releaseBuffer(errorCode.isSuccess() ? resultLength : 0);
519
520 if(errorCode.isFailure()) {
521 errcheckln(errorCode, "error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode));
522 errorCode.reset();
523 } else if(result!=output) {
524 errln("error: %s() got a wrong result for a test case from casing.res", name);
525 errln("expected \"" + output + "\" got \"" + result + "\"" );
526 }
527}
528
529void
530StringCaseTest::TestCasing() {
531 UErrorCode status = U_ZERO_ERROR;
532#if !UCONFIG_NO_BREAK_ITERATION
533 LocalUBreakIteratorPointer iter;
534#endif
535 char cLocaleID[100];
536 UnicodeString locale, input, output, optionsString, result;
537 uint32_t options;
538 int32_t whichCase, type;
539 LocalPointer<TestDataModule> driver(TestDataModule::getTestDataModule("casing", *this, status));
540 if(U_SUCCESS(status)) {
541 for(whichCase=0; whichCase<TEST_COUNT; ++whichCase) {
542#if UCONFIG_NO_BREAK_ITERATION
543 if(whichCase==TEST_TITLE) {
544 continue;
545 }
546#endif
547 LocalPointer<TestData> casingTest(driver->createTestData(dataNames[whichCase], status));
548 if(U_FAILURE(status)) {
549 errln("TestCasing failed to createTestData(%s) - %s", dataNames[whichCase], u_errorName(status));
550 break;
551 }
552 const DataMap *myCase = NULL;
553 while(casingTest->nextCase(myCase, status)) {
554 input = myCase->getString("Input", status);
555 output = myCase->getString("Output", status);
556
557 if(whichCase!=TEST_FOLD) {
558 locale = myCase->getString("Locale", status);
559 }
560 locale.extract(0, 0x7fffffff, cLocaleID, sizeof(cLocaleID), "");
561
562#if !UCONFIG_NO_BREAK_ITERATION
563 if(whichCase==TEST_TITLE) {
564 type = myCase->getInt("Type", status);
565 if(type>=0) {
566 iter.adoptInstead(ubrk_open((UBreakIteratorType)type, cLocaleID, NULL, 0, &status));
567 } else if(type==-2) {
568 // Open a trivial break iterator that only delivers { 0, length }
569 // or even just { 0 } as boundaries.
570 static const UChar rules[] = { 0x2e, 0x2a, 0x3b }; // ".*;"
571 UParseError parseError;
572 iter.adoptInstead(ubrk_openRules(rules, UPRV_LENGTHOF(rules), NULL, 0, &parseError, &status));
573 }
574 }
575#endif
576 options = 0;
577 if(whichCase==TEST_TITLE || whichCase==TEST_FOLD) {
578 optionsString = myCase->getString("Options", status);
579 if(optionsString.indexOf((UChar)0x54)>=0) { // T
580 options|=U_FOLD_CASE_EXCLUDE_SPECIAL_I;
581 }
582 if(optionsString.indexOf((UChar)0x4c)>=0) { // L
583 options|=U_TITLECASE_NO_LOWERCASE;
584 }
585 if(optionsString.indexOf((UChar)0x41)>=0) { // A
586 options|=U_TITLECASE_NO_BREAK_ADJUSTMENT;
587 }
588 }
589
590 if(U_FAILURE(status)) {
591 dataerrln("error: TestCasing() setup failed for %s test case from casing.res: %s", dataNames[whichCase], u_errorName(status));
592 status = U_ZERO_ERROR;
593 } else {
594#if UCONFIG_NO_BREAK_ITERATION
595 LocalPointer<UMemory> iter;
596#endif
597 TestCasingImpl(input, output, whichCase, iter.getAlias(), cLocaleID, options);
598 }
599
600#if !UCONFIG_NO_BREAK_ITERATION
601 iter.adoptInstead(NULL);
602#endif
603 }
604 }
605 }
606
607#if !UCONFIG_NO_BREAK_ITERATION
608 // more tests for API coverage
609 status=U_ZERO_ERROR;
610 input=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape();
611 (result=input).toTitle(NULL);
612 if(result!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) {
613 dataerrln("UnicodeString::toTitle(NULL) failed.");
614 }
615#endif
616}
617
618void
619StringCaseTest::TestTitleOptions() {
620 // New options in ICU 60.
621 TestCasingImpl(u"ʻcAt! ʻeTc.", u"ʻCat! ʻetc.", TEST_TITLE,
622 nullptr, "", U_TITLECASE_WHOLE_STRING);
623 TestCasingImpl(u"a ʻCaT. A ʻdOg! ʻeTc.", u"A ʻCaT. A ʻdOg! ʻETc.", TEST_TITLE,
624 nullptr, "", U_TITLECASE_SENTENCES|U_TITLECASE_NO_LOWERCASE);
625 TestCasingImpl(u"49eRs", u"49ers", TEST_TITLE,
626 nullptr, "", U_TITLECASE_WHOLE_STRING);
627 TestCasingImpl(u"«丰(aBc)»", u"«丰(abc)»", TEST_TITLE,
628 nullptr, "", U_TITLECASE_WHOLE_STRING);
629 TestCasingImpl(u"49eRs", u"49Ers", TEST_TITLE,
630 nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_ADJUST_TO_CASED);
631 TestCasingImpl(u"«丰(aBc)»", u"«丰(Abc)»", TEST_TITLE,
632 nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_ADJUST_TO_CASED);
633 TestCasingImpl(u" john. Smith", u" John. Smith", TEST_TITLE,
634 nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_NO_LOWERCASE);
635 TestCasingImpl(u" john. Smith", u" john. smith", TEST_TITLE,
636 nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_NO_BREAK_ADJUSTMENT);
637 TestCasingImpl(u"«ijs»", u"«IJs»", TEST_TITLE,
638 nullptr, "nl-BE", U_TITLECASE_WHOLE_STRING);
639 TestCasingImpl(u"«ijs»", u"«İjs»", TEST_TITLE,
640 nullptr, "tr-DE", U_TITLECASE_WHOLE_STRING);
641
642#if !UCONFIG_NO_BREAK_ITERATION
643 // Test conflicting settings.
644 // If & when we add more options, then the ORed combinations may become
645 // indistinguishable from valid values.
646 IcuTestErrorCode errorCode(*this, "TestTitleOptions");
647 CaseMap::toTitle("", U_TITLECASE_NO_BREAK_ADJUSTMENT|U_TITLECASE_ADJUST_TO_CASED, nullptr,
648 u"", 0, nullptr, 0, nullptr, errorCode);
649 if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) {
650 errln("CaseMap::toTitle(multiple adjustment options) -> %s not illegal argument",
651 errorCode.errorName());
652 }
653 errorCode.reset();
654 CaseMap::toTitle("", U_TITLECASE_WHOLE_STRING|U_TITLECASE_SENTENCES, nullptr,
655 u"", 0, nullptr, 0, nullptr, errorCode);
656 if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) {
657 errln("CaseMap::toTitle(multiple iterator options) -> %s not illegal argument",
658 errorCode.errorName());
659 }
660 errorCode.reset();
661 LocalPointer<BreakIterator> iter(
662 BreakIterator::createCharacterInstance(Locale::getRoot(), errorCode));
663 CaseMap::toTitle("", U_TITLECASE_WHOLE_STRING, iter.getAlias(),
664 u"", 0, nullptr, 0, nullptr, errorCode);
665 if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) {
666 errln("CaseMap::toTitle(iterator option + iterator) -> %s not illegal argument",
667 errorCode.errorName());
668 }
669 errorCode.reset();
670#endif
671}
672
Frank Tangd2858cb2022-04-08 20:34:12 -0700673#if !UCONFIG_NO_BREAK_ITERATION
674void StringCaseTest::TestDutchTitle() {
675 IcuTestErrorCode errorCode(*this, "TestDutchTitle");
676
677 Locale nl("nl"); // Dutch
678 LocalPointer<BreakIterator> iter(
679 BreakIterator::createWordInstance(nl, errorCode));
680
681 // Dutch titlecase check in English
682 TestCasingImpl(
683 u"ijssel igloo IJMUIDEN",
684 u"Ijssel Igloo Ijmuiden",
685 TEST_TITLE,
686 nullptr,
687 "en",
688 0);
689
690 // Dutch titlecase check in Dutch
691 TestCasingImpl(
692 u"ijssel igloo IJMUIDEN",
693 u"IJssel Igloo IJmuiden",
694 TEST_TITLE,
695 nullptr,
696 "nl",
697 0);
698
699 // Dutch titlecase check in Dutch with nolowercase option
700 if (U_SUCCESS(errorCode)) {
701 iter->setText(u"ijssel igloo IjMUIdEN iPoD ijenough");
702 TestCasingImpl(
703 u"ijssel igloo IjMUIdEN iPoD ijenough",
704 u"IJssel Igloo IJMUIdEN IPoD IJenough",
705 TEST_TITLE,
706 nullptr,
707 "nl",
708 U_TITLECASE_NO_LOWERCASE);
709 }
710
711 errorCode.reset();
712
713 // Accented IJ testing
714
715 struct dutchTitleTestCase {
716 const UnicodeString input;
717 const UnicodeString expectedFull;
718 const UnicodeString expectedOnlyChanged;
719 } dutchTitleTestCases[] = {
720 // input, expectedFull, expectedOnlyChanged
721 {u"ij", u"IJ", u"IJ"},
722 {u"IJ", u"IJ", u""},
723 {u"íj́", u"ÍJ́", u"ÍJ"},
724 {u"ÍJ́", u"ÍJ́", u""},
725 {u"íJ́", u"ÍJ́", u"Í"},
726 {u"Ij́", u"Ij́", u""},
727 {u"ij́", u"Ij́", u"I"},
728 {u"ïj́", u"Ïj́", u"Ï"},
729 {u"íj\u0308", u"Íj\u0308", u"Í"},
730 {u"íj́\U0001D16E", u"Íj́\U0001D16E", u"Í"},
731 {u"íj\u1ABE", u"Íj\u1ABE", u"Í"},
732
733 {u"ijabc", u"IJabc", u"IJ"},
734 {u"IJabc", u"IJabc", u""},
735 {u"íj́abc", u"ÍJ́abc", u"ÍJ"},
736 {u"ÍJ́abc", u"ÍJ́abc", u""},
737 {u"íJ́abc", u"ÍJ́abc", u"Í"},
738 {u"Ij́abc", u"Ij́abc", u""},
739 {u"ij́abc", u"Ij́abc", u"I"},
740 {u"ïj́abc", u"Ïj́abc", u"Ï"},
741 {u"íjabc\u0308", u"Íjabc\u0308", u"Í"},
742 {u"íj́abc\U0001D16E", u"ÍJ́abc\U0001D16E", u"ÍJ"},
743 {u"íjabc\u1ABE", u"Íjabc\u1ABE", u"Í"},
744
745 // Bug ICU-21919
746 {u"Í", u"Í", u""},
747 };
748
749 for (const auto& cas : dutchTitleTestCases) {
750 const UnicodeString &input = cas.input;
751 const UnicodeString &expectedFull = cas.expectedFull;
752 const UnicodeString &expectedOnlyChanged = cas.expectedOnlyChanged;
753
754 for (const auto& isOnlyChanged : {true, false}) {
755 uint32_t testOptions = U_TITLECASE_NO_LOWERCASE
756 | (isOnlyChanged ? U_OMIT_UNCHANGED_TEXT : 0);
757
758 const UnicodeString &expected = isOnlyChanged ? expectedOnlyChanged : expectedFull;
759
760 TestCasingImpl(
761 input,
762 expected,
763 TEST_TITLE,
764 nullptr,
765 "nl",
766 testOptions
767 );
768 }
769 }
770}
771#endif
772
Frank Tang3e05d9d2021-11-08 14:04:04 -0800773void
774StringCaseTest::TestFullCaseFoldingIterator() {
775 UnicodeString ffi=UNICODE_STRING_SIMPLE("ffi");
776 UnicodeString ss=UNICODE_STRING_SIMPLE("ss");
777 FullCaseFoldingIterator iter;
778 int32_t count=0;
779 int32_t countSpecific=0;
780 UChar32 c;
781 UnicodeString full;
782 while((c=iter.next(full))>=0) {
783 ++count;
784 // Check that the full Case_Folding has more than 1 code point.
785 if(!full.hasMoreChar32Than(0, 0x7fffffff, 1)) {
786 errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding has at most 1 code point", (long)c);
787 continue;
788 }
789 // Check that full == Case_Folding(c).
790 UnicodeString cf(c);
791 cf.foldCase();
792 if(full!=cf) {
793 errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding != cf(c)", (long)c);
794 continue;
795 }
796 // Spot-check a couple of specific cases.
797 if((full==ffi && c==0xfb03) || (full==ss && (c==0xdf || c==0x1e9e))) {
798 ++countSpecific;
799 }
800 }
801 if(countSpecific!=3) {
802 errln("error: FullCaseFoldingIterator did not yield exactly the expected specific cases");
803 }
804 if(count<70) {
805 errln("error: FullCaseFoldingIterator yielded only %d (cp, full) pairs", (int)count);
806 }
807}
808
809void
810StringCaseTest::assertGreekUpper(const char16_t *s, const char16_t *expected) {
811 UnicodeString s16(s);
812 UnicodeString expected16(expected);
813 UnicodeString msg = UnicodeString("UnicodeString::toUpper/Greek(\"") + s16 + "\")";
814 UnicodeString result16(s16);
815 result16.toUpper(GREEK_LOCALE_);
816 assertEquals(msg, expected16, result16);
817
818 msg = UnicodeString("u_strToUpper/Greek(\"") + s16 + "\") cap=";
819 int32_t length = expected16.length();
820 int32_t capacities[] = {
821 // Keep in sync with the UTF-8 capacities near the bottom of this function.
822 0, length / 2, length - 1, length, length + 1
823 };
824 for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) {
825 int32_t cap = capacities[i];
826 UChar *dest16 = result16.getBuffer(expected16.length() + 1);
827 u_memset(dest16, 0x55AA, result16.getCapacity());
828 UErrorCode errorCode = U_ZERO_ERROR;
829 length = u_strToUpper(dest16, cap, s16.getBuffer(), s16.length(), "el", &errorCode);
830 assertEquals(msg + cap, expected16.length(), length);
831 UErrorCode expectedErrorCode;
832 if (cap < expected16.length()) {
833 expectedErrorCode = U_BUFFER_OVERFLOW_ERROR;
834 } else if (cap == expected16.length()) {
835 expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING;
836 } else {
837 expectedErrorCode = U_ZERO_ERROR;
838 assertEquals(msg + cap + " NUL", 0, dest16[length]);
839 }
840 assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode);
841 result16.releaseBuffer(length);
842 if (cap >= expected16.length()) {
843 assertEquals(msg + cap, expected16, result16);
844 }
845 }
846
847 UErrorCode errorCode = U_ZERO_ERROR;
848 LocalUCaseMapPointer csm(ucasemap_open("el", 0, &errorCode));
849 assertSuccess("ucasemap_open", errorCode);
850 std::string s8;
851 s16.toUTF8String(s8);
852 msg = UnicodeString("ucasemap_utf8ToUpper/Greek(\"") + s16 + "\")";
853 char dest8[1000];
854 length = ucasemap_utf8ToUpper(csm.getAlias(), dest8, UPRV_LENGTHOF(dest8),
855 s8.data(), static_cast<int32_t>(s8.length()), &errorCode);
856 assertSuccess("ucasemap_utf8ToUpper", errorCode);
857 StringPiece result8(dest8, length);
858 UnicodeString result16From8 = UnicodeString::fromUTF8(result8);
859 assertEquals(msg, expected16, result16From8);
860
861 msg += " cap=";
862 capacities[1] = length / 2;
863 capacities[2] = length - 1;
864 capacities[3] = length;
865 capacities[4] = length + 1;
866 char dest8b[1000];
867 int32_t expected8Length = length; // Assuming the previous call worked.
868 for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) {
869 int32_t cap = capacities[i];
870 memset(dest8b, 0x5A, UPRV_LENGTHOF(dest8b));
871 UErrorCode errorCode = U_ZERO_ERROR;
872 length = ucasemap_utf8ToUpper(csm.getAlias(), dest8b, cap,
873 s8.data(), static_cast<int32_t>(s8.length()), &errorCode);
874 assertEquals(msg + cap, expected8Length, length);
875 UErrorCode expectedErrorCode;
876 if (cap < expected8Length) {
877 expectedErrorCode = U_BUFFER_OVERFLOW_ERROR;
878 } else if (cap == expected8Length) {
879 expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING;
880 } else {
881 expectedErrorCode = U_ZERO_ERROR;
882 // Casts to int32_t to avoid matching UBool.
883 assertEquals(msg + cap + " NUL", (int32_t)0, (int32_t)dest8b[length]);
884 }
885 assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode);
886 if (cap >= expected8Length) {
887 assertEquals(msg + cap + " (memcmp)", 0, memcmp(dest8, dest8b, expected8Length));
888 }
889 }
890}
891
892void
893StringCaseTest::TestGreekUpper() {
894 // https://unicode-org.atlassian.net/browse/ICU-5456
895 assertGreekUpper(u"άδικος, κείμενο, ίριδα", u"ΑΔΙΚΟΣ, ΚΕΙΜΕΝΟ, ΙΡΙΔΑ");
896 // https://bugzilla.mozilla.org/show_bug.cgi?id=307039
897 // https://bug307039.bmoattachments.org/attachment.cgi?id=194893
898 assertGreekUpper(u"Πατάτα", u"ΠΑΤΑΤΑ");
899 assertGreekUpper(u"Αέρας, Μυστήριο, Ωραίο", u"ΑΕΡΑΣ, ΜΥΣΤΗΡΙΟ, ΩΡΑΙΟ");
900 assertGreekUpper(u"Μαΐου, Πόρος, Ρύθμιση", u"ΜΑΪΟΥ, ΠΟΡΟΣ, ΡΥΘΜΙΣΗ");
901 assertGreekUpper(u"ΰ, Τηρώ, Μάιος", u"Ϋ, ΤΗΡΩ, ΜΑΪΟΣ");
902 assertGreekUpper(u"άυλος", u"ΑΫΛΟΣ");
903 assertGreekUpper(u"ΑΫΛΟΣ", u"ΑΫΛΟΣ");
904 assertGreekUpper(u"Άκλιτα ρήματα ή άκλιτες μετοχές", u"ΑΚΛΙΤΑ ΡΗΜΑΤΑ Ή ΑΚΛΙΤΕΣ ΜΕΤΟΧΕΣ");
905 // http://www.unicode.org/udhr/d/udhr_ell_monotonic.html
906 assertGreekUpper(u"Επειδή η αναγνώριση της αξιοπρέπειας", u"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ ΤΗΣ ΑΞΙΟΠΡΕΠΕΙΑΣ");
907 assertGreekUpper(u"νομικού ή διεθνούς", u"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ");
908 // http://unicode.org/udhr/d/udhr_ell_polytonic.html
909 assertGreekUpper(u"Ἐπειδὴ ἡ ἀναγνώριση", u"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ");
910 assertGreekUpper(u"νομικοῦ ἢ διεθνοῦς", u"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ");
911 // From Google bug report
912 assertGreekUpper(u"Νέο, Δημιουργία", u"ΝΕΟ, ΔΗΜΙΟΥΡΓΙΑ");
913 // http://crbug.com/234797
914 assertGreekUpper(u"Ελάτε να φάτε τα καλύτερα παϊδάκια!", u"ΕΛΑΤΕ ΝΑ ΦΑΤΕ ΤΑ ΚΑΛΥΤΕΡΑ ΠΑΪΔΑΚΙΑ!");
915 assertGreekUpper(u"Μαΐου, τρόλεϊ", u"ΜΑΪΟΥ, ΤΡΟΛΕΪ");
916 assertGreekUpper(u"Το ένα ή το άλλο.", u"ΤΟ ΕΝΑ Ή ΤΟ ΑΛΛΟ.");
917 // http://multilingualtypesetting.co.uk/blog/greek-typesetting-tips/
918 assertGreekUpper(u"ρωμέικα", u"ΡΩΜΕΪΚΑ");
919 assertGreekUpper(u"ή.", u"Ή.");
920}
921
922void StringCaseTest::TestArmenian() {
923 Locale hy("hy"); // Eastern Armenian
924 Locale hyw("hyw"); // Western Armenian
925 Locale root = Locale::getRoot();
926 // See ICU-13416:
927 // և ligature ech-yiwn
928 // uppercases to ԵՒ=ech+yiwn by default and in Western Armenian,
929 // but to ԵՎ=ech+vew in Eastern Armenian.
930 UnicodeString s(u"և Երևանի");
931
932 assertEquals("upper root", u"ԵՒ ԵՐԵՒԱՆԻ", UnicodeString(s).toUpper(root));
933 assertEquals("upper hy", u"ԵՎ ԵՐԵՎԱՆԻ", UnicodeString(s).toUpper(hy));
934 assertEquals("upper hyw", u"ԵՒ ԵՐԵՒԱՆԻ", UnicodeString(s).toUpper(hyw));
935#if !UCONFIG_NO_BREAK_ITERATION
936 assertEquals("title root", u"Եւ Երևանի", UnicodeString(s).toTitle(nullptr, root));
937 assertEquals("title hy", u"Եվ Երևանի", UnicodeString(s).toTitle(nullptr, hy));
938 assertEquals("title hyw", u"Եւ Երևանի", UnicodeString(s).toTitle(nullptr, hyw));
939#endif
940}
941
942void
943StringCaseTest::TestLongUpper() {
944 if (quick) {
945 logln("not exhaustive mode: skipping this test");
946 return;
947 }
948 // Ticket #12663, crash with an extremely long string where
949 // U+0390 maps to 0399 0308 0301 so that the result is three times as long
950 // and overflows an int32_t.
951 int32_t length = 0x40000004; // more than 1G UChars
952 UnicodeString s(length, (UChar32)0x390, length);
953 UnicodeString result;
954 UChar *dest = result.getBuffer(length + 1);
955 if (s.isBogus() || dest == NULL) {
956 logln("Out of memory, unable to run this test on this machine.");
957 return;
958 }
959 IcuTestErrorCode errorCode(*this, "TestLongUpper");
960 int32_t destLength = u_strToUpper(dest, result.getCapacity(),
961 s.getBuffer(), s.length(), "", errorCode);
962 result.releaseBuffer(destLength);
963 if (errorCode.reset() != U_INDEX_OUTOFBOUNDS_ERROR) {
964 errln("expected U_INDEX_OUTOFBOUNDS_ERROR, got %s (destLength is undefined, got %ld)",
965 errorCode.errorName(), (long)destLength);
966 }
967}
968
969void StringCaseTest::TestMalformedUTF8() {
970 // ticket #12639
971 IcuTestErrorCode errorCode(*this, "TestMalformedUTF8");
972 LocalUCaseMapPointer csm(ucasemap_open("en", U_TITLECASE_NO_BREAK_ADJUSTMENT, errorCode));
973 if (errorCode.isFailure()) {
974 errln("ucasemap_open(English) failed - %s", errorCode.errorName());
975 return;
976 }
977 char src[1] = { (char)0x85 }; // malformed UTF-8
978 char dest[3] = { 0, 0, 0 };
979 int32_t destLength;
980#if !UCONFIG_NO_BREAK_ITERATION
981 destLength = ucasemap_utf8ToTitle(csm.getAlias(), dest, 3, src, 1, errorCode);
982 if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
983 errln("ucasemap_utf8ToTitle(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
984 errorCode.errorName(), (int)destLength, dest[0]);
985 }
986#endif
987
988 errorCode.reset();
989 dest[0] = 0;
990 destLength = ucasemap_utf8ToLower(csm.getAlias(), dest, 3, src, 1, errorCode);
991 if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
992 errln("ucasemap_utf8ToLower(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
993 errorCode.errorName(), (int)destLength, dest[0]);
994 }
995
996 errorCode.reset();
997 dest[0] = 0;
998 destLength = ucasemap_utf8ToUpper(csm.getAlias(), dest, 3, src, 1, errorCode);
999 if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
1000 errln("ucasemap_utf8ToUpper(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
1001 errorCode.errorName(), (int)destLength, dest[0]);
1002 }
1003
1004 errorCode.reset();
1005 dest[0] = 0;
1006 destLength = ucasemap_utf8FoldCase(csm.getAlias(), dest, 3, src, 1, errorCode);
1007 if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
1008 errln("ucasemap_utf8FoldCase(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
1009 errorCode.errorName(), (int)destLength, dest[0]);
1010 }
1011}
1012
1013void StringCaseTest::TestBufferOverflow() {
1014 // Ticket #12849, incorrect result from Title Case preflight operation,
1015 // when buffer overflow error is expected.
1016 IcuTestErrorCode errorCode(*this, "TestBufferOverflow");
1017 LocalUCaseMapPointer csm(ucasemap_open("en", 0, errorCode));
1018 if (errorCode.isFailure()) {
1019 errln("ucasemap_open(English) failed - %s", errorCode.errorName());
1020 return;
1021 }
1022
1023 UnicodeString data("hello world");
1024 int32_t result;
1025#if !UCONFIG_NO_BREAK_ITERATION
1026 result = ucasemap_toTitle(csm.getAlias(), NULL, 0, data.getBuffer(), data.length(), errorCode);
1027 if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR || result != data.length()) {
1028 errln("%s:%d ucasemap_toTitle(\"hello world\") failed: "
1029 "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)",
1030 __FILE__, __LINE__, data.length(), errorCode.errorName(), result);
1031 }
1032#endif
1033 errorCode.reset();
1034
1035 std::string data_utf8;
1036 data.toUTF8String(data_utf8);
1037#if !UCONFIG_NO_BREAK_ITERATION
1038 result = ucasemap_utf8ToTitle(csm.getAlias(), NULL, 0, data_utf8.c_str(), static_cast<int32_t>(data_utf8.length()), errorCode);
1039 if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR || result != (int32_t)data_utf8.length()) {
1040 errln("%s:%d ucasemap_toTitle(\"hello world\") failed: "
1041 "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)",
1042 __FILE__, __LINE__, data_utf8.length(), errorCode.errorName(), result);
1043 }
1044#endif
1045 errorCode.reset();
1046}
1047
1048void StringCaseTest::TestEdits() {
1049 IcuTestErrorCode errorCode(*this, "TestEdits");
1050 Edits edits;
1051 assertFalse("new Edits hasChanges", edits.hasChanges());
1052 assertEquals("new Edits numberOfChanges", 0, edits.numberOfChanges());
1053 assertEquals("new Edits", 0, edits.lengthDelta());
1054 edits.addUnchanged(1); // multiple unchanged ranges are combined
1055 edits.addUnchanged(10000); // too long, and they are split
1056 edits.addReplace(0, 0);
1057 edits.addUnchanged(2);
1058 assertFalse("unchanged 10003 hasChanges", edits.hasChanges());
1059 assertEquals("unchanged 10003 numberOfChanges", 0, edits.numberOfChanges());
1060 assertEquals("unchanged 10003", 0, edits.lengthDelta());
1061 edits.addReplace(2, 1); // multiple short equal-lengths edits are compressed
1062 edits.addUnchanged(0);
1063 edits.addReplace(2, 1);
1064 edits.addReplace(2, 1);
1065 edits.addReplace(0, 10);
1066 edits.addReplace(100, 0);
1067 edits.addReplace(3000, 4000); // variable-length encoding
1068 edits.addReplace(100000, 100000);
1069 assertTrue("some edits hasChanges", edits.hasChanges());
1070 assertEquals("some edits numberOfChanges", 7, edits.numberOfChanges());
1071 assertEquals("some edits", -3 + 10 - 100 + 1000, edits.lengthDelta());
1072 UErrorCode outErrorCode = U_ZERO_ERROR;
1073 assertFalse("edits done: copyErrorTo", edits.copyErrorTo(outErrorCode));
1074
1075 static const EditChange coarseExpectedChanges[] = {
Frank Tang1f164ee2022-11-08 12:31:27 -08001076 { false, 10003, 10003 },
1077 { true, 103106, 104013 }
Frank Tang3e05d9d2021-11-08 14:04:04 -08001078 };
1079 TestUtility::checkEditsIter(*this, u"coarse",
1080 edits.getCoarseIterator(), edits.getCoarseIterator(),
Frank Tang1f164ee2022-11-08 12:31:27 -08001081 coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), true, errorCode);
Frank Tang3e05d9d2021-11-08 14:04:04 -08001082 TestUtility::checkEditsIter(*this, u"coarse changes",
1083 edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(),
Frank Tang1f164ee2022-11-08 12:31:27 -08001084 coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), false, errorCode);
Frank Tang3e05d9d2021-11-08 14:04:04 -08001085
1086 static const EditChange fineExpectedChanges[] = {
Frank Tang1f164ee2022-11-08 12:31:27 -08001087 { false, 10003, 10003 },
1088 { true, 2, 1 },
1089 { true, 2, 1 },
1090 { true, 2, 1 },
1091 { true, 0, 10 },
1092 { true, 100, 0 },
1093 { true, 3000, 4000 },
1094 { true, 100000, 100000 }
Frank Tang3e05d9d2021-11-08 14:04:04 -08001095 };
1096 TestUtility::checkEditsIter(*this, u"fine",
1097 edits.getFineIterator(), edits.getFineIterator(),
Frank Tang1f164ee2022-11-08 12:31:27 -08001098 fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), true, errorCode);
Frank Tang3e05d9d2021-11-08 14:04:04 -08001099 TestUtility::checkEditsIter(*this, u"fine changes",
1100 edits.getFineChangesIterator(), edits.getFineChangesIterator(),
Frank Tang1f164ee2022-11-08 12:31:27 -08001101 fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), false, errorCode);
Frank Tang3e05d9d2021-11-08 14:04:04 -08001102
1103 edits.reset();
1104 assertFalse("reset hasChanges", edits.hasChanges());
1105 assertEquals("reset numberOfChanges", 0, edits.numberOfChanges());
1106 assertEquals("reset", 0, edits.lengthDelta());
1107 Edits::Iterator ei = edits.getCoarseChangesIterator();
1108 assertFalse("reset then iterator", ei.next(errorCode));
1109}
1110
1111void StringCaseTest::TestCopyMoveEdits() {
1112 IcuTestErrorCode errorCode(*this, "TestCopyMoveEdits");
1113 // Exceed the stack array capacity.
1114 Edits a;
1115 for (int32_t i = 0; i < 250; ++i) {
1116 a.addReplace(i % 10, (i % 10) + 1);
1117 }
1118 assertEquals("a: many edits, length delta", 250, a.lengthDelta());
1119
1120 // copy
1121 Edits b(a);
1122 assertEquals("b: copy of many edits, length delta", 250, b.lengthDelta());
1123 assertEquals("a remains: many edits, length delta", 250, a.lengthDelta());
1124 TestUtility::checkEqualEdits(*this, u"b copy of a", a, b, errorCode);
1125
1126 // assign
1127 Edits c;
1128 c.addUnchanged(99);
1129 c.addReplace(88, 77);
1130 c = b;
1131 assertEquals("c: assigned many edits, length delta", 250, c.lengthDelta());
1132 assertEquals("b remains: many edits, length delta", 250, b.lengthDelta());
1133 TestUtility::checkEqualEdits(*this, u"c = b", b, c, errorCode);
1134
1135 // std::move trouble on these platforms.
1136 // See https://unicode-org.atlassian.net/browse/ICU-13393
1137#if !(U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390)
1138 // move constructor empties object with heap array
1139 Edits d(std::move(a));
1140 assertEquals("d: move-constructed many edits, length delta", 250, d.lengthDelta());
1141 assertFalse("a moved away: no more hasChanges", a.hasChanges());
1142 TestUtility::checkEqualEdits(*this, u"d() <- a", d, b, errorCode);
1143 Edits empty;
1144 TestUtility::checkEqualEdits(*this, u"a moved away", empty, a, errorCode);
1145
1146 // move assignment empties object with heap array
1147 Edits e;
1148 e.addReplace(0, 1000);
1149 e = std::move(b);
1150 assertEquals("e: move-assigned many edits, length delta", 250, e.lengthDelta());
1151 assertFalse("b moved away: no more hasChanges", b.hasChanges());
1152 TestUtility::checkEqualEdits(*this, u"e <- b", e, c, errorCode);
1153 TestUtility::checkEqualEdits(*this, u"b moved away", empty, b, errorCode);
1154
1155 // Edits::Iterator default constructor.
1156 Edits::Iterator iter;
1157 assertFalse("Edits::Iterator().next()", iter.next(errorCode));
1158 assertSuccess("Edits::Iterator().next()", errorCode);
1159 iter = e.getFineChangesIterator();
1160 assertTrue("iter.next()", iter.next(errorCode));
1161 assertSuccess("iter.next()", errorCode);
1162 assertTrue("iter.hasChange()", iter.hasChange());
1163 assertEquals("iter.newLength()", 1, iter.newLength());
1164#endif
1165}
1166
1167void StringCaseTest::TestEditsFindFwdBwd() {
1168 IcuTestErrorCode errorCode(*this, "TestEditsFindFwdBwd");
1169 // Some users need index mappings to be efficient when they are out of order.
1170 // The most interesting failure case for this test is it taking a very long time.
1171 Edits e;
1172 constexpr int32_t N = 200000;
1173 for (int32_t i = 0; i < N; ++i) {
1174 e.addUnchanged(1);
1175 e.addReplace(3, 1);
1176 }
1177 Edits::Iterator iter = e.getFineIterator();
1178 for (int32_t i = 0; i <= N; i += 2) {
1179 assertEquals("ascending", i * 2, iter.sourceIndexFromDestinationIndex(i, errorCode));
1180 assertEquals("ascending", i * 2 + 1, iter.sourceIndexFromDestinationIndex(i + 1, errorCode));
1181 }
1182 for (int32_t i = N; i >= 0; i -= 2) {
1183 assertEquals("descending", i * 2 + 1, iter.sourceIndexFromDestinationIndex(i + 1, errorCode));
1184 assertEquals("descending", i * 2, iter.sourceIndexFromDestinationIndex(i, errorCode));
1185 }
1186}
1187
1188void StringCaseTest::TestMergeEdits() {
1189 // For debugging, set -v to see matching edits up to a failure.
1190 IcuTestErrorCode errorCode(*this, "TestMergeEdits");
1191 Edits ab, bc, ac, expected_ac;
1192
1193 // Simple: Two parallel non-changes.
1194 ab.addUnchanged(2);
1195 bc.addUnchanged(2);
1196 expected_ac.addUnchanged(2);
1197
1198 // Simple: Two aligned changes.
1199 ab.addReplace(3, 2);
1200 bc.addReplace(2, 1);
1201 expected_ac.addReplace(3, 1);
1202
1203 // Unequal non-changes.
1204 ab.addUnchanged(5);
1205 bc.addUnchanged(3);
1206 expected_ac.addUnchanged(3);
1207 // ab ahead by 2
1208
1209 // Overlapping changes accumulate until they share a boundary.
1210 ab.addReplace(4, 3);
1211 bc.addReplace(3, 2);
1212 ab.addReplace(4, 3);
1213 bc.addReplace(3, 2);
1214 ab.addReplace(4, 3);
1215 bc.addReplace(3, 2);
1216 bc.addUnchanged(4);
1217 expected_ac.addReplace(14, 8);
1218 // bc ahead by 2
1219
1220 // Balance out intermediate-string lengths.
1221 ab.addUnchanged(2);
1222 expected_ac.addUnchanged(2);
1223
1224 // Insert something and delete it: Should disappear.
1225 ab.addReplace(0, 5);
1226 ab.addReplace(0, 2);
1227 bc.addReplace(7, 0);
1228
1229 // Parallel change to make a new boundary.
1230 ab.addReplace(1, 2);
1231 bc.addReplace(2, 3);
1232 expected_ac.addReplace(1, 3);
1233
1234 // Multiple ab deletions should remain separate at the boundary.
1235 ab.addReplace(1, 0);
1236 ab.addReplace(2, 0);
1237 ab.addReplace(3, 0);
1238 expected_ac.addReplace(1, 0);
1239 expected_ac.addReplace(2, 0);
1240 expected_ac.addReplace(3, 0);
1241
1242 // Unequal non-changes can be split for another boundary.
1243 ab.addUnchanged(2);
1244 bc.addUnchanged(1);
1245 expected_ac.addUnchanged(1);
1246 // ab ahead by 1
1247
1248 // Multiple bc insertions should create a boundary and remain separate.
1249 bc.addReplace(0, 4);
1250 bc.addReplace(0, 5);
1251 bc.addReplace(0, 6);
1252 expected_ac.addReplace(0, 4);
1253 expected_ac.addReplace(0, 5);
1254 expected_ac.addReplace(0, 6);
1255 // ab ahead by 1
1256
1257 // Multiple ab deletions in the middle of a bc change are merged.
1258 bc.addReplace(2, 2);
1259 // bc ahead by 1
1260 ab.addReplace(1, 0);
1261 ab.addReplace(2, 0);
1262 ab.addReplace(3, 0);
1263 ab.addReplace(4, 1);
1264 expected_ac.addReplace(11, 2);
1265
1266 // Multiple bc insertions in the middle of an ab change are merged.
1267 ab.addReplace(5, 6);
1268 bc.addReplace(3, 3);
1269 // ab ahead by 3
1270 bc.addReplace(0, 4);
1271 bc.addReplace(0, 5);
1272 bc.addReplace(0, 6);
1273 bc.addReplace(3, 7);
1274 expected_ac.addReplace(5, 25);
1275
1276 // Delete around a deletion.
1277 ab.addReplace(4, 4);
1278 ab.addReplace(3, 0);
1279 ab.addUnchanged(2);
1280 bc.addReplace(2, 2);
1281 bc.addReplace(4, 0);
1282 expected_ac.addReplace(9, 2);
1283
1284 // Insert into an insertion.
1285 ab.addReplace(0, 2);
1286 bc.addReplace(1, 1);
1287 bc.addReplace(0, 8);
1288 bc.addUnchanged(4);
1289 expected_ac.addReplace(0, 10);
1290 // bc ahead by 3
1291
1292 // Balance out intermediate-string lengths.
1293 ab.addUnchanged(3);
1294 expected_ac.addUnchanged(3);
1295
1296 // Deletions meet insertions.
1297 // Output order is arbitrary in principle, but we expect insertions first
1298 // and want to keep it that way.
1299 ab.addReplace(2, 0);
1300 ab.addReplace(4, 0);
1301 ab.addReplace(6, 0);
1302 bc.addReplace(0, 1);
1303 bc.addReplace(0, 3);
1304 bc.addReplace(0, 5);
1305 expected_ac.addReplace(0, 1);
1306 expected_ac.addReplace(0, 3);
1307 expected_ac.addReplace(0, 5);
1308 expected_ac.addReplace(2, 0);
1309 expected_ac.addReplace(4, 0);
1310 expected_ac.addReplace(6, 0);
1311
1312 // End with a non-change, so that further edits are never reordered.
1313 ab.addUnchanged(1);
1314 bc.addUnchanged(1);
1315 expected_ac.addUnchanged(1);
1316
1317 ac.mergeAndAppend(ab, bc, errorCode);
1318 assertSuccess("ab+bc", errorCode);
1319 if (!TestUtility::checkEqualEdits(*this, u"ab+bc", expected_ac, ac, errorCode)) {
1320 return;
1321 }
1322
1323 // Append more Edits.
1324 Edits ab2, bc2;
1325 ab2.addUnchanged(5);
1326 bc2.addReplace(1, 2);
1327 bc2.addUnchanged(4);
1328 expected_ac.addReplace(1, 2);
1329 expected_ac.addUnchanged(4);
1330 ac.mergeAndAppend(ab2, bc2, errorCode);
1331 assertSuccess("ab2+bc2", errorCode);
1332 if (!TestUtility::checkEqualEdits(*this, u"ab2+bc2", expected_ac, ac, errorCode)) {
1333 return;
1334 }
1335
1336 // Append empty edits.
1337 Edits empty;
1338 ac.mergeAndAppend(empty, empty, errorCode);
1339 assertSuccess("empty+empty", errorCode);
1340 if (!TestUtility::checkEqualEdits(*this, u"empty+empty", expected_ac, ac, errorCode)) {
1341 return;
1342 }
1343
1344 // Error: Append more edits with mismatched intermediate-string lengths.
1345 Edits mismatch;
1346 mismatch.addReplace(1, 1);
1347 ac.mergeAndAppend(ab2, mismatch, errorCode);
1348 assertEquals("ab2+mismatch", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get());
1349 errorCode.reset();
1350 ac.mergeAndAppend(mismatch, bc2, errorCode);
1351 assertEquals("mismatch+bc2", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get());
1352 errorCode.reset();
1353}
1354
1355void StringCaseTest::TestCaseMapWithEdits() {
1356 IcuTestErrorCode errorCode(*this, "TestCaseMapWithEdits");
1357 UChar dest[20];
1358 Edits edits;
1359
1360 int32_t length = CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT,
1361 u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
Frank Tang1f164ee2022-11-08 12:31:27 -08001362 assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"), UnicodeString(true, dest, length));
Frank Tang3e05d9d2021-11-08 14:04:04 -08001363 static const EditChange lowerExpectedChanges[] = {
Frank Tang1f164ee2022-11-08 12:31:27 -08001364 { true, 1, 1 },
1365 { false, 4, 4 },
1366 { true, 1, 1 },
1367 { false, 2, 2 }
Frank Tang3e05d9d2021-11-08 14:04:04 -08001368 };
1369 TestUtility::checkEditsIter(*this, u"toLower(IstanBul)",
1370 edits.getFineIterator(), edits.getFineIterator(),
1371 lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges),
Frank Tang1f164ee2022-11-08 12:31:27 -08001372 true, errorCode);
Frank Tang3e05d9d2021-11-08 14:04:04 -08001373
1374 edits.reset();
1375 length = CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT,
1376 u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
Frank Tang1f164ee2022-11-08 12:31:27 -08001377 assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"), UnicodeString(true, dest, length));
Frank Tang3e05d9d2021-11-08 14:04:04 -08001378 static const EditChange upperExpectedChanges[] = {
Frank Tang1f164ee2022-11-08 12:31:27 -08001379 { false, 1, 1 },
1380 { true, 1, 1 },
1381 { true, 1, 1 },
1382 { true, 1, 1 },
1383 { true, 1, 1 },
1384 { true, 1, 1 }
Frank Tang3e05d9d2021-11-08 14:04:04 -08001385 };
1386 TestUtility::checkEditsIter(*this, u"toUpper(Πατάτα)",
1387 edits.getFineIterator(), edits.getFineIterator(),
1388 upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges),
Frank Tang1f164ee2022-11-08 12:31:27 -08001389 true, errorCode);
Frank Tang3e05d9d2021-11-08 14:04:04 -08001390
1391 edits.reset();
1392
1393#if !UCONFIG_NO_BREAK_ITERATION
1394 length = CaseMap::toTitle("nl",
1395 U_OMIT_UNCHANGED_TEXT |
1396 U_TITLECASE_NO_BREAK_ADJUSTMENT |
1397 U_TITLECASE_NO_LOWERCASE,
1398 nullptr, u"IjssEL IglOo", 12,
1399 dest, UPRV_LENGTHOF(dest), &edits, errorCode);
Frank Tang1f164ee2022-11-08 12:31:27 -08001400 assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), UnicodeString(true, dest, length));
Frank Tang3e05d9d2021-11-08 14:04:04 -08001401 static const EditChange titleExpectedChanges[] = {
Frank Tang1f164ee2022-11-08 12:31:27 -08001402 { false, 1, 1 },
1403 { true, 1, 1 },
1404 { false, 10, 10 }
Frank Tang3e05d9d2021-11-08 14:04:04 -08001405 };
1406 TestUtility::checkEditsIter(*this, u"toTitle(IjssEL IglOo)",
1407 edits.getFineIterator(), edits.getFineIterator(),
1408 titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges),
Frank Tang1f164ee2022-11-08 12:31:27 -08001409 true, errorCode);
Frank Tang3e05d9d2021-11-08 14:04:04 -08001410#endif
1411
1412 // No explicit nor automatic edits.reset(). Edits should be appended.
1413 length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1414 u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
Frank Tang1f164ee2022-11-08 12:31:27 -08001415 assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"), UnicodeString(true, dest, length));
Frank Tang3e05d9d2021-11-08 14:04:04 -08001416 static const EditChange foldExpectedChanges[] = {
1417#if !UCONFIG_NO_BREAK_ITERATION
1418 // From titlecasing.
Frank Tang1f164ee2022-11-08 12:31:27 -08001419 { false, 1, 1 },
1420 { true, 1, 1 },
1421 { false, 10, 10 },
Frank Tang3e05d9d2021-11-08 14:04:04 -08001422#endif
1423 // From case folding.
Frank Tang1f164ee2022-11-08 12:31:27 -08001424 { true, 1, 1 },
1425 { true, 1, 2 },
1426 { false, 3, 3 },
1427 { true, 1, 1 },
1428 { false, 2, 2 }
Frank Tang3e05d9d2021-11-08 14:04:04 -08001429 };
1430 TestUtility::checkEditsIter(*this, u"foldCase(no Edits reset, IßtanBul)",
1431 edits.getFineIterator(), edits.getFineIterator(),
1432 foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges),
Frank Tang1f164ee2022-11-08 12:31:27 -08001433 true, errorCode);
Frank Tang3e05d9d2021-11-08 14:04:04 -08001434}
1435
1436void StringCaseTest::TestCaseMapUTF8WithEdits() {
1437 IcuTestErrorCode errorCode(*this, "TestCaseMapUTF8WithEdits");
1438 char dest[50];
1439 Edits edits;
1440
1441 int32_t length = CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT,
1442 reinterpret_cast<const char*>(u8"IstanBul"), 8,
1443 dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1444 assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"),
1445 UnicodeString::fromUTF8(StringPiece(dest, length)));
1446 static const EditChange lowerExpectedChanges[] = {
Frank Tang1f164ee2022-11-08 12:31:27 -08001447 { true, 1, 2 },
1448 { false, 4, 4 },
1449 { true, 1, 1 },
1450 { false, 2, 2 }
Frank Tang3e05d9d2021-11-08 14:04:04 -08001451 };
1452 TestUtility::checkEditsIter(*this, u"toLower(IstanBul)",
1453 edits.getFineIterator(), edits.getFineIterator(),
1454 lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges),
Frank Tang1f164ee2022-11-08 12:31:27 -08001455 true, errorCode);
Frank Tang3e05d9d2021-11-08 14:04:04 -08001456
1457 edits.reset();
1458 length = CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT,
1459 reinterpret_cast<const char*>(u8"Πατάτα"), 6 * 2,
1460 dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1461 assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"),
1462 UnicodeString::fromUTF8(StringPiece(dest, length)));
1463 static const EditChange upperExpectedChanges[] = {
Frank Tang1f164ee2022-11-08 12:31:27 -08001464 { false, 2, 2 },
1465 { true, 2, 2 },
1466 { true, 2, 2 },
1467 { true, 2, 2 },
1468 { true, 2, 2 },
1469 { true, 2, 2 }
Frank Tang3e05d9d2021-11-08 14:04:04 -08001470 };
1471 TestUtility::checkEditsIter(*this, u"toUpper(Πατάτα)",
1472 edits.getFineIterator(), edits.getFineIterator(),
1473 upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges),
Frank Tang1f164ee2022-11-08 12:31:27 -08001474 true, errorCode);
Frank Tang3e05d9d2021-11-08 14:04:04 -08001475
1476 edits.reset();
1477#if !UCONFIG_NO_BREAK_ITERATION
1478 length = CaseMap::utf8ToTitle("nl",
1479 U_OMIT_UNCHANGED_TEXT |
1480 U_TITLECASE_NO_BREAK_ADJUSTMENT |
1481 U_TITLECASE_NO_LOWERCASE,
1482 nullptr, reinterpret_cast<const char*>(u8"IjssEL IglOo"), 12,
1483 dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1484 assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"),
1485 UnicodeString::fromUTF8(StringPiece(dest, length)));
1486 static const EditChange titleExpectedChanges[] = {
Frank Tang1f164ee2022-11-08 12:31:27 -08001487 { false, 1, 1 },
1488 { true, 1, 1 },
1489 { false, 10, 10 }
Frank Tang3e05d9d2021-11-08 14:04:04 -08001490 };
1491 TestUtility::checkEditsIter(*this, u"toTitle(IjssEL IglOo)",
1492 edits.getFineIterator(), edits.getFineIterator(),
1493 titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges),
Frank Tang1f164ee2022-11-08 12:31:27 -08001494 true, errorCode);
Frank Tang3e05d9d2021-11-08 14:04:04 -08001495#endif
1496
1497 // No explicit nor automatic edits.reset(). Edits should be appended.
1498 length = CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET |
1499 U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1500 reinterpret_cast<const char*>(u8"IßtanBul"), 1 + 2 + 6,
1501 dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1502 assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"),
1503 UnicodeString::fromUTF8(StringPiece(dest, length)));
1504 static const EditChange foldExpectedChanges[] = {
1505#if !UCONFIG_NO_BREAK_ITERATION
1506 // From titlecasing.
Frank Tang1f164ee2022-11-08 12:31:27 -08001507 { false, 1, 1 },
1508 { true, 1, 1 },
1509 { false, 10, 10 },
Frank Tang3e05d9d2021-11-08 14:04:04 -08001510#endif
1511 // From case folding.
Frank Tang1f164ee2022-11-08 12:31:27 -08001512 { true, 1, 2 },
1513 { true, 2, 2 },
1514 { false, 3, 3 },
1515 { true, 1, 1 },
1516 { false, 2, 2 }
Frank Tang3e05d9d2021-11-08 14:04:04 -08001517 };
1518 TestUtility::checkEditsIter(*this, u"foldCase(IßtanBul)",
1519 edits.getFineIterator(), edits.getFineIterator(),
1520 foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges),
Frank Tang1f164ee2022-11-08 12:31:27 -08001521 true, errorCode);
Frank Tang3e05d9d2021-11-08 14:04:04 -08001522}
1523
1524void StringCaseTest::TestCaseMapToString() {
1525 // This test function name is parallel with one in UCharacterCaseTest.java.
1526 // It is a bit of a misnomer until we have CaseMap API that writes to
1527 // a UnicodeString, at which point we should change this code here.
1528 IcuTestErrorCode errorCode(*this, "TestCaseMapToString");
1529 UChar dest[20];
1530
1531 // Omit unchanged text.
1532 int32_t length = CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT,
1533 u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1534 assertEquals(u"toLower(IstanBul)",
Frank Tang1f164ee2022-11-08 12:31:27 -08001535 UnicodeString(u"ıb"), UnicodeString(true, dest, length));
Frank Tang3e05d9d2021-11-08 14:04:04 -08001536 length = CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT,
1537 u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1538 assertEquals(u"toUpper(Πατάτα)",
Frank Tang1f164ee2022-11-08 12:31:27 -08001539 UnicodeString(u"ΑΤΑΤΑ"), UnicodeString(true, dest, length));
Frank Tang3e05d9d2021-11-08 14:04:04 -08001540#if !UCONFIG_NO_BREAK_ITERATION
1541 length = CaseMap::toTitle("nl",
1542 U_OMIT_UNCHANGED_TEXT |
1543 U_TITLECASE_NO_BREAK_ADJUSTMENT |
1544 U_TITLECASE_NO_LOWERCASE,
1545 nullptr, u"IjssEL IglOo", 12,
1546 dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1547 assertEquals(u"toTitle(IjssEL IglOo)",
Frank Tang1f164ee2022-11-08 12:31:27 -08001548 UnicodeString(u"J"), UnicodeString(true, dest, length));
Frank Tang3e05d9d2021-11-08 14:04:04 -08001549#endif
1550 length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1551 u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1552 assertEquals(u"foldCase(IßtanBul)",
Frank Tang1f164ee2022-11-08 12:31:27 -08001553 UnicodeString(u"ıssb"), UnicodeString(true, dest, length));
Frank Tang3e05d9d2021-11-08 14:04:04 -08001554
1555 // Return the whole result string.
1556 length = CaseMap::toLower("tr", 0,
1557 u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1558 assertEquals(u"toLower(IstanBul)",
Frank Tang1f164ee2022-11-08 12:31:27 -08001559 UnicodeString(u"ıstanbul"), UnicodeString(true, dest, length));
Frank Tang3e05d9d2021-11-08 14:04:04 -08001560 length = CaseMap::toUpper("el", 0,
1561 u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1562 assertEquals(u"toUpper(Πατάτα)",
Frank Tang1f164ee2022-11-08 12:31:27 -08001563 UnicodeString(u"ΠΑΤΑΤΑ"), UnicodeString(true, dest, length));
Frank Tang3e05d9d2021-11-08 14:04:04 -08001564#if !UCONFIG_NO_BREAK_ITERATION
1565 length = CaseMap::toTitle("nl",
1566 U_TITLECASE_NO_BREAK_ADJUSTMENT |
1567 U_TITLECASE_NO_LOWERCASE,
1568 nullptr, u"IjssEL IglOo", 12,
1569 dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1570 assertEquals(u"toTitle(IjssEL IglOo)",
Frank Tang1f164ee2022-11-08 12:31:27 -08001571 UnicodeString(u"IJssEL IglOo"), UnicodeString(true, dest, length));
Frank Tang3e05d9d2021-11-08 14:04:04 -08001572#endif
1573 length = CaseMap::fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1574 u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1575 assertEquals(u"foldCase(IßtanBul)",
Frank Tang1f164ee2022-11-08 12:31:27 -08001576 UnicodeString(u"ısstanbul"), UnicodeString(true, dest, length));
Frank Tang3e05d9d2021-11-08 14:04:04 -08001577}
1578
1579void StringCaseTest::TestCaseMapUTF8ToString() {
1580 IcuTestErrorCode errorCode(*this, "TestCaseMapUTF8ToString");
1581 std::string dest;
1582 StringByteSink<std::string> sink(&dest);
1583
1584 // Omit unchanged text.
1585 CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT, u8"IstanBul", sink, nullptr, errorCode);
1586 assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"), UnicodeString::fromUTF8(dest));
1587 dest.clear();
1588 CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT, u8"Πατάτα", sink, nullptr, errorCode);
1589 assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"),
1590 UnicodeString::fromUTF8(dest));
1591#if !UCONFIG_NO_BREAK_ITERATION
1592 dest.clear();
1593 CaseMap::utf8ToTitle(
1594 "nl", U_OMIT_UNCHANGED_TEXT | U_TITLECASE_NO_BREAK_ADJUSTMENT | U_TITLECASE_NO_LOWERCASE,
1595 nullptr, u8"IjssEL IglOo", sink, nullptr, errorCode);
1596 assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"),
1597 UnicodeString::fromUTF8(dest));
1598#endif
1599 dest.clear();
1600 CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1601 u8"IßtanBul", sink, nullptr, errorCode);
1602 assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"),
1603 UnicodeString::fromUTF8(dest));
1604
1605 // Return the whole result string.
1606 dest.clear();
1607 CaseMap::utf8ToLower("tr", 0, u8"IstanBul", sink, nullptr, errorCode);
1608 assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıstanbul"),
1609 UnicodeString::fromUTF8(dest));
1610 dest.clear();
1611 CaseMap::utf8ToUpper("el", 0, u8"Πατάτα", sink, nullptr, errorCode);
1612 assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΠΑΤΑΤΑ"),
1613 UnicodeString::fromUTF8(dest));
1614#if !UCONFIG_NO_BREAK_ITERATION
1615 dest.clear();
1616 CaseMap::utf8ToTitle("nl", U_TITLECASE_NO_BREAK_ADJUSTMENT | U_TITLECASE_NO_LOWERCASE,
1617 nullptr, u8"IjssEL IglOo", sink, nullptr, errorCode);
1618 assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"IJssEL IglOo"),
1619 UnicodeString::fromUTF8(dest));
1620#endif
1621 dest.clear();
1622 CaseMap::utf8Fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I, u8"IßtanBul", sink, nullptr, errorCode);
1623 assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ısstanbul"),
1624 UnicodeString::fromUTF8(dest));
1625}
1626
1627void StringCaseTest::TestLongUnicodeString() {
1628 // Code coverage for UnicodeString case mapping code handling
1629 // long strings or many changes in a string.
Frank Tang1f164ee2022-11-08 12:31:27 -08001630 UnicodeString s(true,
Frank Tang3e05d9d2021-11-08 14:04:04 -08001631 (const UChar *)
1632 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1633 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1634 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1635 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1636 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1637 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF", 6 * 51);
Frank Tang1f164ee2022-11-08 12:31:27 -08001638 UnicodeString expected(true,
Frank Tang3e05d9d2021-11-08 14:04:04 -08001639 (const UChar *)
1640 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1641 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1642 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1643 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1644 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1645 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF", 6 * 51);
1646 s.toUpper(Locale::getRoot());
1647 assertEquals("string length 306", expected, s);
1648}
1649
1650#if !UCONFIG_NO_BREAK_ITERATION
1651void StringCaseTest::TestBug13127() {
1652 // Test case crashed when the bug was present.
1653 const char16_t *s16 = u"日本語";
Frank Tang1f164ee2022-11-08 12:31:27 -08001654 UnicodeString s(true, s16, -1);
Frank Tang3e05d9d2021-11-08 14:04:04 -08001655 s.toTitle(0, Locale::getEnglish());
1656}
1657
1658void StringCaseTest::TestInPlaceTitle() {
1659 // Similar to TestBug13127. u_strToTitle() can modify the buffer in-place.
1660 IcuTestErrorCode errorCode(*this, "TestInPlaceTitle");
1661 char16_t s[32] = u"ß ß ß日本語 abcdef";
1662 const char16_t *expected = u"Ss Ss Ss日本語 Abcdef";
1663 int32_t length = u_strToTitle(s, UPRV_LENGTHOF(s), s, -1, nullptr, "", errorCode);
1664 assertEquals("u_strToTitle(in-place) length", u_strlen(expected), length);
1665 assertEquals("u_strToTitle(in-place)", expected, s);
1666}
1667#endif
1668
1669void StringCaseTest::TestCaseMapEditsIteratorDocs() {
1670 IcuTestErrorCode status(*this, "TestCaseMapEditsIteratorDocs");
1671 const char16_t* input = u"abcßDeF";
1672 int32_t inputLength = u_strlen(input);
1673 // output: "abcssdef"
1674
1675 char16_t output[10];
1676 Edits edits;
1677 CaseMap::fold(0, input, -1, output, 10, &edits, status);
1678
1679 static const char16_t* fineIteratorExpected[] = {
1680 u"{ src[0..3] ≡ dest[0..3] (no-change) }",
1681 u"{ src[3..4] ⇝ dest[3..5], repl[0..2] }",
1682 u"{ src[4..5] ⇝ dest[5..6], repl[2..3] }",
1683 u"{ src[5..6] ≡ dest[6..7] (no-change) }",
1684 u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
1685 };
1686 static const char16_t* fineChangesIteratorExpected[] = {
1687 u"{ src[3..4] ⇝ dest[3..5], repl[0..2] }",
1688 u"{ src[4..5] ⇝ dest[5..6], repl[2..3] }",
1689 u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
1690 };
1691 static const char16_t* coarseIteratorExpected[] = {
1692 u"{ src[0..3] ≡ dest[0..3] (no-change) }",
1693 u"{ src[3..5] ⇝ dest[3..6], repl[0..3] }",
1694 u"{ src[5..6] ≡ dest[6..7] (no-change) }",
1695 u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
1696 };
1697 static const char16_t* coarseChangesIteratorExpected[] = {
1698 u"{ src[3..5] ⇝ dest[3..6], repl[0..3] }",
1699 u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
1700 };
1701
1702 // Expected destination indices when source index is queried
1703 static int32_t expectedDestFineEditIndices[] = {0, 0, 0, 3, 5, 6, 7};
1704 static int32_t expectedDestCoarseEditIndices[] = {0, 0, 0, 3, 3, 6, 7};
1705 static int32_t expectedDestFineStringIndices[] = {0, 1, 2, 3, 5, 6, 7};
1706 static int32_t expectedDestCoarseStringIndices[] = {0, 1, 2, 3, 6, 6, 7};
1707
1708 // Expected source indices when destination index is queried
1709 static int32_t expectedSrcFineEditIndices[] = { 0, 0, 0, 3, 3, 4, 5, 6 };
1710 static int32_t expectedSrcCoarseEditIndices[] = { 0, 0, 0, 3, 3, 3, 5, 6 };
1711 static int32_t expectedSrcFineStringIndices[] = { 0, 1, 2, 3, 4, 4, 5, 6 };
1712 static int32_t expectedSrcCoarseStringIndices[] = { 0, 1, 2, 3, 5, 5, 5, 6 };
1713
1714 // Demonstrate the iterator next() method:
1715 Edits::Iterator fineIterator = edits.getFineIterator();
1716 int i = 0;
1717 UnicodeString toString;
1718 while (fineIterator.next(status)) {
1719 UnicodeString expected = fineIteratorExpected[i++];
1720 assertEquals(UnicodeString(u"Iteration #") + i,
1721 expected,
1722 fineIterator.toString(toString.remove()));
1723 }
1724 Edits::Iterator fineChangesIterator = edits.getFineChangesIterator();
1725 i = 0;
1726 while (fineChangesIterator.next(status)) {
1727 UnicodeString expected = fineChangesIteratorExpected[i++];
1728 assertEquals(UnicodeString(u"Iteration #") + i,
1729 expected,
1730 fineChangesIterator.toString(toString.remove()));
1731 }
1732 Edits::Iterator coarseIterator = edits.getCoarseIterator();
1733 i = 0;
1734 while (coarseIterator.next(status)) {
1735 UnicodeString expected = coarseIteratorExpected[i++];
1736 assertEquals(UnicodeString(u"Iteration #") + i,
1737 expected,
1738 coarseIterator.toString(toString.remove()));
1739 }
1740 Edits::Iterator coarseChangesIterator = edits.getCoarseChangesIterator();
1741 i = 0;
1742 while (coarseChangesIterator.next(status)) {
1743 UnicodeString expected = coarseChangesIteratorExpected[i++];
1744 assertEquals(UnicodeString(u"Iteration #") + i,
1745 expected,
1746 coarseChangesIterator.toString(toString.remove()));
1747 }
1748
1749 // Demonstrate the iterator indexing methods:
1750 // fineIterator should have the same behavior as fineChangesIterator, and
1751 // coarseIterator should have the same behavior as coarseChangesIterator.
1752 for (int32_t srcIndex=0; srcIndex<inputLength; srcIndex++) {
1753 fineIterator.findSourceIndex(srcIndex, status);
1754 fineChangesIterator.findSourceIndex(srcIndex, status);
1755 coarseIterator.findSourceIndex(srcIndex, status);
1756 coarseChangesIterator.findSourceIndex(srcIndex, status);
1757
1758 assertEquals(UnicodeString("Source index: ") + srcIndex,
1759 expectedDestFineEditIndices[srcIndex],
1760 fineIterator.destinationIndex());
1761 assertEquals(UnicodeString("Source index: ") + srcIndex,
1762 expectedDestFineEditIndices[srcIndex],
1763 fineChangesIterator.destinationIndex());
1764 assertEquals(UnicodeString("Source index: ") + srcIndex,
1765 expectedDestCoarseEditIndices[srcIndex],
1766 coarseIterator.destinationIndex());
1767 assertEquals(UnicodeString("Source index: ") + srcIndex,
1768 expectedDestCoarseEditIndices[srcIndex],
1769 coarseChangesIterator.destinationIndex());
1770
1771 assertEquals(UnicodeString("Source index: ") + srcIndex,
1772 expectedDestFineStringIndices[srcIndex],
1773 fineIterator.destinationIndexFromSourceIndex(srcIndex, status));
1774 assertEquals(UnicodeString("Source index: ") + srcIndex,
1775 expectedDestFineStringIndices[srcIndex],
1776 fineChangesIterator.destinationIndexFromSourceIndex(srcIndex, status));
1777 assertEquals(UnicodeString("Source index: ") + srcIndex,
1778 expectedDestCoarseStringIndices[srcIndex],
1779 coarseIterator.destinationIndexFromSourceIndex(srcIndex, status));
1780 assertEquals(UnicodeString("Source index: ") + srcIndex,
1781 expectedDestCoarseStringIndices[srcIndex],
1782 coarseChangesIterator.destinationIndexFromSourceIndex(srcIndex, status));
1783 }
1784 for (int32_t destIndex=0; destIndex<inputLength; destIndex++) {
1785 fineIterator.findDestinationIndex(destIndex, status);
1786 fineChangesIterator.findDestinationIndex(destIndex, status);
1787 coarseIterator.findDestinationIndex(destIndex, status);
1788 coarseChangesIterator.findDestinationIndex(destIndex, status);
1789
1790 assertEquals(UnicodeString("Destination index: ") + destIndex,
1791 expectedSrcFineEditIndices[destIndex],
1792 fineIterator.sourceIndex());
1793 assertEquals(UnicodeString("Destination index: ") + destIndex,
1794 expectedSrcFineEditIndices[destIndex],
1795 fineChangesIterator.sourceIndex());
1796 assertEquals(UnicodeString("Destination index: ") + destIndex,
1797 expectedSrcCoarseEditIndices[destIndex],
1798 coarseIterator.sourceIndex());
1799 assertEquals(UnicodeString("Destination index: ") + destIndex,
1800 expectedSrcCoarseEditIndices[destIndex],
1801 coarseChangesIterator.sourceIndex());
1802
1803 assertEquals(UnicodeString("Destination index: ") + destIndex,
1804 expectedSrcFineStringIndices[destIndex],
1805 fineIterator.sourceIndexFromDestinationIndex(destIndex, status));
1806 assertEquals(UnicodeString("Destination index: ") + destIndex,
1807 expectedSrcFineStringIndices[destIndex],
1808 fineChangesIterator.sourceIndexFromDestinationIndex(destIndex, status));
1809 assertEquals(UnicodeString("Destination index: ") + destIndex,
1810 expectedSrcCoarseStringIndices[destIndex],
1811 coarseIterator.sourceIndexFromDestinationIndex(destIndex, status));
1812 assertEquals(UnicodeString("Destination index: ") + destIndex,
1813 expectedSrcCoarseStringIndices[destIndex],
1814 coarseChangesIterator.sourceIndexFromDestinationIndex(destIndex, status));
1815 }
1816}
1817
1818void StringCaseTest::TestCaseMapGreekExtended() {
1819 // Ticket 13851
1820 UnicodeString s(u"\u1F80\u1F88\u1FFC");
1821 UnicodeString result(s);
1822 result.toLower(Locale::getRoot());
1823 assertEquals(u"lower", u"\u1F80\u1F80\u1FF3", result);
1824#if !UCONFIG_NO_BREAK_ITERATION
1825 result = s;
1826 result.toTitle(nullptr, Locale::getRoot());
1827 assertEquals(u"title", u"\u1F88\u1F80\u1FF3", result);
1828#endif
1829}
1830
1831//#endif