Blame - source/test/intltest/strcase.cpp - chromium.googlesource.com/chromium/deps/icu

blob: a8f2caf99d7bfab719e722f961c389f7122050c0 [file] [log] [blame]

Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1	// © 2016 and later: Unicode, Inc. and others.
				2	// License & terms of use: http://www.unicode.org/copyright.html
				3	/*
				4	*******************************************************************************
				5	*
				6	* Copyright (C) 2002-2016, International Business Machines
				7	* Corporation and others. All Rights Reserved.
				8	*
				9	*******************************************************************************
				10	* file name: strcase.cpp
				11	* encoding: UTF-8
				12	* tab size: 8 (not used)
				13	* indentation:4
				14	*
				15	* created on: 2002mar12
				16	* created by: Markus W. Scherer
				17	*
				18	* Test file for string casing C++ API functions.
				19	*/
				20
				21	#include "unicode/std_string.h"
				22	#include "unicode/brkiter.h"
				23	#include "unicode/casemap.h"
				24	#include "unicode/edits.h"
				25	#include "unicode/uchar.h"
				26	#include "unicode/ures.h"
				27	#include "unicode/uloc.h"
				28	#include "unicode/locid.h"
				29	#include "unicode/ubrk.h"
				30	#include "unicode/unistr.h"
				31	#include "unicode/ucasemap.h"
				32	#include "unicode/ustring.h"
				33	#include "ucase.h"
				34	#include "ustrtest.h"
				35	#include "unicode/tstdtmod.h"
				36	#include "cmemory.h"
				37	#include "testutil.h"
				38
				39	class StringCaseTest: public IntlTest {
				40	public:
				41	StringCaseTest();
				42	virtual ~StringCaseTest();
				43
				44	void runIndexedTest(int32_t index, UBool exec, const char &name, char par=0) override;
				45
				46	void TestCaseConversion();
				47
				48	void TestCasingImpl(const UnicodeString &input,
				49	const UnicodeString &output,
				50	int32_t whichCase,
				51	void iter, const char localeID, uint32_t options);
				52	void TestCasing();
				53	void TestTitleOptions();
Frank Tang	d2858cb	2022-04-08 20:34:12 -0700	[diff] [blame]	54	void TestDutchTitle();
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	55	void TestFullCaseFoldingIterator();
				56	void TestGreekUpper();
				57	void TestArmenian();
				58	void TestLongUpper();
				59	void TestMalformedUTF8();
				60	void TestBufferOverflow();
				61	void TestEdits();
				62	void TestCopyMoveEdits();
				63	void TestEditsFindFwdBwd();
				64	void TestMergeEdits();
				65	void TestCaseMapWithEdits();
				66	void TestCaseMapUTF8WithEdits();
				67	void TestCaseMapToString();
				68	void TestCaseMapUTF8ToString();
				69	void TestLongUnicodeString();
				70	void TestBug13127();
				71	void TestInPlaceTitle();
				72	void TestCaseMapEditsIteratorDocs();
				73	void TestCaseMapGreekExtended();
				74
				75	private:
				76	void assertGreekUpper(const char16_t s, const char16_t expected);
				77
				78	Locale GREEK_LOCALE_;
				79	};
				80
				81	StringCaseTest::StringCaseTest() : GREEK_LOCALE_("el") {}
				82
				83	StringCaseTest::~StringCaseTest() {}
				84
				85	extern IntlTest *createStringCaseTest() {
				86	return new StringCaseTest();
				87	}
				88
				89	void
				90	StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char &name, char /par/) {
				91	if(exec) {
				92	logln("TestSuite StringCaseTest: ");
				93	}
				94	TESTCASE_AUTO_BEGIN;
				95	TESTCASE_AUTO(TestCaseConversion);
				96	#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
				97	TESTCASE_AUTO(TestCasing);
				98	TESTCASE_AUTO(TestTitleOptions);
Frank Tang	d2858cb	2022-04-08 20:34:12 -0700	[diff] [blame]	99	TESTCASE_AUTO(TestDutchTitle);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	100	#endif
				101	TESTCASE_AUTO(TestFullCaseFoldingIterator);
				102	TESTCASE_AUTO(TestGreekUpper);
				103	TESTCASE_AUTO(TestArmenian);
				104	TESTCASE_AUTO(TestLongUpper);
				105	TESTCASE_AUTO(TestMalformedUTF8);
				106	TESTCASE_AUTO(TestBufferOverflow);
				107	TESTCASE_AUTO(TestEdits);
				108	TESTCASE_AUTO(TestCopyMoveEdits);
				109	TESTCASE_AUTO(TestEditsFindFwdBwd);
				110	TESTCASE_AUTO(TestMergeEdits);
				111	TESTCASE_AUTO(TestCaseMapWithEdits);
				112	TESTCASE_AUTO(TestCaseMapUTF8WithEdits);
				113	TESTCASE_AUTO(TestCaseMapToString);
				114	TESTCASE_AUTO(TestCaseMapUTF8ToString);
				115	TESTCASE_AUTO(TestLongUnicodeString);
				116	#if !UCONFIG_NO_BREAK_ITERATION
				117	TESTCASE_AUTO(TestBug13127);
				118	TESTCASE_AUTO(TestInPlaceTitle);
				119	#endif
				120	TESTCASE_AUTO(TestCaseMapEditsIteratorDocs);
				121	TESTCASE_AUTO(TestCaseMapGreekExtended);
				122	TESTCASE_AUTO_END;
				123	}
				124
				125	void
				126	StringCaseTest::TestCaseConversion()
				127	{
				128	static const UChar uppercaseGreek[] =
				129	{ 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4,
				130	0x39f, 0x3a3, 0 };
				131	// "IESUS CHRISTOS"
				132
				133	static const UChar lowercaseGreek[] =
				134	{ 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4,
				135	0x3bf, 0x3c2, 0 };
				136	// "iesus christos"
				137
				138	static const UChar lowercaseTurkish[] =
				139	{ 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x63, 0x6f,
				140	0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65, 0x21, 0 };
				141
				142	static const UChar uppercaseTurkish[] =
				143	{ 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45, 0x2c, 0x20,
				144	0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 };
				145
				146	UnicodeString expectedResult;
				147	UnicodeString test3;
				148
				149	test3 += (UChar32)0x0130;
				150	test3 += "STANBUL, NOT CONSTANTINOPLE!";
				151
				152	UnicodeString test4(test3);
				153	test4.toLower(Locale(""));
				154	expectedResult = UnicodeString("i\\u0307stanbul, not constantinople!", "").unescape();
				155	if (test4 != expectedResult)
				156	errln("1. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
				157
				158	test4 = test3;
				159	test4.toLower(Locale("tr", "TR"));
				160	expectedResult = lowercaseTurkish;
				161	if (test4 != expectedResult)
				162	errln("2. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
				163
				164	test3 = "topkap";
				165	test3 += (UChar32)0x0131;
				166	test3 += " palace, istanbul";
				167	test4 = test3;
				168
				169	test4.toUpper(Locale(""));
				170	expectedResult = "TOPKAPI PALACE, ISTANBUL";
				171	if (test4 != expectedResult)
				172	errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
				173
				174	test4 = test3;
				175	test4.toUpper(Locale("tr", "TR"));
				176	expectedResult = uppercaseTurkish;
				177	if (test4 != expectedResult)
				178	errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
				179
				180	test3 = CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe");
				181
				182	test3.toUpper(Locale("de", "DE"));
				183	expectedResult = CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE");
				184	if (test3 != expectedResult)
				185	errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test3 + "\".");
				186
				187	test4.replace(0, test4.length(), uppercaseGreek);
				188
				189	test4.toLower(Locale("el", "GR"));
				190	expectedResult = lowercaseGreek;
				191	if (test4 != expectedResult)
				192	errln("toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
				193
				194	test4.replace(0, test4.length(), lowercaseGreek);
				195
				196	test4.toUpper();
				197	expectedResult = uppercaseGreek;
				198	if (test4 != expectedResult)
				199	errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
				200
				201	// more string case mapping tests with the new implementation
				202	{
				203	static const UChar
				204
				205	beforeLower[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff },
				206	lowerRoot[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
				207	lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
				208
				209	beforeUpper[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xfb03, 0xfb03, 0xd93f, 0xdfff },
				210	upperRoot[]= { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
				211	upperTurkish[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
				212
				213	beforeMiniUpper[]= { 0xdf, 0x61 },
				214	miniUpper[]= { 0x53, 0x53, 0x41 };
				215
				216	UnicodeString s;
				217
				218	/* lowercase with root locale */
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	219	s=UnicodeString(false, beforeLower, UPRV_LENGTHOF(beforeLower));
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	220	s.toLower("");
				221	if( s.length()!=UPRV_LENGTHOF(lowerRoot) \|\|
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	222	s!=UnicodeString(false, lowerRoot, s.length())
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	223	) {
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	224	errln("error in toLower(root locale)=\"" + s + "\" expected \"" + UnicodeString(false, lowerRoot, UPRV_LENGTHOF(lowerRoot)) + "\"");
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	225	}
				226
				227	/* lowercase with turkish locale */
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	228	s=UnicodeString(false, beforeLower, UPRV_LENGTHOF(beforeLower));
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	229	s.setCharAt(0, beforeLower[0]).toLower(Locale("tr"));
				230	if( s.length()!=UPRV_LENGTHOF(lowerTurkish) \|\|
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	231	s!=UnicodeString(false, lowerTurkish, s.length())
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	232	) {
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	233	errln("error in toLower(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(false, lowerTurkish, UPRV_LENGTHOF(lowerTurkish)) + "\"");
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	234	}
				235
				236	/* uppercase with root locale */
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	237	s=UnicodeString(false, beforeUpper, UPRV_LENGTHOF(beforeUpper));
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	238	s.setCharAt(0, beforeUpper[0]).toUpper(Locale(""));
				239	if( s.length()!=UPRV_LENGTHOF(upperRoot) \|\|
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	240	s!=UnicodeString(false, upperRoot, s.length())
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	241	) {
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	242	errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(false, upperRoot, UPRV_LENGTHOF(upperRoot)) + "\"");
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	243	}
				244
				245	/* uppercase with turkish locale */
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	246	s=UnicodeString(false, beforeUpper, UPRV_LENGTHOF(beforeUpper));
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	247	s.toUpper(Locale("tr"));
				248	if( s.length()!=UPRV_LENGTHOF(upperTurkish) \|\|
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	249	s!=UnicodeString(false, upperTurkish, s.length())
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	250	) {
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	251	errln("error in toUpper(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(false, upperTurkish, UPRV_LENGTHOF(upperTurkish)) + "\"");
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	252	}
				253
				254	/* uppercase a short string with root locale */
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	255	s=UnicodeString(false, beforeMiniUpper, UPRV_LENGTHOF(beforeMiniUpper));
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	256	s.setCharAt(0, beforeMiniUpper[0]).toUpper("");
				257	if( s.length()!=UPRV_LENGTHOF(miniUpper) \|\|
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	258	s!=UnicodeString(false, miniUpper, s.length())
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	259	) {
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	260	errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(false, miniUpper, UPRV_LENGTHOF(miniUpper)) + "\"");
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	261	}
				262	}
				263
				264	// test some supplementary characters (>= Unicode 3.1)
				265	{
				266	UnicodeString t;
				267
				268	UnicodeString
				269	deseretInput=UnicodeString("\\U0001043C\\U00010414", "").unescape(),
				270	deseretLower=UnicodeString("\\U0001043C\\U0001043C", "").unescape(),
				271	deseretUpper=UnicodeString("\\U00010414\\U00010414", "").unescape();
				272	(t=deseretInput).toLower();
				273	if(t!=deseretLower) {
				274	errln("error lowercasing Deseret (plane 1) characters");
				275	}
				276	(t=deseretInput).toUpper();
				277	if(t!=deseretUpper) {
				278	errln("error uppercasing Deseret (plane 1) characters");
				279	}
				280	}
				281
				282	// test some more cases that looked like problems
				283	{
				284	UnicodeString t;
				285
				286	UnicodeString
				287	ljInput=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(),
				288	ljLower=UnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(),
				289	ljUpper=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape();
				290	(t=ljInput).toLower("en");
				291	if(t!=ljLower) {
				292	errln("error lowercasing LJ characters");
				293	}
				294	(t=ljInput).toUpper("en");
				295	if(t!=ljUpper) {
				296	errln("error uppercasing LJ characters");
				297	}
				298	}
				299
				300	#if !UCONFIG_NO_NORMALIZATION
				301	// some context-sensitive casing depends on normalization data being present
				302
				303	// Unicode 3.1.1 SpecialCasing tests
				304	{
				305	UnicodeString t;
				306
				307	// sigmas preceded and/or followed by cased letters
				308	UnicodeString
				309	sigmas=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(),
				310	sigmasLower=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(),
				311	sigmasUpper=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape();
				312
				313	(t=sigmas).toLower();
				314	if(t!=sigmasLower) {
				315	errln("error in sigmas.toLower()=\"" + t + "\" expected \"" + sigmasLower + "\"");
				316	}
				317
				318	(t=sigmas).toUpper(Locale(""));
				319	if(t!=sigmasUpper) {
				320	errln("error in sigmas.toUpper()=\"" + t + "\" expected \"" + sigmasUpper + "\"");
				321	}
				322
				323	// turkish & azerbaijani dotless i & dotted I
				324	// remove dot above if there was a capital I before and there are no more accents above
				325	UnicodeString
				326	dots=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(),
				327	dotsTurkish=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0301", "").unescape(),
				328	dotsDefault=UnicodeString("i i\\u0307 i\\u0307 i\\u0327\\u0307 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape();
				329
				330	(t=dots).toLower("tr");
				331	if(t!=dotsTurkish) {
				332	errln("error in dots.toLower(tr)=\"" + t + "\" expected \"" + dotsTurkish + "\"");
				333	}
				334
				335	(t=dots).toLower("de");
				336	if(t!=dotsDefault) {
				337	errln("error in dots.toLower(de)=\"" + t + "\" expected \"" + dotsDefault + "\"");
				338	}
				339	}
				340
				341	// more Unicode 3.1.1 tests
				342	{
				343	UnicodeString t;
				344
				345	// lithuanian dot above in uppercasing
				346	UnicodeString
				347	dots=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(),
				348	dotsLithuanian=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(),
				349	dotsDefault=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape();
				350
				351	(t=dots).toUpper("lt");
				352	if(t!=dotsLithuanian) {
				353	errln("error in dots.toUpper(lt)=\"" + t + "\" expected \"" + dotsLithuanian + "\"");
				354	}
				355
				356	(t=dots).toUpper("de");
				357	if(t!=dotsDefault) {
				358	errln("error in dots.toUpper(de)=\"" + t + "\" expected \"" + dotsDefault + "\"");
				359	}
				360
				361	// lithuanian adds dot above to i in lowercasing if there are more above accents
				362	UnicodeString
				363	i=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(),
				364	iLithuanian=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(),
				365	iDefault=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape();
				366
				367	(t=i).toLower("lt");
				368	if(t!=iLithuanian) {
				369	errln("error in i.toLower(lt)=\"" + t + "\" expected \"" + iLithuanian + "\"");
				370	}
				371
				372	(t=i).toLower("de");
				373	if(t!=iDefault) {
				374	errln("error in i.toLower(de)=\"" + t + "\" expected \"" + iDefault + "\"");
				375	}
				376	}
				377
				378	#endif
				379
				380	// test case folding
				381	{
				382	UnicodeString
				383	s=UnicodeString("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0130\\u0131", "").unescape(),
				384	f=UnicodeString("ass\\u03bcffi\\U00010434i\\u0307\\u0131", "").unescape(),
				385	g=UnicodeString("ass\\u03bcffi\\U00010434i\\u0131", "").unescape(),
				386	t;
				387
				388	(t=s).foldCase();
				389	if(f!=t) {
				390	errln("error in foldCase(\"" + s + "\", default)=\"" + t + "\" but expected \"" + f + "\"");
				391	}
				392
				393	// alternate handling for dotted I/dotless i (U+0130, U+0131)
				394	(t=s).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I);
				395	if(g!=t) {
				396	errln("error in foldCase(\"" + s + "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t + "\" but expected \"" + g + "\"");
				397	}
				398	}
				399	}
				400
				401	// data-driven case mapping tests ------------------------------------------ ***
				402
				// Selectors for the kind of case mapping under test. The values double as
				// indexes into dataNames[]; TEST_COUNT is the number of real selectors.
				enum {
				    TEST_LOWER,
				    TEST_UPPER,
				    TEST_TITLE,
				    TEST_FOLD,
				    TEST_COUNT
				};

				// names of TestData children in casing.txt
				// (indexed by the selectors above; the extra last entry is an empty string)
				static const char *const dataNames[TEST_COUNT+1]={
				    "lowercasing",
				    "uppercasing",
				    "titlecasing",
				    "casefolding",
				    ""
				};
				419
				420	void
				421	StringCaseTest::TestCasingImpl(const UnicodeString &input,
				422	const UnicodeString &output,
				423	int32_t whichCase,
				424	void iter, const char localeID, uint32_t options) {
				425	// UnicodeString
				426	UnicodeString result;
				427	const char *name;
				428	Locale locale(localeID);
				429
				430	result=input;
				431	switch(whichCase) {
				432	case TEST_LOWER:
				433	name="toLower";
				434	result.toLower(locale);
				435	break;
				436	case TEST_UPPER:
				437	name="toUpper";
				438	result.toUpper(locale);
				439	break;
				440	#if !UCONFIG_NO_BREAK_ITERATION
				441	case TEST_TITLE:
				442	name="toTitle";
				443	result.toTitle((BreakIterator *)iter, locale, options);
				444	break;
				445	#endif
				446	case TEST_FOLD:
				447	name="foldCase";
				448	result.foldCase(options);
				449	break;
				450	default:
				451	name="";
				452	break; // won't happen
				453	}
				454	if(result!=output) {
				455	dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name);
Frank Tang	d2858cb	2022-04-08 20:34:12 -0700	[diff] [blame]	456	dataerrln(UnicodeString("input = [") + input + "], expected = [" + output + "], actual = [" + result + "]");
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	457	}
				458	#if !UCONFIG_NO_BREAK_ITERATION
				459	if(whichCase==TEST_TITLE && options==0) {
				460	result=input;
				461	result.toTitle((BreakIterator *)iter, locale);
				462	if(result!=output) {
				463	dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res");
				464	}
				465	}
				466	#endif
				467
				468	// UTF-8
				469	char utf8In[100], utf8Out[100];
				470	int32_t utf8InLength, utf8OutLength, resultLength;
				471	UChar *buffer;
				472
				473	IcuTestErrorCode errorCode(*this, "TestCasingImpl");
				474	LocalUCaseMapPointer csm(ucasemap_open(localeID, options, errorCode));
				475	#if !UCONFIG_NO_BREAK_ITERATION
				476	if(iter!=NULL) {
				477	// Clone the break iterator so that the UCaseMap can safely adopt it.
				478	UBreakIterator clone=ubrk_safeClone((UBreakIterator )iter, NULL, NULL, errorCode);
				479	ucasemap_setBreakIterator(csm.getAlias(), clone, errorCode);
				480	}
				481	#endif
				482
				483	u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), errorCode);
				484	switch(whichCase) {
				485	case TEST_LOWER:
				486	name="ucasemap_utf8ToLower";
				487	utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(),
				488	utf8Out, (int32_t)sizeof(utf8Out),
				489	utf8In, utf8InLength, errorCode);
				490	break;
				491	case TEST_UPPER:
				492	name="ucasemap_utf8ToUpper";
				493	utf8OutLength=ucasemap_utf8ToUpper(csm.getAlias(),
				494	utf8Out, (int32_t)sizeof(utf8Out),
				495	utf8In, utf8InLength, errorCode);
				496	break;
				497	#if !UCONFIG_NO_BREAK_ITERATION
				498	case TEST_TITLE:
				499	name="ucasemap_utf8ToTitle";
				500	utf8OutLength=ucasemap_utf8ToTitle(csm.getAlias(),
				501	utf8Out, (int32_t)sizeof(utf8Out),
				502	utf8In, utf8InLength, errorCode);
				503	break;
				504	#endif
				505	case TEST_FOLD:
				506	name="ucasemap_utf8FoldCase";
				507	utf8OutLength=ucasemap_utf8FoldCase(csm.getAlias(),
				508	utf8Out, (int32_t)sizeof(utf8Out),
				509	utf8In, utf8InLength, errorCode);
				510	break;
				511	default:
				512	name="";
				513	utf8OutLength=0;
				514	break; // won't happen
				515	}
				516	buffer=result.getBuffer(utf8OutLength);
				517	u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, errorCode);
				518	result.releaseBuffer(errorCode.isSuccess() ? resultLength : 0);
				519
				520	if(errorCode.isFailure()) {
				521	errcheckln(errorCode, "error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode));
				522	errorCode.reset();
				523	} else if(result!=output) {
				524	errln("error: %s() got a wrong result for a test case from casing.res", name);
				525	errln("expected \"" + output + "\" got \"" + result + "\"" );
				526	}
				527	}
				528
				529	void
				530	StringCaseTest::TestCasing() {
				531	UErrorCode status = U_ZERO_ERROR;
				532	#if !UCONFIG_NO_BREAK_ITERATION
				533	LocalUBreakIteratorPointer iter;
				534	#endif
				535	char cLocaleID[100];
				536	UnicodeString locale, input, output, optionsString, result;
				537	uint32_t options;
				538	int32_t whichCase, type;
				539	LocalPointer<TestDataModule> driver(TestDataModule::getTestDataModule("casing", *this, status));
				540	if(U_SUCCESS(status)) {
				541	for(whichCase=0; whichCase<TEST_COUNT; ++whichCase) {
				542	#if UCONFIG_NO_BREAK_ITERATION
				543	if(whichCase==TEST_TITLE) {
				544	continue;
				545	}
				546	#endif
				547	LocalPointer<TestData> casingTest(driver->createTestData(dataNames[whichCase], status));
				548	if(U_FAILURE(status)) {
				549	errln("TestCasing failed to createTestData(%s) - %s", dataNames[whichCase], u_errorName(status));
				550	break;
				551	}
				552	const DataMap *myCase = NULL;
				553	while(casingTest->nextCase(myCase, status)) {
				554	input = myCase->getString("Input", status);
				555	output = myCase->getString("Output", status);
				556
				557	if(whichCase!=TEST_FOLD) {
				558	locale = myCase->getString("Locale", status);
				559	}
				560	locale.extract(0, 0x7fffffff, cLocaleID, sizeof(cLocaleID), "");
				561
				562	#if !UCONFIG_NO_BREAK_ITERATION
				563	if(whichCase==TEST_TITLE) {
				564	type = myCase->getInt("Type", status);
				565	if(type>=0) {
				566	iter.adoptInstead(ubrk_open((UBreakIteratorType)type, cLocaleID, NULL, 0, &status));
				567	} else if(type==-2) {
				568	// Open a trivial break iterator that only delivers { 0, length }
				569	// or even just { 0 } as boundaries.
				570	static const UChar rules[] = { 0x2e, 0x2a, 0x3b }; // ".*;"
				571	UParseError parseError;
				572	iter.adoptInstead(ubrk_openRules(rules, UPRV_LENGTHOF(rules), NULL, 0, &parseError, &status));
				573	}
				574	}
				575	#endif
				576	options = 0;
				577	if(whichCase==TEST_TITLE \|\| whichCase==TEST_FOLD) {
				578	optionsString = myCase->getString("Options", status);
				579	if(optionsString.indexOf((UChar)0x54)>=0) { // T
				580	options\|=U_FOLD_CASE_EXCLUDE_SPECIAL_I;
				581	}
				582	if(optionsString.indexOf((UChar)0x4c)>=0) { // L
				583	options\|=U_TITLECASE_NO_LOWERCASE;
				584	}
				585	if(optionsString.indexOf((UChar)0x41)>=0) { // A
				586	options\|=U_TITLECASE_NO_BREAK_ADJUSTMENT;
				587	}
				588	}
				589
				590	if(U_FAILURE(status)) {
				591	dataerrln("error: TestCasing() setup failed for %s test case from casing.res: %s", dataNames[whichCase], u_errorName(status));
				592	status = U_ZERO_ERROR;
				593	} else {
				594	#if UCONFIG_NO_BREAK_ITERATION
				595	LocalPointer<UMemory> iter;
				596	#endif
				597	TestCasingImpl(input, output, whichCase, iter.getAlias(), cLocaleID, options);
				598	}
				599
				600	#if !UCONFIG_NO_BREAK_ITERATION
				601	iter.adoptInstead(NULL);
				602	#endif
				603	}
				604	}
				605	}
				606
				607	#if !UCONFIG_NO_BREAK_ITERATION
				608	// more tests for API coverage
				609	status=U_ZERO_ERROR;
				610	input=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape();
				611	(result=input).toTitle(NULL);
				612	if(result!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) {
				613	dataerrln("UnicodeString::toTitle(NULL) failed.");
				614	}
				615	#endif
				616	}
				617
				618	void
				619	StringCaseTest::TestTitleOptions() {
				620	// New options in ICU 60.
				621	TestCasingImpl(u"ʻcAt! ʻeTc.", u"ʻCat! ʻetc.", TEST_TITLE,
				622	nullptr, "", U_TITLECASE_WHOLE_STRING);
				623	TestCasingImpl(u"a ʻCaT. A ʻdOg! ʻeTc.", u"A ʻCaT. A ʻdOg! ʻETc.", TEST_TITLE,
				624	nullptr, "", U_TITLECASE_SENTENCES\|U_TITLECASE_NO_LOWERCASE);
				625	TestCasingImpl(u"49eRs", u"49ers", TEST_TITLE,
				626	nullptr, "", U_TITLECASE_WHOLE_STRING);
				627	TestCasingImpl(u"«丰(aBc)»", u"«丰(abc)»", TEST_TITLE,
				628	nullptr, "", U_TITLECASE_WHOLE_STRING);
				629	TestCasingImpl(u"49eRs", u"49Ers", TEST_TITLE,
				630	nullptr, "", U_TITLECASE_WHOLE_STRING\|U_TITLECASE_ADJUST_TO_CASED);
				631	TestCasingImpl(u"«丰(aBc)»", u"«丰(Abc)»", TEST_TITLE,
				632	nullptr, "", U_TITLECASE_WHOLE_STRING\|U_TITLECASE_ADJUST_TO_CASED);
				633	TestCasingImpl(u" john. Smith", u" John. Smith", TEST_TITLE,
				634	nullptr, "", U_TITLECASE_WHOLE_STRING\|U_TITLECASE_NO_LOWERCASE);
				635	TestCasingImpl(u" john. Smith", u" john. smith", TEST_TITLE,
				636	nullptr, "", U_TITLECASE_WHOLE_STRING\|U_TITLECASE_NO_BREAK_ADJUSTMENT);
				637	TestCasingImpl(u"«ijs»", u"«IJs»", TEST_TITLE,
				638	nullptr, "nl-BE", U_TITLECASE_WHOLE_STRING);
				639	TestCasingImpl(u"«ijs»", u"«İjs»", TEST_TITLE,
				640	nullptr, "tr-DE", U_TITLECASE_WHOLE_STRING);
				641
				642	#if !UCONFIG_NO_BREAK_ITERATION
				643	// Test conflicting settings.
				644	// If & when we add more options, then the ORed combinations may become
				645	// indistinguishable from valid values.
				646	IcuTestErrorCode errorCode(*this, "TestTitleOptions");
				647	CaseMap::toTitle("", U_TITLECASE_NO_BREAK_ADJUSTMENT\|U_TITLECASE_ADJUST_TO_CASED, nullptr,
				648	u"", 0, nullptr, 0, nullptr, errorCode);
				649	if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) {
				650	errln("CaseMap::toTitle(multiple adjustment options) -> %s not illegal argument",
				651	errorCode.errorName());
				652	}
				653	errorCode.reset();
				654	CaseMap::toTitle("", U_TITLECASE_WHOLE_STRING\|U_TITLECASE_SENTENCES, nullptr,
				655	u"", 0, nullptr, 0, nullptr, errorCode);
				656	if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) {
				657	errln("CaseMap::toTitle(multiple iterator options) -> %s not illegal argument",
				658	errorCode.errorName());
				659	}
				660	errorCode.reset();
				661	LocalPointer<BreakIterator> iter(
				662	BreakIterator::createCharacterInstance(Locale::getRoot(), errorCode));
				663	CaseMap::toTitle("", U_TITLECASE_WHOLE_STRING, iter.getAlias(),
				664	u"", 0, nullptr, 0, nullptr, errorCode);
				665	if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) {
				666	errln("CaseMap::toTitle(iterator option + iterator) -> %s not illegal argument",
				667	errorCode.errorName());
				668	}
				669	errorCode.reset();
				670	#endif
				671	}
				672
Frank Tang	d2858cb	2022-04-08 20:34:12 -0700	[diff] [blame]	673	#if !UCONFIG_NO_BREAK_ITERATION
				674	void StringCaseTest::TestDutchTitle() {
				675	IcuTestErrorCode errorCode(*this, "TestDutchTitle");
				676
				677	Locale nl("nl"); // Dutch
				678	LocalPointer<BreakIterator> iter(
				679	BreakIterator::createWordInstance(nl, errorCode));
				680
				681	// Dutch titlecase check in English
				682	TestCasingImpl(
				683	u"ijssel igloo IJMUIDEN",
				684	u"Ijssel Igloo Ijmuiden",
				685	TEST_TITLE,
				686	nullptr,
				687	"en",
				688	0);
				689
				690	// Dutch titlecase check in Dutch
				691	TestCasingImpl(
				692	u"ijssel igloo IJMUIDEN",
				693	u"IJssel Igloo IJmuiden",
				694	TEST_TITLE,
				695	nullptr,
				696	"nl",
				697	0);
				698
				699	// Dutch titlecase check in Dutch with nolowercase option
				700	if (U_SUCCESS(errorCode)) {
				701	iter->setText(u"ijssel igloo IjMUIdEN iPoD ijenough");
				702	TestCasingImpl(
				703	u"ijssel igloo IjMUIdEN iPoD ijenough",
				704	u"IJssel Igloo IJMUIdEN IPoD IJenough",
				705	TEST_TITLE,
				706	nullptr,
				707	"nl",
				708	U_TITLECASE_NO_LOWERCASE);
				709	}
				710
				711	errorCode.reset();
				712
				713	// Accented IJ testing
				714
				715	struct dutchTitleTestCase {
				716	const UnicodeString input;
				717	const UnicodeString expectedFull;
				718	const UnicodeString expectedOnlyChanged;
				719	} dutchTitleTestCases[] = {
				720	// input, expectedFull, expectedOnlyChanged
				721	{u"ij", u"IJ", u"IJ"},
				722	{u"IJ", u"IJ", u""},
				723	{u"íj́", u"ÍJ́", u"ÍJ"},
				724	{u"ÍJ́", u"ÍJ́", u""},
				725	{u"íJ́", u"ÍJ́", u"Í"},
				726	{u"Ij́", u"Ij́", u""},
				727	{u"ij́", u"Ij́", u"I"},
				728	{u"ïj́", u"Ïj́", u"Ï"},
				729	{u"íj\u0308", u"Íj\u0308", u"Í"},
				730	{u"íj́\U0001D16E", u"Íj́\U0001D16E", u"Í"},
				731	{u"íj\u1ABE", u"Íj\u1ABE", u"Í"},
				732
				733	{u"ijabc", u"IJabc", u"IJ"},
				734	{u"IJabc", u"IJabc", u""},
				735	{u"íj́abc", u"ÍJ́abc", u"ÍJ"},
				736	{u"ÍJ́abc", u"ÍJ́abc", u""},
				737	{u"íJ́abc", u"ÍJ́abc", u"Í"},
				738	{u"Ij́abc", u"Ij́abc", u""},
				739	{u"ij́abc", u"Ij́abc", u"I"},
				740	{u"ïj́abc", u"Ïj́abc", u"Ï"},
				741	{u"íjabc\u0308", u"Íjabc\u0308", u"Í"},
				742	{u"íj́abc\U0001D16E", u"ÍJ́abc\U0001D16E", u"ÍJ"},
				743	{u"íjabc\u1ABE", u"Íjabc\u1ABE", u"Í"},
				744
				745	// Bug ICU-21919
				746	{u"Í", u"Í", u""},
				747	};
				748
				749	for (const auto& cas : dutchTitleTestCases) {
				750	const UnicodeString &input = cas.input;
				751	const UnicodeString &expectedFull = cas.expectedFull;
				752	const UnicodeString &expectedOnlyChanged = cas.expectedOnlyChanged;
				753
				754	for (const auto& isOnlyChanged : {true, false}) {
				755	uint32_t testOptions = U_TITLECASE_NO_LOWERCASE
				756	\| (isOnlyChanged ? U_OMIT_UNCHANGED_TEXT : 0);
				757
				758	const UnicodeString &expected = isOnlyChanged ? expectedOnlyChanged : expectedFull;
				759
				760	TestCasingImpl(
				761	input,
				762	expected,
				763	TEST_TITLE,
				764	nullptr,
				765	"nl",
				766	testOptions
				767	);
				768	}
				769	}
				770	}
				771	#endif
				772
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	773	void
				774	StringCaseTest::TestFullCaseFoldingIterator() {
				775	UnicodeString ffi=UNICODE_STRING_SIMPLE("ffi");
				776	UnicodeString ss=UNICODE_STRING_SIMPLE("ss");
				777	FullCaseFoldingIterator iter;
				778	int32_t count=0;
				779	int32_t countSpecific=0;
				780	UChar32 c;
				781	UnicodeString full;
				782	while((c=iter.next(full))>=0) {
				783	++count;
				784	// Check that the full Case_Folding has more than 1 code point.
				785	if(!full.hasMoreChar32Than(0, 0x7fffffff, 1)) {
				786	errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding has at most 1 code point", (long)c);
				787	continue;
				788	}
				789	// Check that full == Case_Folding(c).
				790	UnicodeString cf(c);
				791	cf.foldCase();
				792	if(full!=cf) {
				793	errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding != cf(c)", (long)c);
				794	continue;
				795	}
				796	// Spot-check a couple of specific cases.
				797	if((full==ffi && c==0xfb03) \|\| (full==ss && (c==0xdf \|\| c==0x1e9e))) {
				798	++countSpecific;
				799	}
				800	}
				801	if(countSpecific!=3) {
				802	errln("error: FullCaseFoldingIterator did not yield exactly the expected specific cases");
				803	}
				804	if(count<70) {
				805	errln("error: FullCaseFoldingIterator yielded only %d (cp, full) pairs", (int)count);
				806	}
				807	}
				808
				809	void
				810	StringCaseTest::assertGreekUpper(const char16_t s, const char16_t expected) {
				811	UnicodeString s16(s);
				812	UnicodeString expected16(expected);
				813	UnicodeString msg = UnicodeString("UnicodeString::toUpper/Greek(\"") + s16 + "\")";
				814	UnicodeString result16(s16);
				815	result16.toUpper(GREEK_LOCALE_);
				816	assertEquals(msg, expected16, result16);
				817
				818	msg = UnicodeString("u_strToUpper/Greek(\"") + s16 + "\") cap=";
				819	int32_t length = expected16.length();
				820	int32_t capacities[] = {
				821	// Keep in sync with the UTF-8 capacities near the bottom of this function.
				822	0, length / 2, length - 1, length, length + 1
				823	};
				824	for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) {
				825	int32_t cap = capacities[i];
				826	UChar *dest16 = result16.getBuffer(expected16.length() + 1);
				827	u_memset(dest16, 0x55AA, result16.getCapacity());
				828	UErrorCode errorCode = U_ZERO_ERROR;
				829	length = u_strToUpper(dest16, cap, s16.getBuffer(), s16.length(), "el", &errorCode);
				830	assertEquals(msg + cap, expected16.length(), length);
				831	UErrorCode expectedErrorCode;
				832	if (cap < expected16.length()) {
				833	expectedErrorCode = U_BUFFER_OVERFLOW_ERROR;
				834	} else if (cap == expected16.length()) {
				835	expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING;
				836	} else {
				837	expectedErrorCode = U_ZERO_ERROR;
				838	assertEquals(msg + cap + " NUL", 0, dest16[length]);
				839	}
				840	assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode);
				841	result16.releaseBuffer(length);
				842	if (cap >= expected16.length()) {
				843	assertEquals(msg + cap, expected16, result16);
				844	}
				845	}
				846
				847	UErrorCode errorCode = U_ZERO_ERROR;
				848	LocalUCaseMapPointer csm(ucasemap_open("el", 0, &errorCode));
				849	assertSuccess("ucasemap_open", errorCode);
				850	std::string s8;
				851	s16.toUTF8String(s8);
				852	msg = UnicodeString("ucasemap_utf8ToUpper/Greek(\"") + s16 + "\")";
				853	char dest8[1000];
				854	length = ucasemap_utf8ToUpper(csm.getAlias(), dest8, UPRV_LENGTHOF(dest8),
				855	s8.data(), static_cast<int32_t>(s8.length()), &errorCode);
				856	assertSuccess("ucasemap_utf8ToUpper", errorCode);
				857	StringPiece result8(dest8, length);
				858	UnicodeString result16From8 = UnicodeString::fromUTF8(result8);
				859	assertEquals(msg, expected16, result16From8);
				860
				861	msg += " cap=";
				862	capacities[1] = length / 2;
				863	capacities[2] = length - 1;
				864	capacities[3] = length;
				865	capacities[4] = length + 1;
				866	char dest8b[1000];
				867	int32_t expected8Length = length; // Assuming the previous call worked.
				868	for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) {
				869	int32_t cap = capacities[i];
				870	memset(dest8b, 0x5A, UPRV_LENGTHOF(dest8b));
				871	UErrorCode errorCode = U_ZERO_ERROR;
				872	length = ucasemap_utf8ToUpper(csm.getAlias(), dest8b, cap,
				873	s8.data(), static_cast<int32_t>(s8.length()), &errorCode);
				874	assertEquals(msg + cap, expected8Length, length);
				875	UErrorCode expectedErrorCode;
				876	if (cap < expected8Length) {
				877	expectedErrorCode = U_BUFFER_OVERFLOW_ERROR;
				878	} else if (cap == expected8Length) {
				879	expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING;
				880	} else {
				881	expectedErrorCode = U_ZERO_ERROR;
				882	// Casts to int32_t to avoid matching UBool.
				883	assertEquals(msg + cap + " NUL", (int32_t)0, (int32_t)dest8b[length]);
				884	}
				885	assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode);
				886	if (cap >= expected8Length) {
				887	assertEquals(msg + cap + " (memcmp)", 0, memcmp(dest8, dest8b, expected8Length));
				888	}
				889	}
				890	}
				891
				892	void
				893	StringCaseTest::TestGreekUpper() {
				894	// https://unicode-org.atlassian.net/browse/ICU-5456
				895	assertGreekUpper(u"άδικος, κείμενο, ίριδα", u"ΑΔΙΚΟΣ, ΚΕΙΜΕΝΟ, ΙΡΙΔΑ");
				896	// https://bugzilla.mozilla.org/show_bug.cgi?id=307039
				897	// https://bug307039.bmoattachments.org/attachment.cgi?id=194893
				898	assertGreekUpper(u"Πατάτα", u"ΠΑΤΑΤΑ");
				899	assertGreekUpper(u"Αέρας, Μυστήριο, Ωραίο", u"ΑΕΡΑΣ, ΜΥΣΤΗΡΙΟ, ΩΡΑΙΟ");
				900	assertGreekUpper(u"Μαΐου, Πόρος, Ρύθμιση", u"ΜΑΪΟΥ, ΠΟΡΟΣ, ΡΥΘΜΙΣΗ");
				901	assertGreekUpper(u"ΰ, Τηρώ, Μάιος", u"Ϋ, ΤΗΡΩ, ΜΑΪΟΣ");
				902	assertGreekUpper(u"άυλος", u"ΑΫΛΟΣ");
				903	assertGreekUpper(u"ΑΫΛΟΣ", u"ΑΫΛΟΣ");
				904	assertGreekUpper(u"Άκλιτα ρήματα ή άκλιτες μετοχές", u"ΑΚΛΙΤΑ ΡΗΜΑΤΑ Ή ΑΚΛΙΤΕΣ ΜΕΤΟΧΕΣ");
				905	// http://www.unicode.org/udhr/d/udhr_ell_monotonic.html
				906	assertGreekUpper(u"Επειδή η αναγνώριση της αξιοπρέπειας", u"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ ΤΗΣ ΑΞΙΟΠΡΕΠΕΙΑΣ");
				907	assertGreekUpper(u"νομικού ή διεθνούς", u"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ");
				908	// http://unicode.org/udhr/d/udhr_ell_polytonic.html
				909	assertGreekUpper(u"Ἐπειδὴ ἡ ἀναγνώριση", u"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ");
				910	assertGreekUpper(u"νομικοῦ ἢ διεθνοῦς", u"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ");
				911	// From Google bug report
				912	assertGreekUpper(u"Νέο, Δημιουργία", u"ΝΕΟ, ΔΗΜΙΟΥΡΓΙΑ");
				913	// http://crbug.com/234797
				914	assertGreekUpper(u"Ελάτε να φάτε τα καλύτερα παϊδάκια!", u"ΕΛΑΤΕ ΝΑ ΦΑΤΕ ΤΑ ΚΑΛΥΤΕΡΑ ΠΑΪΔΑΚΙΑ!");
				915	assertGreekUpper(u"Μαΐου, τρόλεϊ", u"ΜΑΪΟΥ, ΤΡΟΛΕΪ");
				916	assertGreekUpper(u"Το ένα ή το άλλο.", u"ΤΟ ΕΝΑ Ή ΤΟ ΑΛΛΟ.");
				917	// http://multilingualtypesetting.co.uk/blog/greek-typesetting-tips/
				918	assertGreekUpper(u"ρωμέικα", u"ΡΩΜΕΪΚΑ");
				919	assertGreekUpper(u"ή.", u"Ή.");
				920	}
				921
				922	void StringCaseTest::TestArmenian() {
				923	Locale hy("hy"); // Eastern Armenian
				924	Locale hyw("hyw"); // Western Armenian
				925	Locale root = Locale::getRoot();
				926	// See ICU-13416:
				927	// և ligature ech-yiwn
				928	// uppercases to ԵՒ=ech+yiwn by default and in Western Armenian,
				929	// but to ԵՎ=ech+vew in Eastern Armenian.
				930	UnicodeString s(u"և Երևանի");
				931
				932	assertEquals("upper root", u"ԵՒ ԵՐԵՒԱՆԻ", UnicodeString(s).toUpper(root));
				933	assertEquals("upper hy", u"ԵՎ ԵՐԵՎԱՆԻ", UnicodeString(s).toUpper(hy));
				934	assertEquals("upper hyw", u"ԵՒ ԵՐԵՒԱՆԻ", UnicodeString(s).toUpper(hyw));
				935	#if !UCONFIG_NO_BREAK_ITERATION
				936	assertEquals("title root", u"Եւ Երևանի", UnicodeString(s).toTitle(nullptr, root));
				937	assertEquals("title hy", u"Եվ Երևանի", UnicodeString(s).toTitle(nullptr, hy));
				938	assertEquals("title hyw", u"Եւ Երևանի", UnicodeString(s).toTitle(nullptr, hyw));
				939	#endif
				940	}
				941
				942	void
				943	StringCaseTest::TestLongUpper() {
				944	if (quick) {
				945	logln("not exhaustive mode: skipping this test");
				946	return;
				947	}
				948	// Ticket #12663, crash with an extremely long string where
				949	// U+0390 maps to 0399 0308 0301 so that the result is three times as long
				950	// and overflows an int32_t.
				951	int32_t length = 0x40000004; // more than 1G UChars
				952	UnicodeString s(length, (UChar32)0x390, length);
				953	UnicodeString result;
				954	UChar *dest = result.getBuffer(length + 1);
				955	if (s.isBogus() \|\| dest == NULL) {
				956	logln("Out of memory, unable to run this test on this machine.");
				957	return;
				958	}
				959	IcuTestErrorCode errorCode(*this, "TestLongUpper");
				960	int32_t destLength = u_strToUpper(dest, result.getCapacity(),
				961	s.getBuffer(), s.length(), "", errorCode);
				962	result.releaseBuffer(destLength);
				963	if (errorCode.reset() != U_INDEX_OUTOFBOUNDS_ERROR) {
				964	errln("expected U_INDEX_OUTOFBOUNDS_ERROR, got %s (destLength is undefined, got %ld)",
				965	errorCode.errorName(), (long)destLength);
				966	}
				967	}
				968
				969	void StringCaseTest::TestMalformedUTF8() {
				970	// ticket #12639
				971	IcuTestErrorCode errorCode(*this, "TestMalformedUTF8");
				972	LocalUCaseMapPointer csm(ucasemap_open("en", U_TITLECASE_NO_BREAK_ADJUSTMENT, errorCode));
				973	if (errorCode.isFailure()) {
				974	errln("ucasemap_open(English) failed - %s", errorCode.errorName());
				975	return;
				976	}
				977	char src[1] = { (char)0x85 }; // malformed UTF-8
				978	char dest[3] = { 0, 0, 0 };
				979	int32_t destLength;
				980	#if !UCONFIG_NO_BREAK_ITERATION
				981	destLength = ucasemap_utf8ToTitle(csm.getAlias(), dest, 3, src, 1, errorCode);
				982	if (errorCode.isFailure() \|\| destLength != 1 \|\| dest[0] != src[0]) {
				983	errln("ucasemap_utf8ToTitle(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
				984	errorCode.errorName(), (int)destLength, dest[0]);
				985	}
				986	#endif
				987
				988	errorCode.reset();
				989	dest[0] = 0;
				990	destLength = ucasemap_utf8ToLower(csm.getAlias(), dest, 3, src, 1, errorCode);
				991	if (errorCode.isFailure() \|\| destLength != 1 \|\| dest[0] != src[0]) {
				992	errln("ucasemap_utf8ToLower(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
				993	errorCode.errorName(), (int)destLength, dest[0]);
				994	}
				995
				996	errorCode.reset();
				997	dest[0] = 0;
				998	destLength = ucasemap_utf8ToUpper(csm.getAlias(), dest, 3, src, 1, errorCode);
				999	if (errorCode.isFailure() \|\| destLength != 1 \|\| dest[0] != src[0]) {
				1000	errln("ucasemap_utf8ToUpper(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
				1001	errorCode.errorName(), (int)destLength, dest[0]);
				1002	}
				1003
				1004	errorCode.reset();
				1005	dest[0] = 0;
				1006	destLength = ucasemap_utf8FoldCase(csm.getAlias(), dest, 3, src, 1, errorCode);
				1007	if (errorCode.isFailure() \|\| destLength != 1 \|\| dest[0] != src[0]) {
				1008	errln("ucasemap_utf8FoldCase(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
				1009	errorCode.errorName(), (int)destLength, dest[0]);
				1010	}
				1011	}
				1012
				1013	void StringCaseTest::TestBufferOverflow() {
				1014	// Ticket #12849, incorrect result from Title Case preflight operation,
				1015	// when buffer overflow error is expected.
				1016	IcuTestErrorCode errorCode(*this, "TestBufferOverflow");
				1017	LocalUCaseMapPointer csm(ucasemap_open("en", 0, errorCode));
				1018	if (errorCode.isFailure()) {
				1019	errln("ucasemap_open(English) failed - %s", errorCode.errorName());
				1020	return;
				1021	}
				1022
				1023	UnicodeString data("hello world");
				1024	int32_t result;
				1025	#if !UCONFIG_NO_BREAK_ITERATION
				1026	result = ucasemap_toTitle(csm.getAlias(), NULL, 0, data.getBuffer(), data.length(), errorCode);
				1027	if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR \|\| result != data.length()) {
				1028	errln("%s:%d ucasemap_toTitle(\"hello world\") failed: "
				1029	"expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)",
				1030	__FILE__, __LINE__, data.length(), errorCode.errorName(), result);
				1031	}
				1032	#endif
				1033	errorCode.reset();
				1034
				1035	std::string data_utf8;
				1036	data.toUTF8String(data_utf8);
				1037	#if !UCONFIG_NO_BREAK_ITERATION
				1038	result = ucasemap_utf8ToTitle(csm.getAlias(), NULL, 0, data_utf8.c_str(), static_cast<int32_t>(data_utf8.length()), errorCode);
				1039	if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR \|\| result != (int32_t)data_utf8.length()) {
				1040	errln("%s:%d ucasemap_toTitle(\"hello world\") failed: "
				1041	"expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)",
				1042	__FILE__, __LINE__, data_utf8.length(), errorCode.errorName(), result);
				1043	}
				1044	#endif
				1045	errorCode.reset();
				1046	}
				1047
				1048	void StringCaseTest::TestEdits() {
				1049	IcuTestErrorCode errorCode(*this, "TestEdits");
				1050	Edits edits;
				1051	assertFalse("new Edits hasChanges", edits.hasChanges());
				1052	assertEquals("new Edits numberOfChanges", 0, edits.numberOfChanges());
				1053	assertEquals("new Edits", 0, edits.lengthDelta());
				1054	edits.addUnchanged(1); // multiple unchanged ranges are combined
				1055	edits.addUnchanged(10000); // too long, and they are split
				1056	edits.addReplace(0, 0);
				1057	edits.addUnchanged(2);
				1058	assertFalse("unchanged 10003 hasChanges", edits.hasChanges());
				1059	assertEquals("unchanged 10003 numberOfChanges", 0, edits.numberOfChanges());
				1060	assertEquals("unchanged 10003", 0, edits.lengthDelta());
				1061	edits.addReplace(2, 1); // multiple short equal-lengths edits are compressed
				1062	edits.addUnchanged(0);
				1063	edits.addReplace(2, 1);
				1064	edits.addReplace(2, 1);
				1065	edits.addReplace(0, 10);
				1066	edits.addReplace(100, 0);
				1067	edits.addReplace(3000, 4000); // variable-length encoding
				1068	edits.addReplace(100000, 100000);
				1069	assertTrue("some edits hasChanges", edits.hasChanges());
				1070	assertEquals("some edits numberOfChanges", 7, edits.numberOfChanges());
				1071	assertEquals("some edits", -3 + 10 - 100 + 1000, edits.lengthDelta());
				1072	UErrorCode outErrorCode = U_ZERO_ERROR;
				1073	assertFalse("edits done: copyErrorTo", edits.copyErrorTo(outErrorCode));
				1074
				1075	static const EditChange coarseExpectedChanges[] = {
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1076	{ false, 10003, 10003 },
				1077	{ true, 103106, 104013 }
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1078	};
				1079	TestUtility::checkEditsIter(*this, u"coarse",
				1080	edits.getCoarseIterator(), edits.getCoarseIterator(),
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1081	coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), true, errorCode);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1082	TestUtility::checkEditsIter(*this, u"coarse changes",
				1083	edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(),
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1084	coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), false, errorCode);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1085
				1086	static const EditChange fineExpectedChanges[] = {
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1087	{ false, 10003, 10003 },
				1088	{ true, 2, 1 },
				1089	{ true, 2, 1 },
				1090	{ true, 2, 1 },
				1091	{ true, 0, 10 },
				1092	{ true, 100, 0 },
				1093	{ true, 3000, 4000 },
				1094	{ true, 100000, 100000 }
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1095	};
				1096	TestUtility::checkEditsIter(*this, u"fine",
				1097	edits.getFineIterator(), edits.getFineIterator(),
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1098	fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), true, errorCode);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1099	TestUtility::checkEditsIter(*this, u"fine changes",
				1100	edits.getFineChangesIterator(), edits.getFineChangesIterator(),
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1101	fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), false, errorCode);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1102
				1103	edits.reset();
				1104	assertFalse("reset hasChanges", edits.hasChanges());
				1105	assertEquals("reset numberOfChanges", 0, edits.numberOfChanges());
				1106	assertEquals("reset", 0, edits.lengthDelta());
				1107	Edits::Iterator ei = edits.getCoarseChangesIterator();
				1108	assertFalse("reset then iterator", ei.next(errorCode));
				1109	}
				1110
				1111	void StringCaseTest::TestCopyMoveEdits() {
				1112	IcuTestErrorCode errorCode(*this, "TestCopyMoveEdits");
				1113	// Exceed the stack array capacity.
				1114	Edits a;
				1115	for (int32_t i = 0; i < 250; ++i) {
				1116	a.addReplace(i % 10, (i % 10) + 1);
				1117	}
				1118	assertEquals("a: many edits, length delta", 250, a.lengthDelta());
				1119
				1120	// copy
				1121	Edits b(a);
				1122	assertEquals("b: copy of many edits, length delta", 250, b.lengthDelta());
				1123	assertEquals("a remains: many edits, length delta", 250, a.lengthDelta());
				1124	TestUtility::checkEqualEdits(*this, u"b copy of a", a, b, errorCode);
				1125
				1126	// assign
				1127	Edits c;
				1128	c.addUnchanged(99);
				1129	c.addReplace(88, 77);
				1130	c = b;
				1131	assertEquals("c: assigned many edits, length delta", 250, c.lengthDelta());
				1132	assertEquals("b remains: many edits, length delta", 250, b.lengthDelta());
				1133	TestUtility::checkEqualEdits(*this, u"c = b", b, c, errorCode);
				1134
				1135	// std::move trouble on these platforms.
				1136	// See https://unicode-org.atlassian.net/browse/ICU-13393
				1137	#if !(U_PLATFORM == U_PF_AIX \|\| U_PLATFORM == U_PF_OS390)
				1138	// move constructor empties object with heap array
				1139	Edits d(std::move(a));
				1140	assertEquals("d: move-constructed many edits, length delta", 250, d.lengthDelta());
				1141	assertFalse("a moved away: no more hasChanges", a.hasChanges());
				1142	TestUtility::checkEqualEdits(*this, u"d() <- a", d, b, errorCode);
				1143	Edits empty;
				1144	TestUtility::checkEqualEdits(*this, u"a moved away", empty, a, errorCode);
				1145
				1146	// move assignment empties object with heap array
				1147	Edits e;
				1148	e.addReplace(0, 1000);
				1149	e = std::move(b);
				1150	assertEquals("e: move-assigned many edits, length delta", 250, e.lengthDelta());
				1151	assertFalse("b moved away: no more hasChanges", b.hasChanges());
				1152	TestUtility::checkEqualEdits(*this, u"e <- b", e, c, errorCode);
				1153	TestUtility::checkEqualEdits(*this, u"b moved away", empty, b, errorCode);
				1154
				1155	// Edits::Iterator default constructor.
				1156	Edits::Iterator iter;
				1157	assertFalse("Edits::Iterator().next()", iter.next(errorCode));
				1158	assertSuccess("Edits::Iterator().next()", errorCode);
				1159	iter = e.getFineChangesIterator();
				1160	assertTrue("iter.next()", iter.next(errorCode));
				1161	assertSuccess("iter.next()", errorCode);
				1162	assertTrue("iter.hasChange()", iter.hasChange());
				1163	assertEquals("iter.newLength()", 1, iter.newLength());
				1164	#endif
				1165	}
				1166
				1167	void StringCaseTest::TestEditsFindFwdBwd() {
				1168	IcuTestErrorCode errorCode(*this, "TestEditsFindFwdBwd");
				1169	// Some users need index mappings to be efficient when they are out of order.
				1170	// The most interesting failure case for this test is it taking a very long time.
				1171	Edits e;
				1172	constexpr int32_t N = 200000;
				1173	for (int32_t i = 0; i < N; ++i) {
				1174	e.addUnchanged(1);
				1175	e.addReplace(3, 1);
				1176	}
				1177	Edits::Iterator iter = e.getFineIterator();
				1178	for (int32_t i = 0; i <= N; i += 2) {
				1179	assertEquals("ascending", i * 2, iter.sourceIndexFromDestinationIndex(i, errorCode));
				1180	assertEquals("ascending", i * 2 + 1, iter.sourceIndexFromDestinationIndex(i + 1, errorCode));
				1181	}
				1182	for (int32_t i = N; i >= 0; i -= 2) {
				1183	assertEquals("descending", i * 2 + 1, iter.sourceIndexFromDestinationIndex(i + 1, errorCode));
				1184	assertEquals("descending", i * 2, iter.sourceIndexFromDestinationIndex(i, errorCode));
				1185	}
				1186	}
				1187
				1188	void StringCaseTest::TestMergeEdits() {
				1189	// For debugging, set -v to see matching edits up to a failure.
				1190	IcuTestErrorCode errorCode(*this, "TestMergeEdits");
				1191	Edits ab, bc, ac, expected_ac;
				1192
				1193	// Simple: Two parallel non-changes.
				1194	ab.addUnchanged(2);
				1195	bc.addUnchanged(2);
				1196	expected_ac.addUnchanged(2);
				1197
				1198	// Simple: Two aligned changes.
				1199	ab.addReplace(3, 2);
				1200	bc.addReplace(2, 1);
				1201	expected_ac.addReplace(3, 1);
				1202
				1203	// Unequal non-changes.
				1204	ab.addUnchanged(5);
				1205	bc.addUnchanged(3);
				1206	expected_ac.addUnchanged(3);
				1207	// ab ahead by 2
				1208
				1209	// Overlapping changes accumulate until they share a boundary.
				1210	ab.addReplace(4, 3);
				1211	bc.addReplace(3, 2);
				1212	ab.addReplace(4, 3);
				1213	bc.addReplace(3, 2);
				1214	ab.addReplace(4, 3);
				1215	bc.addReplace(3, 2);
				1216	bc.addUnchanged(4);
				1217	expected_ac.addReplace(14, 8);
				1218	// bc ahead by 2
				1219
				1220	// Balance out intermediate-string lengths.
				1221	ab.addUnchanged(2);
				1222	expected_ac.addUnchanged(2);
				1223
				1224	// Insert something and delete it: Should disappear.
				1225	ab.addReplace(0, 5);
				1226	ab.addReplace(0, 2);
				1227	bc.addReplace(7, 0);
				1228
				1229	// Parallel change to make a new boundary.
				1230	ab.addReplace(1, 2);
				1231	bc.addReplace(2, 3);
				1232	expected_ac.addReplace(1, 3);
				1233
				1234	// Multiple ab deletions should remain separate at the boundary.
				1235	ab.addReplace(1, 0);
				1236	ab.addReplace(2, 0);
				1237	ab.addReplace(3, 0);
				1238	expected_ac.addReplace(1, 0);
				1239	expected_ac.addReplace(2, 0);
				1240	expected_ac.addReplace(3, 0);
				1241
				1242	// Unequal non-changes can be split for another boundary.
				1243	ab.addUnchanged(2);
				1244	bc.addUnchanged(1);
				1245	expected_ac.addUnchanged(1);
				1246	// ab ahead by 1
				1247
				1248	// Multiple bc insertions should create a boundary and remain separate.
				1249	bc.addReplace(0, 4);
				1250	bc.addReplace(0, 5);
				1251	bc.addReplace(0, 6);
				1252	expected_ac.addReplace(0, 4);
				1253	expected_ac.addReplace(0, 5);
				1254	expected_ac.addReplace(0, 6);
				1255	// ab ahead by 1
				1256
				1257	// Multiple ab deletions in the middle of a bc change are merged.
				1258	bc.addReplace(2, 2);
				1259	// bc ahead by 1
				1260	ab.addReplace(1, 0);
				1261	ab.addReplace(2, 0);
				1262	ab.addReplace(3, 0);
				1263	ab.addReplace(4, 1);
				1264	expected_ac.addReplace(11, 2);
				1265
				1266	// Multiple bc insertions in the middle of an ab change are merged.
				1267	ab.addReplace(5, 6);
				1268	bc.addReplace(3, 3);
				1269	// ab ahead by 3
				1270	bc.addReplace(0, 4);
				1271	bc.addReplace(0, 5);
				1272	bc.addReplace(0, 6);
				1273	bc.addReplace(3, 7);
				1274	expected_ac.addReplace(5, 25);
				1275
				1276	// Delete around a deletion.
				1277	ab.addReplace(4, 4);
				1278	ab.addReplace(3, 0);
				1279	ab.addUnchanged(2);
				1280	bc.addReplace(2, 2);
				1281	bc.addReplace(4, 0);
				1282	expected_ac.addReplace(9, 2);
				1283
				1284	// Insert into an insertion.
				1285	ab.addReplace(0, 2);
				1286	bc.addReplace(1, 1);
				1287	bc.addReplace(0, 8);
				1288	bc.addUnchanged(4);
				1289	expected_ac.addReplace(0, 10);
				1290	// bc ahead by 3
				1291
				1292	// Balance out intermediate-string lengths.
				1293	ab.addUnchanged(3);
				1294	expected_ac.addUnchanged(3);
				1295
				1296	// Deletions meet insertions.
				1297	// Output order is arbitrary in principle, but we expect insertions first
				1298	// and want to keep it that way.
				1299	ab.addReplace(2, 0);
				1300	ab.addReplace(4, 0);
				1301	ab.addReplace(6, 0);
				1302	bc.addReplace(0, 1);
				1303	bc.addReplace(0, 3);
				1304	bc.addReplace(0, 5);
				1305	expected_ac.addReplace(0, 1);
				1306	expected_ac.addReplace(0, 3);
				1307	expected_ac.addReplace(0, 5);
				1308	expected_ac.addReplace(2, 0);
				1309	expected_ac.addReplace(4, 0);
				1310	expected_ac.addReplace(6, 0);
				1311
				1312	// End with a non-change, so that further edits are never reordered.
				1313	ab.addUnchanged(1);
				1314	bc.addUnchanged(1);
				1315	expected_ac.addUnchanged(1);
				1316
				1317	ac.mergeAndAppend(ab, bc, errorCode);
				1318	assertSuccess("ab+bc", errorCode);
				1319	if (!TestUtility::checkEqualEdits(*this, u"ab+bc", expected_ac, ac, errorCode)) {
				1320	return;
				1321	}
				1322
				1323	// Append more Edits.
				1324	Edits ab2, bc2;
				1325	ab2.addUnchanged(5);
				1326	bc2.addReplace(1, 2);
				1327	bc2.addUnchanged(4);
				1328	expected_ac.addReplace(1, 2);
				1329	expected_ac.addUnchanged(4);
				1330	ac.mergeAndAppend(ab2, bc2, errorCode);
				1331	assertSuccess("ab2+bc2", errorCode);
				1332	if (!TestUtility::checkEqualEdits(*this, u"ab2+bc2", expected_ac, ac, errorCode)) {
				1333	return;
				1334	}
				1335
				1336	// Append empty edits.
				1337	Edits empty;
				1338	ac.mergeAndAppend(empty, empty, errorCode);
				1339	assertSuccess("empty+empty", errorCode);
				1340	if (!TestUtility::checkEqualEdits(*this, u"empty+empty", expected_ac, ac, errorCode)) {
				1341	return;
				1342	}
				1343
				1344	// Error: Append more edits with mismatched intermediate-string lengths.
				1345	Edits mismatch;
				1346	mismatch.addReplace(1, 1);
				1347	ac.mergeAndAppend(ab2, mismatch, errorCode);
				1348	assertEquals("ab2+mismatch", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get());
				1349	errorCode.reset();
				1350	ac.mergeAndAppend(mismatch, bc2, errorCode);
				1351	assertEquals("mismatch+bc2", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get());
				1352	errorCode.reset();
				1353	}
				1354
				1355	void StringCaseTest::TestCaseMapWithEdits() {
				1356	IcuTestErrorCode errorCode(*this, "TestCaseMapWithEdits");
				1357	UChar dest[20];
				1358	Edits edits;
				1359
				1360	int32_t length = CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT,
				1361	u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1362	assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"), UnicodeString(true, dest, length));
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1363	static const EditChange lowerExpectedChanges[] = {
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1364	{ true, 1, 1 },
				1365	{ false, 4, 4 },
				1366	{ true, 1, 1 },
				1367	{ false, 2, 2 }
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1368	};
				1369	TestUtility::checkEditsIter(*this, u"toLower(IstanBul)",
				1370	edits.getFineIterator(), edits.getFineIterator(),
				1371	lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges),
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1372	true, errorCode);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1373
				1374	edits.reset();
				1375	length = CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT,
				1376	u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1377	assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"), UnicodeString(true, dest, length));
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1378	static const EditChange upperExpectedChanges[] = {
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1379	{ false, 1, 1 },
				1380	{ true, 1, 1 },
				1381	{ true, 1, 1 },
				1382	{ true, 1, 1 },
				1383	{ true, 1, 1 },
				1384	{ true, 1, 1 }
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1385	};
				1386	TestUtility::checkEditsIter(*this, u"toUpper(Πατάτα)",
				1387	edits.getFineIterator(), edits.getFineIterator(),
				1388	upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges),
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1389	true, errorCode);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1390
				1391	edits.reset();
				1392
				1393	#if !UCONFIG_NO_BREAK_ITERATION
				1394	length = CaseMap::toTitle("nl",
				1395	U_OMIT_UNCHANGED_TEXT \|
				1396	U_TITLECASE_NO_BREAK_ADJUSTMENT \|
				1397	U_TITLECASE_NO_LOWERCASE,
				1398	nullptr, u"IjssEL IglOo", 12,
				1399	dest, UPRV_LENGTHOF(dest), &edits, errorCode);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1400	assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), UnicodeString(true, dest, length));
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1401	static const EditChange titleExpectedChanges[] = {
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1402	{ false, 1, 1 },
				1403	{ true, 1, 1 },
				1404	{ false, 10, 10 }
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1405	};
				1406	TestUtility::checkEditsIter(*this, u"toTitle(IjssEL IglOo)",
				1407	edits.getFineIterator(), edits.getFineIterator(),
				1408	titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges),
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1409	true, errorCode);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1410	#endif
				1411
				1412	// No explicit nor automatic edits.reset(). Edits should be appended.
				1413	length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT \| U_EDITS_NO_RESET \| U_FOLD_CASE_EXCLUDE_SPECIAL_I,
				1414	u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1415	assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"), UnicodeString(true, dest, length));
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1416	static const EditChange foldExpectedChanges[] = {
				1417	#if !UCONFIG_NO_BREAK_ITERATION
				1418	// From titlecasing.
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1419	{ false, 1, 1 },
				1420	{ true, 1, 1 },
				1421	{ false, 10, 10 },
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1422	#endif
				1423	// From case folding.
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1424	{ true, 1, 1 },
				1425	{ true, 1, 2 },
				1426	{ false, 3, 3 },
				1427	{ true, 1, 1 },
				1428	{ false, 2, 2 }
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1429	};
				1430	TestUtility::checkEditsIter(*this, u"foldCase(no Edits reset, IßtanBul)",
				1431	edits.getFineIterator(), edits.getFineIterator(),
				1432	foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges),
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1433	true, errorCode);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1434	}
				1435
				1436	void StringCaseTest::TestCaseMapUTF8WithEdits() {
				1437	IcuTestErrorCode errorCode(*this, "TestCaseMapUTF8WithEdits");
				1438	char dest[50];
				1439	Edits edits;
				1440
				1441	int32_t length = CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT,
				1442	reinterpret_cast<const char*>(u8"IstanBul"), 8,
				1443	dest, UPRV_LENGTHOF(dest), &edits, errorCode);
				1444	assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"),
				1445	UnicodeString::fromUTF8(StringPiece(dest, length)));
				1446	static const EditChange lowerExpectedChanges[] = {
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1447	{ true, 1, 2 },
				1448	{ false, 4, 4 },
				1449	{ true, 1, 1 },
				1450	{ false, 2, 2 }
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1451	};
				1452	TestUtility::checkEditsIter(*this, u"toLower(IstanBul)",
				1453	edits.getFineIterator(), edits.getFineIterator(),
				1454	lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges),
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1455	true, errorCode);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1456
				1457	edits.reset();
				1458	length = CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT,
				1459	reinterpret_cast<const char>(u8"Πατάτα"), 6 2,
				1460	dest, UPRV_LENGTHOF(dest), &edits, errorCode);
				1461	assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"),
				1462	UnicodeString::fromUTF8(StringPiece(dest, length)));
				1463	static const EditChange upperExpectedChanges[] = {
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1464	{ false, 2, 2 },
				1465	{ true, 2, 2 },
				1466	{ true, 2, 2 },
				1467	{ true, 2, 2 },
				1468	{ true, 2, 2 },
				1469	{ true, 2, 2 }
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1470	};
				1471	TestUtility::checkEditsIter(*this, u"toUpper(Πατάτα)",
				1472	edits.getFineIterator(), edits.getFineIterator(),
				1473	upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges),
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1474	true, errorCode);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1475
				1476	edits.reset();
				1477	#if !UCONFIG_NO_BREAK_ITERATION
				1478	length = CaseMap::utf8ToTitle("nl",
				1479	U_OMIT_UNCHANGED_TEXT \|
				1480	U_TITLECASE_NO_BREAK_ADJUSTMENT \|
				1481	U_TITLECASE_NO_LOWERCASE,
				1482	nullptr, reinterpret_cast<const char*>(u8"IjssEL IglOo"), 12,
				1483	dest, UPRV_LENGTHOF(dest), &edits, errorCode);
				1484	assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"),
				1485	UnicodeString::fromUTF8(StringPiece(dest, length)));
				1486	static const EditChange titleExpectedChanges[] = {
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1487	{ false, 1, 1 },
				1488	{ true, 1, 1 },
				1489	{ false, 10, 10 }
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1490	};
				1491	TestUtility::checkEditsIter(*this, u"toTitle(IjssEL IglOo)",
				1492	edits.getFineIterator(), edits.getFineIterator(),
				1493	titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges),
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1494	true, errorCode);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1495	#endif
				1496
				1497	// No explicit nor automatic edits.reset(). Edits should be appended.
				1498	length = CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT \| U_EDITS_NO_RESET \|
				1499	U_FOLD_CASE_EXCLUDE_SPECIAL_I,
				1500	reinterpret_cast<const char*>(u8"IßtanBul"), 1 + 2 + 6,
				1501	dest, UPRV_LENGTHOF(dest), &edits, errorCode);
				1502	assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"),
				1503	UnicodeString::fromUTF8(StringPiece(dest, length)));
				1504	static const EditChange foldExpectedChanges[] = {
				1505	#if !UCONFIG_NO_BREAK_ITERATION
				1506	// From titlecasing.
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1507	{ false, 1, 1 },
				1508	{ true, 1, 1 },
				1509	{ false, 10, 10 },
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1510	#endif
				1511	// From case folding.
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1512	{ true, 1, 2 },
				1513	{ true, 2, 2 },
				1514	{ false, 3, 3 },
				1515	{ true, 1, 1 },
				1516	{ false, 2, 2 }
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1517	};
				1518	TestUtility::checkEditsIter(*this, u"foldCase(IßtanBul)",
				1519	edits.getFineIterator(), edits.getFineIterator(),
				1520	foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges),
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1521	true, errorCode);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1522	}
				1523
				1524	void StringCaseTest::TestCaseMapToString() {
				1525	// This test function name is parallel with one in UCharacterCaseTest.java.
				1526	// It is a bit of a misnomer until we have CaseMap API that writes to
				1527	// a UnicodeString, at which point we should change this code here.
				1528	IcuTestErrorCode errorCode(*this, "TestCaseMapToString");
				1529	UChar dest[20];
				1530
				1531	// Omit unchanged text.
				1532	int32_t length = CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT,
				1533	u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
				1534	assertEquals(u"toLower(IstanBul)",
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1535	UnicodeString(u"ıb"), UnicodeString(true, dest, length));
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1536	length = CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT,
				1537	u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
				1538	assertEquals(u"toUpper(Πατάτα)",
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1539	UnicodeString(u"ΑΤΑΤΑ"), UnicodeString(true, dest, length));
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1540	#if !UCONFIG_NO_BREAK_ITERATION
				1541	length = CaseMap::toTitle("nl",
				1542	U_OMIT_UNCHANGED_TEXT \|
				1543	U_TITLECASE_NO_BREAK_ADJUSTMENT \|
				1544	U_TITLECASE_NO_LOWERCASE,
				1545	nullptr, u"IjssEL IglOo", 12,
				1546	dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
				1547	assertEquals(u"toTitle(IjssEL IglOo)",
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1548	UnicodeString(u"J"), UnicodeString(true, dest, length));
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1549	#endif
				1550	length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT \| U_FOLD_CASE_EXCLUDE_SPECIAL_I,
				1551	u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
				1552	assertEquals(u"foldCase(IßtanBul)",
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1553	UnicodeString(u"ıssb"), UnicodeString(true, dest, length));
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1554
				1555	// Return the whole result string.
				1556	length = CaseMap::toLower("tr", 0,
				1557	u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
				1558	assertEquals(u"toLower(IstanBul)",
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1559	UnicodeString(u"ıstanbul"), UnicodeString(true, dest, length));
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1560	length = CaseMap::toUpper("el", 0,
				1561	u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
				1562	assertEquals(u"toUpper(Πατάτα)",
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1563	UnicodeString(u"ΠΑΤΑΤΑ"), UnicodeString(true, dest, length));
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1564	#if !UCONFIG_NO_BREAK_ITERATION
				1565	length = CaseMap::toTitle("nl",
				1566	U_TITLECASE_NO_BREAK_ADJUSTMENT \|
				1567	U_TITLECASE_NO_LOWERCASE,
				1568	nullptr, u"IjssEL IglOo", 12,
				1569	dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
				1570	assertEquals(u"toTitle(IjssEL IglOo)",
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1571	UnicodeString(u"IJssEL IglOo"), UnicodeString(true, dest, length));
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1572	#endif
				1573	length = CaseMap::fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I,
				1574	u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
				1575	assertEquals(u"foldCase(IßtanBul)",
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1576	UnicodeString(u"ısstanbul"), UnicodeString(true, dest, length));
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1577	}
				1578
				1579	void StringCaseTest::TestCaseMapUTF8ToString() {
				1580	IcuTestErrorCode errorCode(*this, "TestCaseMapUTF8ToString");
				1581	std::string dest;
				1582	StringByteSink<std::string> sink(&dest);
				1583
				1584	// Omit unchanged text.
				1585	CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT, u8"IstanBul", sink, nullptr, errorCode);
				1586	assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"), UnicodeString::fromUTF8(dest));
				1587	dest.clear();
				1588	CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT, u8"Πατάτα", sink, nullptr, errorCode);
				1589	assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"),
				1590	UnicodeString::fromUTF8(dest));
				1591	#if !UCONFIG_NO_BREAK_ITERATION
				1592	dest.clear();
				1593	CaseMap::utf8ToTitle(
				1594	"nl", U_OMIT_UNCHANGED_TEXT \| U_TITLECASE_NO_BREAK_ADJUSTMENT \| U_TITLECASE_NO_LOWERCASE,
				1595	nullptr, u8"IjssEL IglOo", sink, nullptr, errorCode);
				1596	assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"),
				1597	UnicodeString::fromUTF8(dest));
				1598	#endif
				1599	dest.clear();
				1600	CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT \| U_FOLD_CASE_EXCLUDE_SPECIAL_I,
				1601	u8"IßtanBul", sink, nullptr, errorCode);
				1602	assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"),
				1603	UnicodeString::fromUTF8(dest));
				1604
				1605	// Return the whole result string.
				1606	dest.clear();
				1607	CaseMap::utf8ToLower("tr", 0, u8"IstanBul", sink, nullptr, errorCode);
				1608	assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıstanbul"),
				1609	UnicodeString::fromUTF8(dest));
				1610	dest.clear();
				1611	CaseMap::utf8ToUpper("el", 0, u8"Πατάτα", sink, nullptr, errorCode);
				1612	assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΠΑΤΑΤΑ"),
				1613	UnicodeString::fromUTF8(dest));
				1614	#if !UCONFIG_NO_BREAK_ITERATION
				1615	dest.clear();
				1616	CaseMap::utf8ToTitle("nl", U_TITLECASE_NO_BREAK_ADJUSTMENT \| U_TITLECASE_NO_LOWERCASE,
				1617	nullptr, u8"IjssEL IglOo", sink, nullptr, errorCode);
				1618	assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"IJssEL IglOo"),
				1619	UnicodeString::fromUTF8(dest));
				1620	#endif
				1621	dest.clear();
				1622	CaseMap::utf8Fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I, u8"IßtanBul", sink, nullptr, errorCode);
				1623	assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ısstanbul"),
				1624	UnicodeString::fromUTF8(dest));
				1625	}
				1626
				1627	void StringCaseTest::TestLongUnicodeString() {
				1628	// Code coverage for UnicodeString case mapping code handling
				1629	// long strings or many changes in a string.
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1630	UnicodeString s(true,
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1631	(const UChar *)
				1632	u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
				1633	u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
				1634	u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
				1635	u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
				1636	u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
				1637	u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF", 6 * 51);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1638	UnicodeString expected(true,
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1639	(const UChar *)
				1640	u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
				1641	u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
				1642	u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
				1643	u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
				1644	u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
				1645	u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF", 6 * 51);
				1646	s.toUpper(Locale::getRoot());
				1647	assertEquals("string length 306", expected, s);
				1648	}
				1649
				1650	#if !UCONFIG_NO_BREAK_ITERATION
				1651	void StringCaseTest::TestBug13127() {
				1652	// Test case crashed when the bug was present.
				1653	const char16_t *s16 = u"日本語";
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1654	UnicodeString s(true, s16, -1);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1655	s.toTitle(0, Locale::getEnglish());
				1656	}
				1657
				1658	void StringCaseTest::TestInPlaceTitle() {
				1659	// Similar to TestBug13127. u_strToTitle() can modify the buffer in-place.
				1660	IcuTestErrorCode errorCode(*this, "TestInPlaceTitle");
				1661	char16_t s[32] = u"ß ß ß日本語 abcdef";
				1662	const char16_t *expected = u"Ss Ss Ss日本語 Abcdef";
				1663	int32_t length = u_strToTitle(s, UPRV_LENGTHOF(s), s, -1, nullptr, "", errorCode);
				1664	assertEquals("u_strToTitle(in-place) length", u_strlen(expected), length);
				1665	assertEquals("u_strToTitle(in-place)", expected, s);
				1666	}
				1667	#endif
				1668
				1669	void StringCaseTest::TestCaseMapEditsIteratorDocs() {
				1670	IcuTestErrorCode status(*this, "TestCaseMapEditsIteratorDocs");
				1671	const char16_t* input = u"abcßDeF";
				1672	int32_t inputLength = u_strlen(input);
				1673	// output: "abcssdef"
				1674
				1675	char16_t output[10];
				1676	Edits edits;
				1677	CaseMap::fold(0, input, -1, output, 10, &edits, status);
				1678
				1679	static const char16_t* fineIteratorExpected[] = {
				1680	u"{ src[0..3] ≡ dest[0..3] (no-change) }",
				1681	u"{ src[3..4] ⇝ dest[3..5], repl[0..2] }",
				1682	u"{ src[4..5] ⇝ dest[5..6], repl[2..3] }",
				1683	u"{ src[5..6] ≡ dest[6..7] (no-change) }",
				1684	u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
				1685	};
				1686	static const char16_t* fineChangesIteratorExpected[] = {
				1687	u"{ src[3..4] ⇝ dest[3..5], repl[0..2] }",
				1688	u"{ src[4..5] ⇝ dest[5..6], repl[2..3] }",
				1689	u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
				1690	};
				1691	static const char16_t* coarseIteratorExpected[] = {
				1692	u"{ src[0..3] ≡ dest[0..3] (no-change) }",
				1693	u"{ src[3..5] ⇝ dest[3..6], repl[0..3] }",
				1694	u"{ src[5..6] ≡ dest[6..7] (no-change) }",
				1695	u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
				1696	};
				1697	static const char16_t* coarseChangesIteratorExpected[] = {
				1698	u"{ src[3..5] ⇝ dest[3..6], repl[0..3] }",
				1699	u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
				1700	};
				1701
				1702	// Expected destination indices when source index is queried
				1703	static int32_t expectedDestFineEditIndices[] = {0, 0, 0, 3, 5, 6, 7};
				1704	static int32_t expectedDestCoarseEditIndices[] = {0, 0, 0, 3, 3, 6, 7};
				1705	static int32_t expectedDestFineStringIndices[] = {0, 1, 2, 3, 5, 6, 7};
				1706	static int32_t expectedDestCoarseStringIndices[] = {0, 1, 2, 3, 6, 6, 7};
				1707
				1708	// Expected source indices when destination index is queried
				1709	static int32_t expectedSrcFineEditIndices[] = { 0, 0, 0, 3, 3, 4, 5, 6 };
				1710	static int32_t expectedSrcCoarseEditIndices[] = { 0, 0, 0, 3, 3, 3, 5, 6 };
				1711	static int32_t expectedSrcFineStringIndices[] = { 0, 1, 2, 3, 4, 4, 5, 6 };
				1712	static int32_t expectedSrcCoarseStringIndices[] = { 0, 1, 2, 3, 5, 5, 5, 6 };
				1713
				1714	// Demonstrate the iterator next() method:
				1715	Edits::Iterator fineIterator = edits.getFineIterator();
				1716	int i = 0;
				1717	UnicodeString toString;
				1718	while (fineIterator.next(status)) {
				1719	UnicodeString expected = fineIteratorExpected[i++];
				1720	assertEquals(UnicodeString(u"Iteration #") + i,
				1721	expected,
				1722	fineIterator.toString(toString.remove()));
				1723	}
				1724	Edits::Iterator fineChangesIterator = edits.getFineChangesIterator();
				1725	i = 0;
				1726	while (fineChangesIterator.next(status)) {
				1727	UnicodeString expected = fineChangesIteratorExpected[i++];
				1728	assertEquals(UnicodeString(u"Iteration #") + i,
				1729	expected,
				1730	fineChangesIterator.toString(toString.remove()));
				1731	}
				1732	Edits::Iterator coarseIterator = edits.getCoarseIterator();
				1733	i = 0;
				1734	while (coarseIterator.next(status)) {
				1735	UnicodeString expected = coarseIteratorExpected[i++];
				1736	assertEquals(UnicodeString(u"Iteration #") + i,
				1737	expected,
				1738	coarseIterator.toString(toString.remove()));
				1739	}
				1740	Edits::Iterator coarseChangesIterator = edits.getCoarseChangesIterator();
				1741	i = 0;
				1742	while (coarseChangesIterator.next(status)) {
				1743	UnicodeString expected = coarseChangesIteratorExpected[i++];
				1744	assertEquals(UnicodeString(u"Iteration #") + i,
				1745	expected,
				1746	coarseChangesIterator.toString(toString.remove()));
				1747	}
				1748
				1749	// Demonstrate the iterator indexing methods:
				1750	// fineIterator should have the same behavior as fineChangesIterator, and
				1751	// coarseIterator should have the same behavior as coarseChangesIterator.
				1752	for (int32_t srcIndex=0; srcIndex<inputLength; srcIndex++) {
				1753	fineIterator.findSourceIndex(srcIndex, status);
				1754	fineChangesIterator.findSourceIndex(srcIndex, status);
				1755	coarseIterator.findSourceIndex(srcIndex, status);
				1756	coarseChangesIterator.findSourceIndex(srcIndex, status);
				1757
				1758	assertEquals(UnicodeString("Source index: ") + srcIndex,
				1759	expectedDestFineEditIndices[srcIndex],
				1760	fineIterator.destinationIndex());
				1761	assertEquals(UnicodeString("Source index: ") + srcIndex,
				1762	expectedDestFineEditIndices[srcIndex],
				1763	fineChangesIterator.destinationIndex());
				1764	assertEquals(UnicodeString("Source index: ") + srcIndex,
				1765	expectedDestCoarseEditIndices[srcIndex],
				1766	coarseIterator.destinationIndex());
				1767	assertEquals(UnicodeString("Source index: ") + srcIndex,
				1768	expectedDestCoarseEditIndices[srcIndex],
				1769	coarseChangesIterator.destinationIndex());
				1770
				1771	assertEquals(UnicodeString("Source index: ") + srcIndex,
				1772	expectedDestFineStringIndices[srcIndex],
				1773	fineIterator.destinationIndexFromSourceIndex(srcIndex, status));
				1774	assertEquals(UnicodeString("Source index: ") + srcIndex,
				1775	expectedDestFineStringIndices[srcIndex],
				1776	fineChangesIterator.destinationIndexFromSourceIndex(srcIndex, status));
				1777	assertEquals(UnicodeString("Source index: ") + srcIndex,
				1778	expectedDestCoarseStringIndices[srcIndex],
				1779	coarseIterator.destinationIndexFromSourceIndex(srcIndex, status));
				1780	assertEquals(UnicodeString("Source index: ") + srcIndex,
				1781	expectedDestCoarseStringIndices[srcIndex],
				1782	coarseChangesIterator.destinationIndexFromSourceIndex(srcIndex, status));
				1783	}
				1784	for (int32_t destIndex=0; destIndex<inputLength; destIndex++) {
				1785	fineIterator.findDestinationIndex(destIndex, status);
				1786	fineChangesIterator.findDestinationIndex(destIndex, status);
				1787	coarseIterator.findDestinationIndex(destIndex, status);
				1788	coarseChangesIterator.findDestinationIndex(destIndex, status);
				1789
				1790	assertEquals(UnicodeString("Destination index: ") + destIndex,
				1791	expectedSrcFineEditIndices[destIndex],
				1792	fineIterator.sourceIndex());
				1793	assertEquals(UnicodeString("Destination index: ") + destIndex,
				1794	expectedSrcFineEditIndices[destIndex],
				1795	fineChangesIterator.sourceIndex());
				1796	assertEquals(UnicodeString("Destination index: ") + destIndex,
				1797	expectedSrcCoarseEditIndices[destIndex],
				1798	coarseIterator.sourceIndex());
				1799	assertEquals(UnicodeString("Destination index: ") + destIndex,
				1800	expectedSrcCoarseEditIndices[destIndex],
				1801	coarseChangesIterator.sourceIndex());
				1802
				1803	assertEquals(UnicodeString("Destination index: ") + destIndex,
				1804	expectedSrcFineStringIndices[destIndex],
				1805	fineIterator.sourceIndexFromDestinationIndex(destIndex, status));
				1806	assertEquals(UnicodeString("Destination index: ") + destIndex,
				1807	expectedSrcFineStringIndices[destIndex],
				1808	fineChangesIterator.sourceIndexFromDestinationIndex(destIndex, status));
				1809	assertEquals(UnicodeString("Destination index: ") + destIndex,
				1810	expectedSrcCoarseStringIndices[destIndex],
				1811	coarseIterator.sourceIndexFromDestinationIndex(destIndex, status));
				1812	assertEquals(UnicodeString("Destination index: ") + destIndex,
				1813	expectedSrcCoarseStringIndices[destIndex],
				1814	coarseChangesIterator.sourceIndexFromDestinationIndex(destIndex, status));
				1815	}
				1816	}
				1817
				1818	void StringCaseTest::TestCaseMapGreekExtended() {
				1819	// Ticket 13851
				1820	UnicodeString s(u"\u1F80\u1F88\u1FFC");
				1821	UnicodeString result(s);
				1822	result.toLower(Locale::getRoot());
				1823	assertEquals(u"lower", u"\u1F80\u1F80\u1FF3", result);
				1824	#if !UCONFIG_NO_BREAK_ITERATION
				1825	result = s;
				1826	result.toTitle(nullptr, Locale::getRoot());
				1827	assertEquals(u"title", u"\u1F88\u1F80\u1FF3", result);
				1828	#endif
				1829	}
				1830
				1831	//#endif