Blame - source/test/intltest/ustrtest.cpp - chromium.googlesource.com/chromium/deps/icu

blob: a4da8e046657127c52dd399ceb00b0de4cf0e443 [file] [log] [blame]

Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1	// © 2016 and later: Unicode, Inc. and others.
				2	// License & terms of use: http://www.unicode.org/copyright.html
				3	/********************************************************************
				4	* COPYRIGHT:
				5	* Copyright (c) 1997-2016, International Business Machines Corporation and
				6	* others. All Rights Reserved.
				7	********************************************************************/
				8
				9	#include <utility>
				10
				11	#include "ustrtest.h"
				12	#include "unicode/appendable.h"
				13	#include "unicode/std_string.h"
				14	#include "unicode/unistr.h"
				15	#include "unicode/uchar.h"
				16	#include "unicode/ustring.h"
				17	#include "unicode/locid.h"
				18	#include "unicode/strenum.h"
				19	#include "unicode/ucnv.h"
				20	#include "unicode/uenum.h"
				21	#include "unicode/utf16.h"
				22	#include "cmemory.h"
				23	#include "charstr.h"
				24
				25	#if 0
				26	#include "unicode/ustream.h"
				27
				28	#include <iostream>
				29	using namespace std;
				30
				31	#endif
				32
					// Out-of-line destructor for the test class; the body is empty.
				33	UnicodeStringTest::~UnicodeStringTest() {}
				34
				35	extern IntlTest *createStringCaseTest();
				36
					// Entry point used to run the UnicodeString tests listed below.
					//
					// When `exec` is true a suite banner is logged first. Everything else
					// is done by the TESTCASE_AUTO* macros, which are defined outside this
					// file; presumably they map `index` to the listed test method, report
					// its name through `name`, and run it only when `exec` is set --
					// TODO confirm against the macro definitions in the test framework.
					//
					// NOTE(review): the order of the TESTCASE_AUTO entries most likely
					// determines each test's index, so do not reorder them casually.
				37	void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par)
				38	{
				39	if (exec) logln("TestSuite UnicodeStringTest: ");
				40	TESTCASE_AUTO_BEGIN;
					// Presumably delegates to the separate StringCaseTest suite via
					// createStringCaseTest() declared above -- verify against the macro.
				41	TESTCASE_AUTO_CREATE_CLASS(StringCaseTest);
				42	TESTCASE_AUTO(TestBasicManipulation);
				43	TESTCASE_AUTO(TestCompare);
				44	TESTCASE_AUTO(TestExtract);
				45	TESTCASE_AUTO(TestRemoveReplace);
				46	TESTCASE_AUTO(TestSearching);
				47	TESTCASE_AUTO(TestSpacePadding);
				48	TESTCASE_AUTO(TestPrefixAndSuffix);
				49	TESTCASE_AUTO(TestFindAndReplace);
				50	TESTCASE_AUTO(TestBogus);
				51	TESTCASE_AUTO(TestReverse);
				52	TESTCASE_AUTO(TestMiscellaneous);
				53	TESTCASE_AUTO(TestStackAllocation);
				54	TESTCASE_AUTO(TestUnescape);
				55	TESTCASE_AUTO(TestCountChar32);
				56	TESTCASE_AUTO(TestStringEnumeration);
				57	TESTCASE_AUTO(TestNameSpace);
				58	TESTCASE_AUTO(TestUTF32);
				59	TESTCASE_AUTO(TestUTF8);
				60	TESTCASE_AUTO(TestReadOnlyAlias);
				61	TESTCASE_AUTO(TestAppendable);
				62	TESTCASE_AUTO(TestUnicodeStringImplementsAppendable);
				63	TESTCASE_AUTO(TestSizeofUnicodeString);
				64	TESTCASE_AUTO(TestStartsWithAndEndsWithNulTerminated);
				65	TESTCASE_AUTO(TestMoveSwap);
				66	TESTCASE_AUTO(TestUInt16Pointers);
				67	TESTCASE_AUTO(TestWCharPointers);
				68	TESTCASE_AUTO(TestNullPointers);
				69	TESTCASE_AUTO(TestUnicodeStringInsertAppendToSelf);
				70	TESTCASE_AUTO(TestLargeAppend);
				71	TESTCASE_AUTO_END;
				72	}
				73
				74	void
				75	UnicodeStringTest::TestBasicManipulation()
				76	{
				77	UnicodeString test1("Now is the time for all men to come swiftly to the aid of the party.\n");
				78	UnicodeString expectedValue;
				79	UnicodeString *c;
				80
				81	c=test1.clone();
				82	test1.insert(24, "good ");
				83	expectedValue = "Now is the time for all good men to come swiftly to the aid of the party.\n";
				84	if (test1 != expectedValue)
				85	errln("insert() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
				86
				87	c->insert(24, "good ");
				88	if(*c != expectedValue) {
				89	errln("clone()->insert() failed: expected \"" + expectedValue + "\"\n,got \"" + *c + "\"");
				90	}
				91	delete c;
				92
				93	test1.remove(41, 8);
				94	expectedValue = "Now is the time for all good men to come to the aid of the party.\n";
				95	if (test1 != expectedValue)
				96	errln("remove() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
				97
				98	test1.replace(58, 6, "ir country");
				99	expectedValue = "Now is the time for all good men to come to the aid of their country.\n";
				100	if (test1 != expectedValue)
				101	errln("replace() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
				102
				103	UChar temp[80];
				104	test1.extract(0, 15, temp);
				105
				106	UnicodeString test2(temp, 15);
				107
				108	expectedValue = "Now is the time";
				109	if (test2 != expectedValue)
				110	errln("extract() failed: expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
				111
				112	test2 += " for me to go!\n";
				113	expectedValue = "Now is the time for me to go!\n";
				114	if (test2 != expectedValue)
				115	errln("operator+=() failed: expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
				116
				117	if (test1.length() != 70)
				118	errln(UnicodeString("length() failed: expected 70, got ") + test1.length());
				119	if (test2.length() != 30)
				120	errln(UnicodeString("length() failed: expected 30, got ") + test2.length());
				121
				122	UnicodeString test3;
				123	test3.append((UChar32)0x20402);
				124	if(test3 != CharsToUnicodeString("\\uD841\\uDC02")){
				125	errln((UnicodeString)"append failed for UChar32, expected \"\\\\ud841\\\\udc02\", got " + prettify(test3));
				126	}
				127	if(test3.length() != 2){
				128	errln(UnicodeString("append or length failed for UChar32, expected 2, got ") + test3.length());
				129	}
				130	test3.append((UChar32)0x0074);
				131	if(test3 != CharsToUnicodeString("\\uD841\\uDC02t")){
				132	errln((UnicodeString)"append failed for UChar32, expected \"\\\\uD841\\\\uDC02t\", got " + prettify(test3));
				133	}
				134	if(test3.length() != 3){
				135	errln((UnicodeString)"append or length failed for UChar32, expected 2, got " + test3.length());
				136	}
				137
				138	// test some UChar32 overloads
				139	if( test3.setTo((UChar32)0x10330).length() != 2 \|\|
				140	test3.insert(0, (UChar32)0x20100).length() != 4 \|\|
				141	test3.replace(2, 2, (UChar32)0xe0061).length() != 4 \|\|
				142	(test3 = (UChar32)0x14001).length() != 2
				143	) {
				144	errln((UnicodeString)"simple UChar32 overloads for replace, insert, setTo or = failed");
				145	}
				146
				147	{
				148	// test moveIndex32()
				149	UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
				150
				151	if(
				152	s.moveIndex32(2, -1)!=0 \|\|
				153	s.moveIndex32(2, 1)!=4 \|\|
				154	s.moveIndex32(2, 2)!=5 \|\|
				155	s.moveIndex32(5, -2)!=2 \|\|
				156	s.moveIndex32(0, -1)!=0 \|\|
				157	s.moveIndex32(6, 1)!=6
				158	) {
				159	errln("UnicodeString::moveIndex32() failed");
				160	}
				161
				162	if(s.getChar32Start(1)!=0 \|\| s.getChar32Start(2)!=2) {
				163	errln("UnicodeString::getChar32Start() failed");
				164	}
				165
				166	if(s.getChar32Limit(1)!=2 \|\| s.getChar32Limit(2)!=2) {
				167	errln("UnicodeString::getChar32Limit() failed");
				168	}
				169	}
				170
				171	{
				172	// test new 2.2 constructors and setTo function that parallel Java's substring function.
				173	UnicodeString src("Hello folks how are you?");
				174	UnicodeString target1("how are you?");
				175	if (target1 != UnicodeString(src, 12)) {
				176	errln("UnicodeString(const UnicodeString&, int32_t) failed");
				177	}
				178	UnicodeString target2("folks");
				179	if (target2 != UnicodeString(src, 6, 5)) {
				180	errln("UnicodeString(const UnicodeString&, int32_t, int32_t) failed");
				181	}
				182	if (target1 != target2.setTo(src, 12)) {
				183	errln("UnicodeString::setTo(const UnicodeString&, int32_t) failed");
				184	}
				185	}
				186
				187	{
				188	// op+ is new in ICU 2.8
				189	UnicodeString s=UnicodeString("abc", "")+UnicodeString("def", "")+UnicodeString("ghi", "");
				190	if(s!=UnicodeString("abcdefghi", "")) {
				191	errln("operator+(UniStr, UniStr) failed");
				192	}
				193	}
				194
				195	{
				196	// tests for Jitterbug 2360
				197	// verify that APIs with source pointer + length accept length == -1
				198	// mostly test only where modified, only few functions did not already do this
				199	if(UnicodeString("abc", -1, "")!=UnicodeString("abc", "")) {
				200	errln("UnicodeString(codepageData, dataLength, codepage) does not work with dataLength==-1");
				201	}
				202
				203	UChar buffer[10]={ 0x61, 0x62, 0x20ac, 0xd900, 0xdc05, 0, 0x62, 0xffff, 0xdbff, 0xdfff };
				204	UnicodeString s, t(buffer, -1, UPRV_LENGTHOF(buffer));
				205
				206	if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=u_strlen(buffer)) {
				207	errln("UnicodeString.setTo(buffer, length, capacity) does not work with length==-1");
				208	}
				209	if(t.length()!=u_strlen(buffer)) {
				210	errln("UnicodeString(buffer, length, capacity) does not work with length==-1");
				211	}
				212
				213	if(0!=s.caseCompare(buffer, -1, U_FOLD_CASE_DEFAULT)) {
				214	errln("UnicodeString.caseCompare(const UChar *, length, options) does not work with length==-1");
				215	}
				216	if(0!=s.caseCompare(0, s.length(), buffer, U_FOLD_CASE_DEFAULT)) {
				217	errln("UnicodeString.caseCompare(start, _length, const UChar *, options) does not work");
				218	}
				219
				220	buffer[u_strlen(buffer)]=0xe4;
				221	UnicodeString u(buffer, -1, UPRV_LENGTHOF(buffer));
				222	if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=UPRV_LENGTHOF(buffer)) {
				223	errln("UnicodeString.setTo(buffer without NUL, length, capacity) does not work with length==-1");
				224	}
				225	if(u.length()!=UPRV_LENGTHOF(buffer)) {
				226	errln("UnicodeString(buffer without NUL, length, capacity) does not work with length==-1");
				227	}
				228
				229	static const char cs[]={ 0x61, (char)0xe4, (char)0x85, 0 };
				230	UConverter *cnv;
				231	UErrorCode errorCode=U_ZERO_ERROR;
				232
				233	cnv=ucnv_open("ISO-8859-1", &errorCode);
				234	UnicodeString v(cs, -1, cnv, errorCode);
				235	ucnv_close(cnv);
				236	if(v!=CharsToUnicodeString("a\\xe4\\x85")) {
				237	errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1");
				238	}
				239	}
				240
				241	#if U_CHARSET_IS_UTF8
				242	{
				243	// Test the hardcoded-UTF-8 UnicodeString optimizations.
				244	static const uint8_t utf8[]={ 0x61, 0xC3, 0xA4, 0xC3, 0x9F, 0xE4, 0xB8, 0x80, 0 };
				245	static const UChar utf16[]={ 0x61, 0xE4, 0xDF, 0x4E00 };
				246	UnicodeString from8a = UnicodeString((const char *)utf8);
				247	UnicodeString from8b = UnicodeString((const char *)utf8, (int32_t)sizeof(utf8)-1);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	248	UnicodeString from16(false, utf16, UPRV_LENGTHOF(utf16));
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	249	if(from8a != from16 \|\| from8b != from16) {
				250	errln("UnicodeString(const char * U_CHARSET_IS_UTF8) failed");
				251	}
				252	char buffer[16];
				253	int32_t length8=from16.extract(0, 0x7fffffff, buffer, (uint32_t)sizeof(buffer));
				254	if(length8!=((int32_t)sizeof(utf8)-1) \|\| 0!=uprv_memcmp(buffer, utf8, sizeof(utf8))) {
				255	errln("UnicodeString::extract(char * U_CHARSET_IS_UTF8) failed");
				256	}
				257	length8=from16.extract(1, 2, buffer, (uint32_t)sizeof(buffer));
				258	if(length8!=4 \|\| buffer[length8]!=0 \|\| 0!=uprv_memcmp(buffer, utf8+1, length8)) {
				259	errln("UnicodeString::extract(substring to char * U_CHARSET_IS_UTF8) failed");
				260	}
				261	}
				262	#endif
				263	}
				264
				265	void
				266	UnicodeStringTest::TestCompare()
				267	{
				268	UnicodeString test1("this is a test");
				269	UnicodeString test2("this is a test");
				270	UnicodeString test3("this is a test of the emergency broadcast system");
				271	UnicodeString test4("never say, \"this is a test\"!!");
				272
				273	UnicodeString test5((UChar)0x5000);
				274	UnicodeString test6((UChar)0x5100);
				275
				276	UChar uniChars[] = { 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
				277	0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0 };
				278	char chars[] = "this is a test";
				279
				280	// test operator== and operator!=
				281	if (test1 != test2 \|\| test1 == test3 \|\| test1 == test4)
				282	errln("operator== or operator!= failed");
				283
				284	// test operator> and operator<
				285	if (test1 > test2 \|\| test1 < test2 \|\| !(test1 < test3) \|\| !(test1 > test4) \|\|
				286	!(test5 < test6)
				287	) {
				288	errln("operator> or operator< failed");
				289	}
				290
				291	// test operator>= and operator<=
				292	if (!(test1 >= test2) \|\| !(test1 <= test2) \|\| !(test1 <= test3) \|\| !(test1 >= test4))
				293	errln("operator>= or operator<= failed");
				294
				295	// test compare(UnicodeString)
				296	if (test1.compare(test2) != 0 \|\| test1.compare(test3) >= 0 \|\| test1.compare(test4) <= 0)
				297	errln("compare(UnicodeString) failed");
				298
				299	//test compare(offset, length, UnicodeString)
				300	if(test1.compare(0, 14, test2) != 0 \|\|
				301	test3.compare(0, 14, test2) != 0 \|\|
				302	test4.compare(12, 14, test2) != 0 \|\|
				303	test3.compare(0, 18, test1) <=0 )
				304	errln("compare(offset, length, UnicodeString) fails");
				305
				306	// test compare(UChar*)
				307	if (test2.compare(uniChars) != 0 \|\| test3.compare(uniChars) <= 0 \|\| test4.compare(uniChars) >= 0)
				308	errln("compare(UChar*) failed");
				309
				310	// test compare(char*)
				311	if (test2.compare(chars) != 0 \|\| test3.compare(chars) <= 0 \|\| test4.compare(chars) >= 0)
				312	errln("compare(char*) failed");
				313
				314	// test compare(UChar*, length)
				315	if (test1.compare(uniChars, 4) <= 0 \|\| test1.compare(uniChars, 4) <= 0)
				316	errln("compare(UChar*, length) failed");
				317
				318	// test compare(thisOffset, thisLength, that, thatOffset, thatLength)
				319	if (test1.compare(0, 14, test2, 0, 14) != 0
				320	\|\| test1.compare(0, 14, test3, 0, 14) != 0
				321	\|\| test1.compare(0, 14, test4, 12, 14) != 0)
				322	errln("1. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
				323
				324	if (test1.compare(10, 4, test2, 0, 4) >= 0
				325	\|\| test1.compare(10, 4, test3, 22, 9) <= 0
				326	\|\| test1.compare(10, 4, test4, 22, 4) != 0)
				327	errln("2. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
				328
				329	// test compareBetween
				330	if (test1.compareBetween(0, 14, test2, 0, 14) != 0 \|\| test1.compareBetween(0, 14, test3, 0, 14) != 0
				331	\|\| test1.compareBetween(0, 14, test4, 12, 26) != 0)
				332	errln("compareBetween failed");
				333
				334	if (test1.compareBetween(10, 14, test2, 0, 4) >= 0 \|\| test1.compareBetween(10, 14, test3, 22, 31) <= 0
				335	\|\| test1.compareBetween(10, 14, test4, 22, 26) != 0)
				336	errln("compareBetween failed");
				337
				338	// test compare() etc. with strings that share a buffer but are not equal
				339	test2=test1; // share the buffer, length() too large for the stackBuffer
				340	test2.truncate(1); // change only the length, not the buffer
				341	if( test1==test2 \|\| test1<=test2 \|\|
				342	test1.compare(test2)<=0 \|\|
				343	test1.compareCodePointOrder(test2)<=0 \|\|
				344	test1.compareCodePointOrder(0, INT32_MAX, test2)<=0 \|\|
				345	test1.compareCodePointOrder(0, INT32_MAX, test2, 0, INT32_MAX)<=0 \|\|
				346	test1.compareCodePointOrderBetween(0, INT32_MAX, test2, 0, INT32_MAX)<=0 \|\|
				347	test1.caseCompare(test2, U_FOLD_CASE_DEFAULT)<=0
				348	) {
				349	errln("UnicodeStrings that share a buffer but have different lengths compare as equal");
				350	}
				351
				352	/* test compareCodePointOrder() */
				353	{
				354	/* these strings are in ascending order */
				355	static const UChar strings[][4]={
				356	{ 0x61, 0 }, /* U+0061 */
				357	{ 0x20ac, 0xd801, 0 }, /* U+20ac U+d801 */
				358	{ 0x20ac, 0xd800, 0xdc00, 0 }, /* U+20ac U+10000 */
				359	{ 0xd800, 0 }, /* U+d800 */
				360	{ 0xd800, 0xff61, 0 }, /* U+d800 U+ff61 */
				361	{ 0xdfff, 0 }, /* U+dfff */
				362	{ 0xff61, 0xdfff, 0 }, /* U+ff61 U+dfff */
				363	{ 0xff61, 0xd800, 0xdc02, 0 }, /* U+ff61 U+10002 */
				364	{ 0xd800, 0xdc02, 0 }, /* U+10002 */
				365	{ 0xd84d, 0xdc56, 0 } /* U+23456 */
				366	};
				367	UnicodeString u[20]; // must be at least as long as strings[]
				368	int32_t i;
				369
				370	for(i=0; i<UPRV_LENGTHOF(strings); ++i) {
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	371	u[i]=UnicodeString(true, strings[i], -1);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	372	}
				373
				374	for(i=0; i<UPRV_LENGTHOF(strings)-1; ++i) {
				375	if(u[i].compareCodePointOrder(u[i+1])>=0 \|\| u[i].compareCodePointOrder(0, INT32_MAX, u[i+1].getBuffer())>=0) {
				376	errln("error: UnicodeString::compareCodePointOrder() fails for string %d and the following one\n", i);
				377	}
				378	}
				379	}
				380
				381	/* test caseCompare() */
				382	{
				383	static const UChar
				384	_mixed[]= { 0x61, 0x42, 0x131, 0x3a3, 0xdf, 0x130, 0x49, 0xfb03, 0xd93f, 0xdfff, 0 },
				385	_otherDefault[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x69, 0x307, 0x69, 0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 },
				386	_otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x69, 0x131, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 },
				387	_different[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x130, 0x49, 0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 };
				388
				389	UnicodeString
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	390	mixed(true, _mixed, -1),
				391	otherDefault(true, _otherDefault, -1),
				392	otherExcludeSpecialI(true, _otherExcludeSpecialI, -1),
				393	different(true, _different, -1);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	394
				395	int8_t result;
				396
				397	/* test caseCompare() */
				398	result=mixed.caseCompare(otherDefault, U_FOLD_CASE_DEFAULT);
				399	if(result!=0 \|\| 0!=mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_DEFAULT)) {
				400	errln("error: mixed.caseCompare(other, default)=%ld instead of 0\n", result);
				401	}
				402	result=mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
				403	if(result!=0) {
				404	errln("error: mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=%ld instead of 0\n", result);
				405	}
				406	result=mixed.caseCompare(otherDefault, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
				407	if(result==0 \|\| 0==mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
				408	errln("error: mixed.caseCompare(other, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=0 instead of !=0\n");
				409	}
				410
				411	/* test caseCompare() */
				412	result=mixed.caseCompare(different, U_FOLD_CASE_DEFAULT);
				413	if(result<=0) {
				414	errln("error: mixed.caseCompare(different, default)=%ld instead of positive\n", result);
				415	}
				416
				417	/* test caseCompare() - include the folded sharp s (U+00df) with different lengths */
				418	result=mixed.caseCompare(1, 4, different, 1, 5, U_FOLD_CASE_DEFAULT);
				419	if(result!=0 \|\| 0!=mixed.caseCompareBetween(1, 5, different, 1, 6, U_FOLD_CASE_DEFAULT)) {
				420	errln("error: mixed.caseCompare(mixed, 1, 4, different, 1, 5, default)=%ld instead of 0\n", result);
				421	}
				422
				423	/* test caseCompare() - stop in the middle of the sharp s (U+00df) */
				424	result=mixed.caseCompare(1, 4, different, 1, 4, U_FOLD_CASE_DEFAULT);
				425	if(result<=0) {
				426	errln("error: mixed.caseCompare(1, 4, different, 1, 4, default)=%ld instead of positive\n", result);
				427	}
				428	}
				429
				430	// test that srcLength=-1 is handled in functions that
				431	// take input const UChar */int32_t srcLength (j785)
				432	{
				433	static const UChar u[]={ 0x61, 0x308, 0x62, 0 };
				434	UnicodeString s=UNICODE_STRING("a\\u0308b", 8).unescape();
				435
				436	if(s.compare(u, -1)!=0 \|\| s.compare(0, 999, u, 0, -1)!=0) {
				437	errln("error UnicodeString::compare(..., const UChar *, srcLength=-1) does not work");
				438	}
				439
				440	if(s.compareCodePointOrder(u, -1)!=0 \|\| s.compareCodePointOrder(0, 999, u, 0, -1)!=0) {
				441	errln("error UnicodeString::compareCodePointOrder(..., const UChar *, srcLength=-1, ...) does not work");
				442	}
				443
				444	if(s.caseCompare(u, -1, U_FOLD_CASE_DEFAULT)!=0 \|\| s.caseCompare(0, 999, u, 0, -1, U_FOLD_CASE_DEFAULT)!=0) {
				445	errln("error UnicodeString::caseCompare(..., const UChar *, srcLength=-1, ...) does not work");
				446	}
				447
				448	if(s.indexOf(u, 1, -1, 0, 999)!=1 \|\| s.indexOf(u+1, -1, 0, 999)!=1 \|\| s.indexOf(u+1, -1, 0)!=1) {
				449	errln("error UnicodeString::indexOf(const UChar *, srcLength=-1, ...) does not work");
				450	}
				451
				452	if(s.lastIndexOf(u, 1, -1, 0, 999)!=1 \|\| s.lastIndexOf(u+1, -1, 0, 999)!=1 \|\| s.lastIndexOf(u+1, -1, 0)!=1) {
				453	errln("error UnicodeString::lastIndexOf(const UChar *, srcLength=-1, ...) does not work");
				454	}
				455
				456	UnicodeString s2, s3;
				457	s2.replace(0, 0, u+1, -1);
				458	s3.replace(0, 0, u, 1, -1);
				459	if(s.compare(1, 999, s2)!=0 \|\| s2!=s3) {
				460	errln("error UnicodeString::replace(..., const UChar *, srcLength=-1, ...) does not work");
				461	}
				462	}
				463	}
				464
				465	void
				466	UnicodeStringTest::TestExtract()
				467	{
				468	UnicodeString test1("Now is the time for all good men to come to the aid of their country.", "");
				469	UnicodeString test2;
				470	UChar test3[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
				471	char test4[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
				472	UnicodeString test5;
				473	char test6[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
				474
				475	test1.extract(11, 12, test2);
				476	test1.extract(11, 12, test3);
				477	if (test1.extract(11, 12, test4) != 12 \|\| test4[12] != 0) {
				478	errln("UnicodeString.extract(char *) failed to return the correct size of destination buffer.");
				479	}
				480
				481	// test proper pinning in extractBetween()
				482	test1.extractBetween(-3, 7, test5);
				483	if(test5!=UNICODE_STRING("Now is ", 7)) {
				484	errln("UnicodeString.extractBetween(-3, 7) did not pin properly.");
				485	}
				486
				487	test1.extractBetween(11, 23, test5);
				488	if (test1.extract(60, 71, test6) != 9) {
				489	errln("UnicodeString.extract() failed to return the correct size of destination buffer for end of buffer.");
				490	}
				491	if (test1.extract(11, 12, test6) != 12) {
				492	errln("UnicodeString.extract() failed to return the correct size of destination buffer.");
				493	}
				494
				495	// convert test4 back to Unicode for comparison
				496	UnicodeString test4b(test4, 12);
				497
				498	if (test1.extract(11, 12, (char *)NULL) != 12) {
				499	errln("UnicodeString.extract(NULL) failed to return the correct size of destination buffer.");
				500	}
				501	if (test1.extract(11, -1, test6) != 0) {
				502	errln("UnicodeString.extract(-1) failed to stop reading the string.");
				503	}
				504
				505	for (int32_t i = 0; i < 12; i++) {
				506	if (test1.charAt((int32_t)(11 + i)) != test2.charAt(i)) {
				507	errln(UnicodeString("extracting into a UnicodeString failed at position ") + i);
				508	break;
				509	}
				510	if (test1.charAt((int32_t)(11 + i)) != test3[i]) {
				511	errln(UnicodeString("extracting into an array of UChar failed at position ") + i);
				512	break;
				513	}
				514	if (((char)test1.charAt((int32_t)(11 + i))) != test4b.charAt(i)) {
				515	errln(UnicodeString("extracting into an array of char failed at position ") + i);
				516	break;
				517	}
				518	if (test1.charAt((int32_t)(11 + i)) != test5.charAt(i)) {
				519	errln(UnicodeString("extracting with extractBetween failed at position ") + i);
				520	break;
				521	}
				522	}
				523
				524	// test preflighting and overflows with invariant conversion
				525	if (test1.extract(0, 10, (char *)NULL, "") != 10) {
				526	errln("UnicodeString.extract(0, 10, (char *)NULL, \"\") != 10");
				527	}
				528
				529	test4[2] = (char)0xff;
				530	if (test1.extract(0, 10, test4, 2, "") != 10) {
				531	errln("UnicodeString.extract(0, 10, test4, 2, \"\") != 10");
				532	}
				533	if (test4[2] != (char)0xff) {
				534	errln("UnicodeString.extract(0, 10, test4, 2, \"\") overwrote test4[2]");
				535	}
				536
				537	{
				538	// test new, NUL-terminating extract() function
				539	UnicodeString s("terminate", "");
				540	UChar dest[20]={
				541	0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5,
				542	0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5
				543	};
				544	UErrorCode errorCode;
				545	int32_t length;
				546
				547	errorCode=U_ZERO_ERROR;
				548	length=s.extract((UChar *)NULL, 0, errorCode);
				549	if(errorCode!=U_BUFFER_OVERFLOW_ERROR \|\| length!=s.length()) {
				550	errln("UnicodeString.extract(NULL, 0)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", length, s.length(), u_errorName(errorCode));
				551	}
				552
				553	errorCode=U_ZERO_ERROR;
				554	length=s.extract(dest, s.length()-1, errorCode);
				555	if(errorCode!=U_BUFFER_OVERFLOW_ERROR \|\| length!=s.length()) {
				556	errln("UnicodeString.extract(dest too short)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)",
				557	length, u_errorName(errorCode), s.length());
				558	}
				559
				560	errorCode=U_ZERO_ERROR;
				561	length=s.extract(dest, s.length(), errorCode);
				562	if(errorCode!=U_STRING_NOT_TERMINATED_WARNING \|\| length!=s.length()) {
				563	errln("UnicodeString.extract(dest just right without NUL)==%d (%s) expected %d (U_STRING_NOT_TERMINATED_WARNING)",
				564	length, u_errorName(errorCode), s.length());
				565	}
				566	if(dest[length-1]!=s[length-1] \|\| dest[length]!=0xa5) {
				567	errln("UnicodeString.extract(dest just right without NUL) did not extract the string correctly");
				568	}
				569
				570	errorCode=U_ZERO_ERROR;
				571	length=s.extract(dest, s.length()+1, errorCode);
				572	if(errorCode!=U_ZERO_ERROR \|\| length!=s.length()) {
				573	errln("UnicodeString.extract(dest large enough)==%d (%s) expected %d (U_ZERO_ERROR)",
				574	length, u_errorName(errorCode), s.length());
				575	}
				576	if(dest[length-1]!=s[length-1] \|\| dest[length]!=0 \|\| dest[length+1]!=0xa5) {
				577	errln("UnicodeString.extract(dest large enough) did not extract the string correctly");
				578	}
				579	}
				580
				581	{
				582	// test new UConverter extract() and constructor
				583	UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
				584	char buffer[32];
				585	static const char expect[]={
				586	(char)0xf0, (char)0xaf, (char)0xa6, (char)0x99,
				587	(char)0xf0, (char)0x9d, (char)0x85, (char)0x9f,
				588	(char)0xc3, (char)0x84,
				589	(char)0xe1, (char)0xbb, (char)0x90
				590	};
				591	UErrorCode errorCode=U_ZERO_ERROR;
				592	UConverter *cnv=ucnv_open("UTF-8", &errorCode);
				593	int32_t length;
				594
				595	if(U_SUCCESS(errorCode)) {
				596	// test preflighting
				597	if( (length=s.extract(NULL, 0, cnv, errorCode))!=13 \|\|
				598	errorCode!=U_BUFFER_OVERFLOW_ERROR
				599	) {
				600	errln("UnicodeString::extract(NULL, UConverter) preflighting failed (length=%ld, %s)",
				601	length, u_errorName(errorCode));
				602	}
				603	errorCode=U_ZERO_ERROR;
				604	if( (length=s.extract(buffer, 2, cnv, errorCode))!=13 \|\|
				605	errorCode!=U_BUFFER_OVERFLOW_ERROR
				606	) {
				607	errln("UnicodeString::extract(too small, UConverter) preflighting failed (length=%ld, %s)",
				608	length, u_errorName(errorCode));
				609	}
				610
				611	// try error cases
				612	errorCode=U_ZERO_ERROR;
				613	if( s.extract(NULL, 2, cnv, errorCode)==13 \|\| U_SUCCESS(errorCode)) {
				614	errln("UnicodeString::extract(UConverter) succeeded with an illegal destination");
				615	}
				616	errorCode=U_ILLEGAL_ARGUMENT_ERROR;
				617	if( s.extract(NULL, 0, cnv, errorCode)==13 \|\| U_SUCCESS(errorCode)) {
				618	errln("UnicodeString::extract(UConverter) succeeded with a previous error code");
				619	}
				620	errorCode=U_ZERO_ERROR;
				621
				622	// extract for real
				623	if( (length=s.extract(buffer, sizeof(buffer), cnv, errorCode))!=13 \|\|
				624	uprv_memcmp(buffer, expect, 13)!=0 \|\|
				625	buffer[13]!=0 \|\|
				626	U_FAILURE(errorCode)
				627	) {
				628	errln("UnicodeString::extract(UConverter) conversion failed (length=%ld, %s)",
				629	length, u_errorName(errorCode));
				630	}
				631	// Test again with just the converter name.
				632	if( (length=s.extract(0, s.length(), buffer, sizeof(buffer), "UTF-8"))!=13 \|\|
				633	uprv_memcmp(buffer, expect, 13)!=0 \|\|
				634	buffer[13]!=0 \|\|
				635	U_FAILURE(errorCode)
				636	) {
				637	errln("UnicodeString::extract(\"UTF-8\") conversion failed (length=%ld, %s)",
				638	length, u_errorName(errorCode));
				639	}
				640
				641	// try the constructor
				642	UnicodeString t(expect, sizeof(expect), cnv, errorCode);
				643	if(U_FAILURE(errorCode) \|\| s!=t) {
				644	errln("UnicodeString(UConverter) conversion failed (%s)",
				645	u_errorName(errorCode));
				646	}
				647
				648	ucnv_close(cnv);
				649	}
				650	}
				651	}
				652
				653	void
				654	UnicodeStringTest::TestRemoveReplace()
				655	{
				656	UnicodeString test1("The rain in Spain stays mainly on the plain");
				657	UnicodeString test2("eat SPAMburgers!");
				658	UChar test3[] = { 0x53, 0x50, 0x41, 0x4d, 0x4d, 0 };
				659	char test4[] = "SPAM";
				660	UnicodeString& test5 = test1;
				661
				662	test1.replace(4, 4, test2, 4, 4);
				663	test1.replace(12, 5, test3, 4);
				664	test3[4] = 0;
				665	test1.replace(17, 4, test3);
				666	test1.replace(23, 4, test4);
				667	test1.replaceBetween(37, 42, test2, 4, 8);
				668
				669	if (test1 != "The SPAM in SPAM SPAMs SPAMly on the SPAM")
				670	errln("One of the replace methods failed:\n"
				671	" expected \"The SPAM in SPAM SPAMs SPAMly on the SPAM\",\n"
				672	" got \"" + test1 + "\"");
				673
				674	test1.remove(21, 1);
				675	test1.removeBetween(26, 28);
				676
				677	if (test1 != "The SPAM in SPAM SPAM SPAM on the SPAM")
				678	errln("One of the remove methods failed:\n"
				679	" expected \"The SPAM in SPAM SPAM SPAM on the SPAM\",\n"
				680	" got \"" + test1 + "\"");
				681
				682	for (int32_t i = 0; i < test1.length(); i++) {
				683	if (test5[i] != 0x53 && test5[i] != 0x50 && test5[i] != 0x41 && test5[i] != 0x4d && test5[i] != 0x20) {
				684	test1.setCharAt(i, 0x78);
				685	}
				686	}
				687
				688	if (test1 != "xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM")
				689	errln("One of the remove methods failed:\n"
				690	" expected \"xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM\",\n"
				691	" got \"" + test1 + "\"");
				692
				693	test1.remove();
				694	if (test1.length() != 0)
				695	errln("Remove() failed: expected empty string, got \"" + test1 + "\"");
				696	}
				697
				698	void
				699	UnicodeStringTest::TestSearching()
				700	{
				701	UnicodeString test1("test test ttest tetest testesteststt");
				702	UnicodeString test2("test");
				703	UChar testChar = 0x74;
				704
				705	UChar32 testChar32 = 0x20402;
				706	UChar testData[]={
				707	// 0 1 2 3 4 5 6 7
				708	0xd841, 0xdc02, 0x0071, 0xdc02, 0xd841, 0x0071, 0xd841, 0xdc02,
				709
				710	// 8 9 10 11 12 13 14 15
				711	0x0071, 0x0072, 0xd841, 0xdc02, 0x0071, 0xd841, 0xdc02, 0x0071,
				712
				713	// 16 17 18 19
				714	0xdc02, 0xd841, 0x0073, 0x0000
				715	};
				716	UnicodeString test3(testData);
				717	UnicodeString test4(testChar32);
				718
				719	uint16_t occurrences = 0;
				720	int32_t startPos = 0;
				721	for ( ;
				722	startPos != -1 && startPos < test1.length();
				723	(startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
				724	;
				725	if (occurrences != 6)
				726	errln(UnicodeString("indexOf failed: expected to find 6 occurrences, found ") + occurrences);
				727
				728	for ( occurrences = 0, startPos = 10;
				729	startPos != -1 && startPos < test1.length();
				730	(startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
				731	;
				732	if (occurrences != 4)
				733	errln(UnicodeString("indexOf with starting offset failed: "
				734	"expected to find 4 occurrences, found ") + occurrences);
				735
				736	int32_t endPos = 28;
				737	for ( occurrences = 0, startPos = 5;
				738	startPos != -1 && startPos < test1.length();
				739	(startPos = test1.indexOf(test2, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
				740	;
				741	if (occurrences != 4)
				742	errln(UnicodeString("indexOf with starting and ending offsets failed: "
				743	"expected to find 4 occurrences, found ") + occurrences);
				744
				745	//using UChar32 string
				746	for ( startPos=0, occurrences=0;
				747	startPos != -1 && startPos < test3.length();
				748	(startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
				749	;
				750	if (occurrences != 4)
				751	errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
				752
				753	for ( startPos=10, occurrences=0;
				754	startPos != -1 && startPos < test3.length();
				755	(startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
				756	;
				757	if (occurrences != 2)
				758	errln(UnicodeString("indexOf failed: expected to find 2 occurrences, found ") + occurrences);
				759	//---
				760
				761	for ( occurrences = 0, startPos = 0;
				762	startPos != -1 && startPos < test1.length();
				763	(startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
				764	;
				765	if (occurrences != 16)
				766	errln(UnicodeString("indexOf with character failed: "
				767	"expected to find 16 occurrences, found ") + occurrences);
				768
				769	for ( occurrences = 0, startPos = 10;
				770	startPos != -1 && startPos < test1.length();
				771	(startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
				772	;
				773	if (occurrences != 12)
				774	errln(UnicodeString("indexOf with character & start offset failed: "
				775	"expected to find 12 occurrences, found ") + occurrences);
				776
				777	for ( occurrences = 0, startPos = 5, endPos = 28;
				778	startPos != -1 && startPos < test1.length();
				779	(startPos = test1.indexOf(testChar, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
				780	;
				781	if (occurrences != 10)
				782	errln(UnicodeString("indexOf with character & start & end offsets failed: "
				783	"expected to find 10 occurrences, found ") + occurrences);
				784
				785	//testing for UChar32
				786	UnicodeString subString;
				787	for( occurrences =0, startPos=0; startPos < test3.length(); startPos +=1){
				788	subString.append(test3, startPos, test3.length());
				789	if(subString.indexOf(testChar32) != -1 ){
				790	++occurrences;
				791	}
				792	subString.remove();
				793	}
				794	if (occurrences != 14)
				795	errln((UnicodeString)"indexOf failed: expected to find 14 occurrences, found " + occurrences);
				796
				797	for ( occurrences = 0, startPos = 0;
				798	startPos != -1 && startPos < test3.length();
				799	(startPos = test3.indexOf(testChar32, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
				800	;
				801	if (occurrences != 4)
				802	errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
				803
				804	endPos=test3.length();
				805	for ( occurrences = 0, startPos = 5;
				806	startPos != -1 && startPos < test3.length();
				807	(startPos = test3.indexOf(testChar32, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
				808	;
				809	if (occurrences != 3)
				810	errln((UnicodeString)"indexOf with character & start & end offsets failed: expected to find 2 occurrences, found " + occurrences);
				811	//---
				812
				813	if(test1.lastIndexOf(test2)!=29) {
				814	errln("test1.lastIndexOf(test2)!=29");
				815	}
				816
				817	if(test1.lastIndexOf(test2, 15)!=29 \|\| test1.lastIndexOf(test2, 29)!=29 \|\| test1.lastIndexOf(test2, 30)!=-1) {
				818	errln("test1.lastIndexOf(test2, start) failed");
				819	}
				820
				821	for ( occurrences = 0, startPos = 32;
				822	startPos != -1;
				823	(startPos = test1.lastIndexOf(test2, 5, startPos - 5)) != -1 ? ++occurrences : 0)
				824	;
				825	if (occurrences != 4)
				826	errln(UnicodeString("lastIndexOf with starting and ending offsets failed: "
				827	"expected to find 4 occurrences, found ") + occurrences);
				828
				829	for ( occurrences = 0, startPos = 32;
				830	startPos != -1;
				831	(startPos = test1.lastIndexOf(testChar, 5, startPos - 5)) != -1 ? ++occurrences : 0)
				832	;
				833	if (occurrences != 11)
				834	errln(UnicodeString("lastIndexOf with character & start & end offsets failed: "
				835	"expected to find 11 occurrences, found ") + occurrences);
				836
				837	//testing UChar32
				838	startPos=test3.length();
				839	for ( occurrences = 0;
				840	startPos != -1;
				841	(startPos = test3.lastIndexOf(testChar32, 5, startPos - 5)) != -1 ? ++occurrences : 0)
				842	;
				843	if (occurrences != 3)
				844	errln((UnicodeString)"lastIndexOf with character & start & end offsets failed: expected to find 3 occurrences, found " + occurrences);
				845
				846
				847	for ( occurrences = 0, endPos = test3.length(); endPos > 0; endPos -= 1){
				848	subString.remove();
				849	subString.append(test3, 0, endPos);
				850	if(subString.lastIndexOf(testChar32) != -1 ){
				851	++occurrences;
				852	}
				853	}
				854	if (occurrences != 18)
				855	errln((UnicodeString)"indexOf failed: expected to find 18 occurrences, found " + occurrences);
				856	//---
				857
				858	// test that indexOf(UChar32) and lastIndexOf(UChar32)
				859	// do not find surrogate code points when they are part of matched pairs
				860	// (= part of supplementary code points)
				861	// Jitterbug 1542
				862	if(test3.indexOf((UChar32)0xd841) != 4 \|\| test3.indexOf((UChar32)0xdc02) != 3) {
				863	errln("error: UnicodeString::indexOf(UChar32 surrogate) finds a partial supplementary code point");
				864	}
				865	if( UnicodeString(test3, 0, 17).lastIndexOf((UChar)0xd841, 0) != 4 \|\|
				866	UnicodeString(test3, 0, 17).lastIndexOf((UChar32)0xd841, 2) != 4 \|\|
				867	test3.lastIndexOf((UChar32)0xd841, 0, 17) != 4 \|\| test3.lastIndexOf((UChar32)0xdc02, 0, 17) != 16
				868	) {
				869	errln("error: UnicodeString::lastIndexOf(UChar32 surrogate) finds a partial supplementary code point");
				870	}
				871	}
				872
				873	void
				874	UnicodeStringTest::TestSpacePadding()
				875	{
				876	UnicodeString test1("hello");
				877	UnicodeString test2(" there");
				878	UnicodeString test3("Hi! How ya doin'? Beautiful day, isn't it?");
				879	UnicodeString test4;
				880	UBool returnVal;
				881	UnicodeString expectedValue;
				882
				883	returnVal = test1.padLeading(15);
				884	expectedValue = " hello";
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	885	if (returnVal == false \|\| test1 != expectedValue)
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	886	errln("padLeading() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
				887
				888	returnVal = test2.padTrailing(15);
				889	expectedValue = " there ";
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	890	if (returnVal == false \|\| test2 != expectedValue)
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	891	errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
				892
				893	expectedValue = test3;
				894	returnVal = test3.padTrailing(15);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	895	if (returnVal == true \|\| test3 != expectedValue)
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	896	errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
				897
				898	expectedValue = "hello";
				899	test4.setTo(test1).trim();
				900
				901	if (test4 != expectedValue \|\| test1 == expectedValue \|\| test4 != expectedValue)
				902	errln("trim(UnicodeString&) failed");
				903
				904	test1.trim();
				905	if (test1 != expectedValue)
				906	errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
				907
				908	test2.trim();
				909	expectedValue = "there";
				910	if (test2 != expectedValue)
				911	errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
				912
				913	test3.trim();
				914	expectedValue = "Hi! How ya doin'? Beautiful day, isn't it?";
				915	if (test3 != expectedValue)
				916	errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
				917
				918	returnVal = test1.truncate(15);
				919	expectedValue = "hello";
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	920	if (returnVal == true \|\| test1 != expectedValue)
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	921	errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
				922
				923	returnVal = test2.truncate(15);
				924	expectedValue = "there";
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	925	if (returnVal == true \|\| test2 != expectedValue)
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	926	errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
				927
				928	returnVal = test3.truncate(15);
				929	expectedValue = "Hi! How ya doi";
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	930	if (returnVal == false \|\| test3 != expectedValue)
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	931	errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
				932	}
				933
				934	void
				935	UnicodeStringTest::TestPrefixAndSuffix()
				936	{
				937	UnicodeString test1("Now is the time for all good men to come to the aid of their country.");
				938	UnicodeString test2("Now");
				939	UnicodeString test3("country.");
				940	UnicodeString test4("count");
				941
				942	if (!test1.startsWith(test2) \|\| !test1.startsWith(test2, 0, test2.length())) {
				943	errln("startsWith() failed: \"" + test2 + "\" should be a prefix of \"" + test1 + "\".");
				944	}
				945
				946	if (test1.startsWith(test3) \|\|
				947	test1.startsWith(test3.getBuffer(), test3.length()) \|\|
				948	test1.startsWith(test3.getTerminatedBuffer(), 0, -1)
				949	) {
				950	errln("startsWith() failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test1 + "\".");
				951	}
				952
				953	if (test1.endsWith(test2)) {
				954	errln("endsWith() failed: \"" + test2 + "\" shouldn't be a suffix of \"" + test1 + "\".");
				955	}
				956
				957	if (!test1.endsWith(test3)) {
				958	errln("endsWith(test3) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
				959	}
				960	if (!test1.endsWith(test3, 0, INT32_MAX)) {
				961	errln("endsWith(test3, 0, INT32_MAX) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
				962	}
				963
				964	if(!test1.endsWith(test3.getBuffer(), test3.length())) {
				965	errln("endsWith(test3.getBuffer(), test3.length()) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
				966	}
				967	if(!test1.endsWith(test3.getTerminatedBuffer(), 0, -1)) {
				968	errln("endsWith(test3.getTerminatedBuffer(), 0, -1) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
				969	}
				970
				971	if (!test3.startsWith(test4)) {
				972	errln("endsWith(test4) failed: \"" + test4 + "\" should be a prefix of \"" + test3 + "\".");
				973	}
				974
				975	if (test4.startsWith(test3)) {
				976	errln("startsWith(test3) failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test4 + "\".");
				977	}
				978	}
				979
				980	void
				981	UnicodeStringTest::TestStartsWithAndEndsWithNulTerminated() {
				982	UnicodeString test("abcde");
				983	const UChar ab[] = { 0x61, 0x62, 0 };
				984	const UChar de[] = { 0x64, 0x65, 0 };
				985	assertTrue("abcde.startsWith(ab, -1)", test.startsWith(ab, -1));
				986	assertTrue("abcde.startsWith(ab, 0, -1)", test.startsWith(ab, 0, -1));
				987	assertTrue("abcde.endsWith(de, -1)", test.endsWith(de, -1));
				988	assertTrue("abcde.endsWith(de, 0, -1)", test.endsWith(de, 0, -1));
				989	}
				990
				991	void
				992	UnicodeStringTest::TestFindAndReplace()
				993	{
				994	UnicodeString test1("One potato, two potato, three potato, four\n");
				995	UnicodeString test2("potato");
				996	UnicodeString test3("MISSISSIPPI");
				997
				998	UnicodeString expectedValue;
				999
				1000	test1.findAndReplace(test2, test3);
				1001	expectedValue = "One MISSISSIPPI, two MISSISSIPPI, three MISSISSIPPI, four\n";
				1002	if (test1 != expectedValue)
				1003	errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
				1004	test1.findAndReplace(2, 32, test3, test2);
				1005	expectedValue = "One potato, two potato, three MISSISSIPPI, four\n";
				1006	if (test1 != expectedValue)
				1007	errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
				1008	}
				1009
				1010	void
				1011	UnicodeStringTest::TestReverse()
				1012	{
				1013	UnicodeString test("backwards words say to used I");
				1014
				1015	test.reverse();
				1016	test.reverse(2, 4);
				1017	test.reverse(7, 2);
				1018	test.reverse(10, 3);
				1019	test.reverse(14, 5);
				1020	test.reverse(20, 9);
				1021
				1022	if (test != "I used to say words backwards")
				1023	errln("reverse() failed: Expected \"I used to say words backwards\",\n got \""
				1024	+ test + "\"");
				1025
				1026	test=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
				1027	test.reverse();
				1028	if(test.char32At(0)!=0x1ed0 \|\| test.char32At(1)!=0xc4 \|\| test.char32At(2)!=0x1d15f \|\| test.char32At(4)!=0x2f999) {
				1029	errln("reverse() failed with supplementary characters");
				1030	}
				1031
				1032	// Test case for ticket #8091:
				1033	// UnicodeString::reverse() failed to see a lead surrogate in the middle of
				1034	// an odd-length string that contains no other lead surrogates.
				1035	test=UNICODE_STRING_SIMPLE("ab\\U0001F4A9e").unescape();
				1036	UnicodeString expected=UNICODE_STRING_SIMPLE("e\\U0001F4A9ba").unescape();
				1037	test.reverse();
				1038	if(test!=expected) {
				1039	errln("reverse() failed with only lead surrogate in the middle");
				1040	}
				1041	}
				1042
				1043	void
				1044	UnicodeStringTest::TestMiscellaneous()
				1045	{
				1046	UnicodeString test1("This is a test");
				1047	UnicodeString test2("This is a test");
				1048	UnicodeString test3("Me too!");
				1049
				1050	// test getBuffer(minCapacity) and releaseBuffer()
				1051	test1=UnicodeString(); // make sure that it starts with its stackBuffer
				1052	UChar *p=test1.getBuffer(20);
				1053	if(test1.getCapacity()<20) {
				1054	errln("UnicodeString::getBuffer(20).getCapacity()<20");
				1055	}
				1056
				1057	test1.append((UChar)7); // must not be able to modify the string here
				1058	test1.setCharAt(3, 7);
				1059	test1.reverse();
				1060	if( test1.length()!=0 \|\|
				1061	test1.charAt(0)!=0xffff \|\| test1.charAt(3)!=0xffff \|\|
				1062	test1.getBuffer(10)!=0 \|\| test1.getBuffer()!=0
				1063	) {
				1064	errln("UnicodeString::getBuffer(minCapacity) allows read or write access to the UnicodeString");
				1065	}
				1066
				1067	p[0]=1;
				1068	p[1]=2;
				1069	p[2]=3;
				1070	test1.releaseBuffer(3);
				1071	test1.append((UChar)4);
				1072
				1073	if(test1.length()!=4 \|\| test1.charAt(0)!=1 \|\| test1.charAt(1)!=2 \|\| test1.charAt(2)!=3 \|\| test1.charAt(3)!=4) {
				1074	errln("UnicodeString::releaseBuffer(newLength) does not properly reallow access to the UnicodeString");
				1075	}
				1076
				1077	// test releaseBuffer() without getBuffer(minCapacity) - must not have any effect
				1078	test1.releaseBuffer(1);
				1079	if(test1.length()!=4 \|\| test1.charAt(0)!=1 \|\| test1.charAt(1)!=2 \|\| test1.charAt(2)!=3 \|\| test1.charAt(3)!=4) {
				1080	errln("UnicodeString::releaseBuffer(newLength) without getBuffer(minCapacity) changed the UnicodeString");
				1081	}
				1082
				1083	// test getBuffer(const)
				1084	const UChar q=test1.getBuffer(), r=test1.getBuffer();
				1085	if( test1.length()!=4 \|\|
				1086	q[0]!=1 \|\| q[1]!=2 \|\| q[2]!=3 \|\| q[3]!=4 \|\|
				1087	r[0]!=1 \|\| r[1]!=2 \|\| r[2]!=3 \|\| r[3]!=4
				1088	) {
				1089	errln("UnicodeString::getBuffer(const) does not return a usable buffer pointer");
				1090	}
				1091
				1092	// test releaseBuffer() with a NUL-terminated buffer
				1093	test1.getBuffer(20)[2]=0;
				1094	test1.releaseBuffer(); // implicit -1
				1095	if(test1.length()!=2 \|\| test1.charAt(0)!=1 \|\| test1.charAt(1) !=2) {
				1096	errln("UnicodeString::releaseBuffer(-1) does not properly set the length of the UnicodeString");
				1097	}
				1098
				1099	// test releaseBuffer() with a non-NUL-terminated buffer
				1100	p=test1.getBuffer(256);
				1101	for(int32_t i=0; i<test1.getCapacity(); ++i) {
				1102	p[i]=(UChar)1; // fill the buffer with all non-NUL code units
				1103	}
				1104	test1.releaseBuffer(); // implicit -1
				1105	if(test1.length()!=test1.getCapacity() \|\| test1.charAt(1)!=1 \|\| test1.charAt(100)!=1 \|\| test1.charAt(test1.getCapacity()-1)!=1) {
				1106	errln("UnicodeString::releaseBuffer(-1 but no NUL) does not properly set the length of the UnicodeString");
				1107	}
				1108
				1109	// test getTerminatedBuffer()
				1110	test1=UnicodeString("This is another test.", "");
				1111	test2=UnicodeString("This is another test.", "");
				1112	q=test1.getTerminatedBuffer();
				1113	if(q[test1.length()]!=0 \|\| test1!=test2 \|\| test2.compare(q, -1)!=0) {
				1114	errln("getTerminatedBuffer()[length]!=0");
				1115	}
				1116
				1117	const UChar u[]={ 5, 6, 7, 8, 0 };
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1118	test1.setTo(false, u, 3);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1119	q=test1.getTerminatedBuffer();
				1120	if(q==u \|\| q[0]!=5 \|\| q[1]!=6 \|\| q[2]!=7 \|\| q[3]!=0) {
				1121	errln("UnicodeString(u[3]).getTerminatedBuffer() returns a bad buffer");
				1122	}
				1123
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1124	test1.setTo(true, u, -1);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1125	q=test1.getTerminatedBuffer();
				1126	if(q!=u \|\| test1.length()!=4 \|\| q[3]!=8 \|\| q[4]!=0) {
				1127	errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer");
				1128	}
				1129
				1130	// NOTE: Some compilers will optimize u"la" to point to the same static memory
				1131	// as u" lila", offset by 3 code units
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1132	test1=UnicodeString(true, u"la", 2);
				1133	test1.append(UnicodeString(true, u" lila", 5).getTerminatedBuffer(), 0, -1);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1134	assertEquals("UnicodeString::append(const UChar *, start, length) failed",
				1135	u"la lila", test1);
				1136
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1137	test1.insert(3, UnicodeString(true, u"dudum ", 6), 0, INT32_MAX);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1138	assertEquals("UnicodeString::insert(start, const UniStr &, start, length) failed",
				1139	u"la dudum lila", test1);
				1140
				1141	static const UChar ucs[]={ 0x68, 0x6d, 0x20, 0 };
				1142	test1.insert(9, ucs, -1);
				1143	assertEquals("UnicodeString::insert(start, const UChar *, length) failed",
				1144	u"la dudum hm lila", test1);
				1145
				1146	test1.replace(9, 2, (UChar)0x2b);
				1147	assertEquals("UnicodeString::replace(start, length, UChar) failed",
				1148	u"la dudum + lila", test1);
				1149
				1150	if(test1.hasMetaData() \|\| UnicodeString().hasMetaData()) {
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1151	errln("UnicodeString::hasMetaData() returns true");
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1152	}
				1153
				1154	// test getTerminatedBuffer() on a truncated, shared, heap-allocated string
				1155	test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
				1156	test1.truncate(36); // ensure length()<getCapacity()
				1157	test2=test1; // share the buffer
				1158	test1.truncate(5);
				1159	if(test1.length()!=5 \|\| test1.getTerminatedBuffer()[5]!=0) {
				1160	errln("UnicodeString(shared buffer).truncate() failed");
				1161	}
				1162	if(test2.length()!=36 \|\| test2[5]!=0x66 \|\| u_strlen(test2.getTerminatedBuffer())!=36) {
				1163	errln("UnicodeString(shared buffer).truncate().getTerminatedBuffer() "
				1164	"modified another copy of the string!");
				1165	}
				1166	test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
				1167	test1.truncate(36); // ensure length()<getCapacity()
				1168	test2=test1; // share the buffer
				1169	test1.remove();
				1170	if(test1.length()!=0 \|\| test1.getTerminatedBuffer()[0]!=0) {
				1171	errln("UnicodeString(shared buffer).remove() failed");
				1172	}
				1173	if(test2.length()!=36 \|\| test2[0]!=0x61 \|\| u_strlen(test2.getTerminatedBuffer())!=36) {
				1174	errln("UnicodeString(shared buffer).remove().getTerminatedBuffer() "
				1175	"modified another copy of the string!");
				1176	}
				1177
				1178	// ticket #9740
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1179	test1.setTo(true, ucs, 3);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1180	assertEquals("length of read-only alias", 3, test1.length());
				1181	test1.trim();
				1182	assertEquals("length of read-only alias after trim()", 2, test1.length());
				1183	assertEquals("length of terminated buffer of read-only alias + trim()",
				1184	2, u_strlen(test1.getTerminatedBuffer()));
				1185	}
				1186
				1187	void
				1188	UnicodeStringTest::TestStackAllocation()
				1189	{
				1190	UChar testString[] ={
				1191	0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x72, 0x61, 0x7a, 0x79, 0x20, 0x74, 0x65, 0x73, 0x74, 0x2e, 0 };
				1192	UChar guardWord = 0x4DED;
				1193	UnicodeString* test = 0;
				1194
				1195	test = new UnicodeString(testString);
				1196	if (*test != "This is a crazy test.")
				1197	errln("Test string failed to initialize properly.");
				1198	if (guardWord != 0x04DED)
				1199	errln("Test string initialization overwrote guard word!");
				1200
				1201	test->insert(8, "only ");
				1202	test->remove(15, 6);
				1203	if (*test != "This is only a test.")
				1204	errln("Manipulation of test string failed to work right.");
				1205	if (guardWord != 0x4DED)
				1206	errln("Manipulation of test string overwrote guard word!");
				1207
				1208	// we have to deinitialize and release the backing store by calling the destructor
				1209	// explicitly, since we can't overload operator delete
				1210	delete test;
				1211
				1212	UChar workingBuffer[] = {
				1213	0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20,
				1214	0x66, 0x6f, 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x6d, 0x65, 0x6e, 0x20, 0x74, 0x6f, 0x20,
				1215	0x63, 0x6f, 0x6d, 0x65, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				1216	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				1217	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
				1218	UChar guardWord2 = 0x4DED;
				1219
				1220	test = new UnicodeString(workingBuffer, 35, 100);
				1221	if (*test != "Now is the time for all men to come")
				1222	errln("Stack-allocated backing store failed to initialize correctly.");
				1223	if (guardWord2 != 0x4DED)
				1224	errln("Stack-allocated backing store overwrote guard word!");
				1225
				1226	test->insert(24, "good ");
				1227	if (*test != "Now is the time for all good men to come")
				1228	errln("insert() on stack-allocated UnicodeString didn't work right");
				1229	if (guardWord2 != 0x4DED)
				1230	errln("insert() on stack-allocated UnicodeString overwrote guard word!");
				1231
				1232	if (workingBuffer[24] != 0x67)
				1233	errln("insert() on stack-allocated UnicodeString didn't affect backing store");
				1234
				1235	*test += " to the aid of their country.";
				1236	if (*test != "Now is the time for all good men to come to the aid of their country.")
				1237	errln("Stack-allocated UnicodeString overflow didn't work");
				1238	if (guardWord2 != 0x4DED)
				1239	errln("Stack-allocated UnicodeString overflow overwrote guard word!");
				1240
				1241	*test = "ha!";
				1242	if (*test != "ha!")
				1243	errln("Assignment to stack-allocated UnicodeString didn't work");
				1244	if (workingBuffer[0] != 0x4e)
				1245	errln("Change to UnicodeString after overflow are still affecting original buffer");
				1246	if (guardWord2 != 0x4DED)
				1247	errln("Change to UnicodeString after overflow overwrote guard word!");
				1248
				1249	// test read-only aliasing with setTo()
				1250	workingBuffer[0] = 0x20ac;
				1251	workingBuffer[1] = 0x125;
				1252	workingBuffer[2] = 0;
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1253	test->setTo(true, workingBuffer, 2);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1254	if(test->length() != 2 \|\| test->charAt(0) != 0x20ac \|\| test->charAt(1) != 0x125) {
				1255	errln("UnicodeString.setTo(readonly alias) does not alias correctly");
				1256	}
				1257
				1258	UnicodeString *c=test->clone();
				1259
				1260	workingBuffer[1] = 0x109;
				1261	if(test->charAt(1) != 0x109) {
				1262	errln("UnicodeString.setTo(readonly alias) made a copy: did not see change in buffer");
				1263	}
				1264
				1265	if(c->length() != 2 \|\| c->charAt(1) != 0x125) {
				1266	errln("clone(alias) did not copy the buffer");
				1267	}
				1268	delete c;
				1269
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1270	test->setTo(true, workingBuffer, -1);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1271	if(test->length() != 2 \|\| test->charAt(0) != 0x20ac \|\| test->charAt(1) != 0x109) {
				1272	errln("UnicodeString.setTo(readonly alias, length -1) does not alias correctly");
				1273	}
				1274
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1275	test->setTo(false, workingBuffer, -1);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1276	if(!test->isBogus()) {
				1277	errln("UnicodeString.setTo(unterminated readonly alias, length -1) does not result in isBogus()");
				1278	}
				1279
				1280	delete test;
				1281
				1282	test=new UnicodeString();
				1283	UChar buffer[]={0x0061, 0x0062, 0x20ac, 0x0043, 0x0042, 0x0000};
				1284	test->setTo(buffer, 4, 10);
				1285	if(test->length() !=4 \|\| test->charAt(0) != 0x0061 \|\| test->charAt(1) != 0x0062 \|\|
				1286	test->charAt(2) != 0x20ac \|\| test->charAt(3) != 0x0043){
				1287	errln((UnicodeString)"UnicodeString.setTo(UChar, length, capacity) does not work correctly\n" + prettify(test));
				1288	}
				1289	delete test;
				1290
				1291
				1292	// test the UChar32 constructor
				1293	UnicodeString c32Test((UChar32)0x10ff2a);
				1294	if( c32Test.length() != U16_LENGTH(0x10ff2a) \|\|
				1295	c32Test.char32At(c32Test.length() - 1) != 0x10ff2a
				1296	) {
				1297	errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler");
				1298	}
				1299
				1300	// test the (new) capacity constructor
				1301	UnicodeString capTest(5, (UChar32)0x2a, 5);
				1302	if( capTest.length() != 5 * U16_LENGTH(0x2a) \|\|
				1303	capTest.char32At(0) != 0x2a \|\|
				1304	capTest.char32At(4) != 0x2a
				1305	) {
				1306	errln("The UnicodeString capacity constructor does not work with an ASCII filler");
				1307	}
				1308
				1309	capTest = UnicodeString(5, (UChar32)0x10ff2a, 5);
				1310	if( capTest.length() != 5 * U16_LENGTH(0x10ff2a) \|\|
				1311	capTest.char32At(0) != 0x10ff2a \|\|
				1312	capTest.char32At(4) != 0x10ff2a
				1313	) {
				1314	errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
				1315	}
				1316
				1317	capTest = UnicodeString(5, (UChar32)0, 0);
				1318	if(capTest.length() != 0) {
				1319	errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
				1320	}
				1321	}
				1322
				1323	/**
				1324	* Test the unescape() function.
				1325	*/
				1326	void UnicodeStringTest::TestUnescape(void) {
				1327	UnicodeString IN("abc\\u4567 \\n\\r \\U00101234xyz\\x1\\x{5289}\\x1b", -1, US_INV);
				1328	UnicodeString OUT("abc");
				1329	OUT.append((UChar)0x4567);
				1330	OUT.append(" ");
				1331	OUT.append((UChar)0xA);
				1332	OUT.append((UChar)0xD);
				1333	OUT.append(" ");
				1334	OUT.append((UChar32)0x00101234);
				1335	OUT.append("xyz");
				1336	OUT.append((UChar32)1).append((UChar32)0x5289).append((UChar)0x1b);
				1337	UnicodeString result = IN.unescape();
				1338	if (result != OUT) {
				1339	errln("FAIL: " + prettify(IN) + ".unescape() -> " +
				1340	prettify(result) + ", expected " +
				1341	prettify(OUT));
				1342	}
				1343
				1344	// test that an empty string is returned in case of an error
				1345	if (!UNICODE_STRING("wrong \\u sequence", 17).unescape().isEmpty()) {
				1346	errln("FAIL: unescaping of a string with an illegal escape sequence did not return an empty string");
				1347	}
				1348
				1349	// ICU-21648 limit backslash-uhhhh escapes to ASCII hex digits
				1350	UnicodeString euro = UnicodeString(u"\\u20aC").unescape();
				1351	assertEquals("ASCII Euro", u"€", euro);
				1352	UnicodeString nonASCIIEuro = UnicodeString(u"\\u୨෦ａＣ").unescape();
				1353	assertTrue("unescape() accepted non-ASCII digits", nonASCIIEuro.isEmpty());
				1354	}
				1355
				1356	/* test code point counting functions --------------------------------------- */
				1357
				1358	/* reference implementation of UnicodeString::hasMoreChar32Than() */
				1359	static int32_t
				1360	_refUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
				1361	int32_t count=s.countChar32(start, length);
				1362	return count>number;
				1363	}
				1364
				1365	/* compare the real function against the reference */
				1366	void
				1367	UnicodeStringTest::_testUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
				1368	if(s.hasMoreChar32Than(start, length, number)!=_refUnicodeStringHasMoreChar32Than(s, start, length, number)) {
				1369	errln("hasMoreChar32Than(%d, %d, %d)=%hd is wrong\n",
				1370	start, length, number, s.hasMoreChar32Than(start, length, number));
				1371	}
				1372	}
				1373
				1374	void
				1375	UnicodeStringTest::TestCountChar32(void) {
				1376	{
				1377	UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
				1378
				1379	// test countChar32()
				1380	// note that this also calls and tests u_countChar32(length>=0)
				1381	if(
				1382	s.countChar32()!=4 \|\|
				1383	s.countChar32(1)!=4 \|\|
				1384	s.countChar32(2)!=3 \|\|
				1385	s.countChar32(2, 3)!=2 \|\|
				1386	s.countChar32(2, 0)!=0
				1387	) {
				1388	errln("UnicodeString::countChar32() failed");
				1389	}
				1390
				1391	// NUL-terminate the string buffer and test u_countChar32(length=-1)
				1392	const UChar *buffer=s.getTerminatedBuffer();
				1393	if(
				1394	u_countChar32(buffer, -1)!=4 \|\|
				1395	u_countChar32(buffer+1, -1)!=4 \|\|
				1396	u_countChar32(buffer+2, -1)!=3 \|\|
				1397	u_countChar32(buffer+3, -1)!=3 \|\|
				1398	u_countChar32(buffer+4, -1)!=2 \|\|
				1399	u_countChar32(buffer+5, -1)!=1 \|\|
				1400	u_countChar32(buffer+6, -1)!=0
				1401	) {
				1402	errln("u_countChar32(length=-1) failed");
				1403	}
				1404
				1405	// test u_countChar32() with bad input
				1406	if(u_countChar32(NULL, 5)!=0 \|\| u_countChar32(buffer, -2)!=0) {
				1407	errln("u_countChar32(bad input) failed (returned non-zero counts)");
				1408	}
				1409	}
				1410
				1411	/* test data and variables for hasMoreChar32Than() */
				1412	static const UChar str[]={
				1413	0x61, 0x62, 0xd800, 0xdc00,
				1414	0xd801, 0xdc01, 0x63, 0xd802,
				1415	0x64, 0xdc03, 0x65, 0x66,
				1416	0xd804, 0xdc04, 0xd805, 0xdc05,
				1417	0x67
				1418	};
				1419	UnicodeString string(str, UPRV_LENGTHOF(str));
				1420	int32_t start, length, number;
				1421
				1422	/* test hasMoreChar32Than() */
				1423	for(length=string.length(); length>=0; --length) {
				1424	for(start=0; start<=length; ++start) {
				1425	for(number=-1; number<=((length-start)+2); ++number) {
				1426	_testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
				1427	}
				1428	}
				1429	}
				1430
				1431	/* test hasMoreChar32Than() with pinning */
				1432	for(start=-1; start<=string.length()+1; ++start) {
				1433	for(number=-1; number<=((string.length()-start)+2); ++number) {
				1434	_testUnicodeStringHasMoreChar32Than(string, start, 0x7fffffff, number);
				1435	}
				1436	}
				1437
				1438	/* test hasMoreChar32Than() with a bogus string */
				1439	string.setToBogus();
				1440	for(length=-1; length<=1; ++length) {
				1441	for(start=-1; start<=length; ++start) {
				1442	for(number=-1; number<=((length-start)+2); ++number) {
				1443	_testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
				1444	}
				1445	}
				1446	}
				1447	}
				1448
				1449	void
				1450	UnicodeStringTest::TestBogus() {
				1451	UnicodeString test1("This is a test");
				1452	UnicodeString test2("This is a test");
				1453	UnicodeString test3("Me too!");
				1454
				1455	// test isBogus() and setToBogus()
				1456	if (test1.isBogus() \|\| test2.isBogus() \|\| test3.isBogus()) {
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1457	errln("A string returned true for isBogus()!");
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1458	}
				1459
				1460	// NULL pointers are treated like empty strings
				1461	// use other illegal arguments to make a bogus string
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1462	test3.setTo(false, test1.getBuffer(), -2);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1463	if(!test3.isBogus()) {
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1464	errln("A bogus string returned false for isBogus()!");
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1465	}
				1466	if (test1.hashCode() != test2.hashCode() \|\| test1.hashCode() == test3.hashCode()) {
				1467	errln("hashCode() failed");
				1468	}
				1469	if(test3.getBuffer()!=0 \|\| test3.getBuffer(20)!=0 \|\| test3.getTerminatedBuffer()!=0) {
				1470	errln("bogus.getBuffer()!=0");
				1471	}
				1472	if (test1.indexOf(test3) != -1) {
				1473	errln("bogus.indexOf() != -1");
				1474	}
				1475	if (test1.lastIndexOf(test3) != -1) {
				1476	errln("bogus.lastIndexOf() != -1");
				1477	}
				1478	if (test1.caseCompare(test3, U_FOLD_CASE_DEFAULT) != 1 \|\| test3.caseCompare(test1, U_FOLD_CASE_DEFAULT) != -1) {
				1479	errln("caseCompare() doesn't work with bogus strings");
				1480	}
				1481	if (test1.compareCodePointOrder(test3) != 1 \|\| test3.compareCodePointOrder(test1) != -1) {
				1482	errln("compareCodePointOrder() doesn't work with bogus strings");
				1483	}
				1484
				1485	// verify that non-assignment modifications fail and do not revive a bogus string
				1486	test3.setToBogus();
				1487	test3.append((UChar)0x61);
				1488	if(!test3.isBogus() \|\| test3.getBuffer()!=0) {
				1489	errln("bogus.append('a') worked but must not");
				1490	}
				1491
				1492	test3.setToBogus();
				1493	test3.findAndReplace(UnicodeString((UChar)0x61), test2);
				1494	if(!test3.isBogus() \|\| test3.getBuffer()!=0) {
				1495	errln("bogus.findAndReplace() worked but must not");
				1496	}
				1497
				1498	test3.setToBogus();
				1499	test3.trim();
				1500	if(!test3.isBogus() \|\| test3.getBuffer()!=0) {
				1501	errln("bogus.trim() revived bogus but must not");
				1502	}
				1503
				1504	test3.setToBogus();
				1505	test3.remove(1);
				1506	if(!test3.isBogus() \|\| test3.getBuffer()!=0) {
				1507	errln("bogus.remove(1) revived bogus but must not");
				1508	}
				1509
				1510	test3.setToBogus();
				1511	if(!test3.setCharAt(0, 0x62).isBogus() \|\| !test3.isEmpty()) {
				1512	errln("bogus.setCharAt(0, 'b') worked but must not");
				1513	}
				1514
				1515	test3.setToBogus();
				1516	if(test3.truncate(1) \|\| !test3.isBogus() \|\| !test3.isEmpty()) {
				1517	errln("bogus.truncate(1) revived bogus but must not");
				1518	}
				1519
				1520	// verify that assignments revive a bogus string
				1521	test3.setToBogus();
				1522	if(!test3.isBogus() \|\| (test3=test1).isBogus() \|\| test3!=test1) {
				1523	errln("bogus.operator=() failed");
				1524	}
				1525
				1526	test3.setToBogus();
				1527	if(!test3.isBogus() \|\| test3.fastCopyFrom(test1).isBogus() \|\| test3!=test1) {
				1528	errln("bogus.fastCopyFrom() failed");
				1529	}
				1530
				1531	test3.setToBogus();
				1532	if(!test3.isBogus() \|\| test3.setTo(test1).isBogus() \|\| test3!=test1) {
				1533	errln("bogus.setTo(UniStr) failed");
				1534	}
				1535
				1536	test3.setToBogus();
				1537	if(!test3.isBogus() \|\| test3.setTo(test1, 0).isBogus() \|\| test3!=test1) {
				1538	errln("bogus.setTo(UniStr, 0) failed");
				1539	}
				1540
				1541	test3.setToBogus();
				1542	if(!test3.isBogus() \|\| test3.setTo(test1, 0, 0x7fffffff).isBogus() \|\| test3!=test1) {
				1543	errln("bogus.setTo(UniStr, 0, len) failed");
				1544	}
				1545
				1546	test3.setToBogus();
				1547	if(!test3.isBogus() \|\| test3.setTo(test1.getBuffer(), test1.length()).isBogus() \|\| test3!=test1) {
				1548	errln("bogus.setTo(const UChar *, len) failed");
				1549	}
				1550
				1551	test3.setToBogus();
				1552	if(!test3.isBogus() \|\| test3.setTo((UChar)0x2028).isBogus() \|\| test3!=UnicodeString((UChar)0x2028)) {
				1553	errln("bogus.setTo(UChar) failed");
				1554	}
				1555
				1556	test3.setToBogus();
				1557	if(!test3.isBogus() \|\| test3.setTo((UChar32)0x1d157).isBogus() \|\| test3!=UnicodeString((UChar32)0x1d157)) {
				1558	errln("bogus.setTo(UChar32) failed");
				1559	}
				1560
				1561	test3.setToBogus();
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1562	if(!test3.isBogus() \|\| test3.setTo(false, test1.getBuffer(), test1.length()).isBogus() \|\| test3!=test1) {
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1563	errln("bogus.setTo(readonly alias) failed");
				1564	}
				1565
				1566	// writable alias to another string's buffer: very bad idea, just convenient for this test
				1567	test3.setToBogus();
				1568	if(!test3.isBogus() \|\|
				1569	test3.setTo(const_cast<UChar *>(test1.getBuffer()),
				1570	test1.length(), test1.getCapacity()).isBogus() \|\|
				1571	test3!=test1) {
				1572	errln("bogus.setTo(writable alias) failed");
				1573	}
				1574
				1575	// verify simple, documented ways to turn a bogus string into an empty one
				1576	test3.setToBogus();
				1577	if(!test3.isBogus() \|\| (test3=UnicodeString()).isBogus() \|\| !test3.isEmpty()) {
				1578	errln("bogus.operator=(UnicodeString()) failed");
				1579	}
				1580
				1581	test3.setToBogus();
				1582	if(!test3.isBogus() \|\| test3.setTo(UnicodeString()).isBogus() \|\| !test3.isEmpty()) {
				1583	errln("bogus.setTo(UnicodeString()) failed");
				1584	}
				1585
				1586	test3.setToBogus();
				1587	if(test3.remove().isBogus() \|\| test3.getBuffer()==0 \|\| !test3.isEmpty()) {
				1588	errln("bogus.remove() failed");
				1589	}
				1590
				1591	test3.setToBogus();
				1592	if(test3.remove(0, INT32_MAX).isBogus() \|\| test3.getBuffer()==0 \|\| !test3.isEmpty()) {
				1593	errln("bogus.remove(0, INT32_MAX) failed");
				1594	}
				1595
				1596	test3.setToBogus();
				1597	if(test3.truncate(0) \|\| test3.isBogus() \|\| !test3.isEmpty()) {
				1598	errln("bogus.truncate(0) failed");
				1599	}
				1600
				1601	test3.setToBogus();
				1602	if(!test3.isBogus() \|\| test3.setTo((UChar32)-1).isBogus() \|\| !test3.isEmpty()) {
				1603	errln("bogus.setTo((UChar32)-1) failed");
				1604	}
				1605
				1606	static const UChar nul=0;
				1607
				1608	test3.setToBogus();
				1609	if(!test3.isBogus() \|\| test3.setTo(&nul, 0).isBogus() \|\| !test3.isEmpty()) {
				1610	errln("bogus.setTo(&nul, 0) failed");
				1611	}
				1612
				1613	test3.setToBogus();
				1614	if(!test3.isBogus() \|\| test3.getBuffer()!=0) {
				1615	errln("setToBogus() failed to make a string bogus");
				1616	}
				1617
				1618	test3.setToBogus();
				1619	if(test1.isBogus() \|\| !(test1=test3).isBogus()) {
				1620	errln("normal=bogus failed to make the left string bogus");
				1621	}
				1622
				1623	// test that NULL primitive input string values are treated like
				1624	// empty strings, not errors (bogus)
				1625	test2.setTo((UChar32)0x10005);
				1626	if(test2.insert(1, nullptr, 1).length()!=2) {
				1627	errln("UniStr.insert(...nullptr...) should not modify the string but does");
				1628	}
				1629
				1630	UErrorCode errorCode=U_ZERO_ERROR;
				1631	UnicodeString
				1632	test4((const UChar *)NULL),
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1633	test5(true, (const UChar *)NULL, 1),
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1634	test6((UChar *)NULL, 5, 5),
				1635	test7((const char *)NULL, 3, NULL, errorCode);
				1636	if(test4.isBogus() \|\| test5.isBogus() \|\| test6.isBogus() \|\| test7.isBogus()) {
				1637	errln("a constructor set to bogus for a NULL input string, should be empty");
				1638	}
				1639
				1640	test4.setTo(NULL, 3);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1641	test5.setTo(true, (const UChar *)NULL, 1);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1642	test6.setTo((UChar *)NULL, 5, 5);
				1643	if(test4.isBogus() \|\| test5.isBogus() \|\| test6.isBogus()) {
				1644	errln("a setTo() set to bogus for a NULL input string, should be empty");
				1645	}
				1646
				1647	// test that bogus==bogus<any
				1648	if(test1!=test3 \|\| test1.compare(test3)!=0) {
				1649	errln("bogus==bogus failed");
				1650	}
				1651
				1652	test2.remove();
				1653	if(test1>=test2 \|\| !(test2>test1) \|\| test1.compare(test2)>=0 \|\| !(test2.compare(test1)>0)) {
				1654	errln("bogus<empty failed");
				1655	}
Frank Tang	d2858cb	2022-04-08 20:34:12 -0700	[diff] [blame]	1656
				1657	// test that copy constructor of bogus is bogus & clone of bogus is nullptr
				1658	{
				1659	test3.setToBogus();
				1660	UnicodeString test3Copy(test3);
				1661	UnicodeString *test3Clone = test3.clone();
				1662	assertTrue(WHERE, test3.isBogus());
				1663	assertTrue(WHERE, test3Copy.isBogus());
				1664	assertTrue(WHERE, test3Clone == nullptr);
				1665	}
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1666	}
				1667
				1668	// StringEnumeration ------------------------------------------------------- ***
				1669	// most of StringEnumeration is tested elsewhere
				1670	// this test improves code coverage
				1671
				1672	static const char *const
				1673	testEnumStrings[]={
				1674	"a",
				1675	"b",
				1676	"c",
				1677	"this is a long string which helps us test some buffer limits",
				1678	"eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
				1679	};
				1680
				1681	class TestEnumeration : public StringEnumeration {
				1682	public:
				1683	TestEnumeration() : i(0) {}
				1684
				1685	virtual int32_t count(UErrorCode& /status/) const override {
				1686	return UPRV_LENGTHOF(testEnumStrings);
				1687	}
				1688
				1689	virtual const UnicodeString *snext(UErrorCode &status) override {
				1690	if(U_SUCCESS(status) && i<UPRV_LENGTHOF(testEnumStrings)) {
				1691	unistr=UnicodeString(testEnumStrings[i++], "");
				1692	return &unistr;
				1693	}
				1694
				1695	return NULL;
				1696	}
				1697
				1698	virtual void reset(UErrorCode& /status/) override {
				1699	i=0;
				1700	}
				1701
				1702	static inline UClassID getStaticClassID() {
				1703	return (UClassID)&fgClassID;
				1704	}
				1705	virtual UClassID getDynamicClassID() const override {
				1706	return getStaticClassID();
				1707	}
				1708
				1709	private:
				1710	static const char fgClassID;
				1711
				1712	int32_t i;
				1713	};
				1714
				1715	const char TestEnumeration::fgClassID=0;
				1716
				1717	void
				1718	UnicodeStringTest::TestStringEnumeration() {
				1719	UnicodeString s;
				1720	TestEnumeration ten;
				1721	int32_t i, length;
				1722	UErrorCode status;
				1723
				1724	const UChar *pu;
				1725	const char *pc;
				1726
				1727	// test the next() default implementation and ensureCharsCapacity()
				1728	for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
				1729	status=U_ZERO_ERROR;
				1730	pc=ten.next(&length, status);
				1731	s=UnicodeString(testEnumStrings[i], "");
				1732	if(U_FAILURE(status) \|\| pc==NULL \|\| length!=s.length() \|\| UnicodeString(pc, length, "")!=s) {
				1733	errln("StringEnumeration.next(%d) failed", i);
				1734	}
				1735	}
				1736	status=U_ZERO_ERROR;
				1737	if(ten.next(&length, status)!=NULL) {
				1738	errln("StringEnumeration.next(done)!=NULL");
				1739	}
				1740
				1741	// test the unext() default implementation
				1742	ten.reset(status);
				1743	for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
				1744	status=U_ZERO_ERROR;
				1745	pu=ten.unext(&length, status);
				1746	s=UnicodeString(testEnumStrings[i], "");
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1747	if(U_FAILURE(status) \|\| pu==NULL \|\| length!=s.length() \|\| UnicodeString(true, pu, length)!=s) {
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1748	errln("StringEnumeration.unext(%d) failed", i);
				1749	}
				1750	}
				1751	status=U_ZERO_ERROR;
				1752	if(ten.unext(&length, status)!=NULL) {
				1753	errln("StringEnumeration.unext(done)!=NULL");
				1754	}
				1755
				1756	// test that the default clone() implementation works, and returns NULL
				1757	if(ten.clone()!=NULL) {
				1758	errln("StringEnumeration.clone()!=NULL");
				1759	}
				1760
				1761	// test that uenum_openFromStringEnumeration() works
				1762	// Need a heap allocated string enumeration because it is adopted by the UEnumeration.
				1763	StringEnumeration *newTen = new TestEnumeration;
				1764	status=U_ZERO_ERROR;
				1765	UEnumeration *uten = uenum_openFromStringEnumeration(newTen, &status);
				1766	if (uten==NULL \|\| U_FAILURE(status)) {
				1767	errln("fail at file %s, line %d, UErrorCode is %s\n", __FILE__, __LINE__, u_errorName(status));
				1768	return;
				1769	}
				1770
				1771	// test uenum_next()
				1772	for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
				1773	status=U_ZERO_ERROR;
				1774	pc=uenum_next(uten, &length, &status);
				1775	if(U_FAILURE(status) \|\| pc==NULL \|\| strcmp(pc, testEnumStrings[i]) != 0) {
				1776	errln("File %s, line %d, StringEnumeration.next(%d) failed", __FILE__, __LINE__, i);
				1777	}
				1778	}
				1779	status=U_ZERO_ERROR;
				1780	if(uenum_next(uten, &length, &status)!=NULL) {
				1781	errln("File %s, line %d, uenum_next(done)!=NULL");
				1782	}
				1783
				1784	// test the uenum_unext()
				1785	uenum_reset(uten, &status);
				1786	for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
				1787	status=U_ZERO_ERROR;
				1788	pu=uenum_unext(uten, &length, &status);
				1789	s=UnicodeString(testEnumStrings[i], "");
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1790	if(U_FAILURE(status) \|\| pu==NULL \|\| length!=s.length() \|\| UnicodeString(true, pu, length)!=s) {
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1791	errln("File %s, Line %d, uenum_unext(%d) failed", __FILE__, __LINE__, i);
				1792	}
				1793	}
				1794	status=U_ZERO_ERROR;
				1795	if(uenum_unext(uten, &length, &status)!=NULL) {
				1796	errln("File %s, Line %d, uenum_unext(done)!=NULL" __FILE__, __LINE__);
				1797	}
				1798
				1799	uenum_close(uten);
				1800	}
				1801
				1802	/*
				1803	* Namespace test, to make sure that macros like UNICODE_STRING include the
				1804	* namespace qualifier.
				1805	*
				1806	* Define a (bogus) UnicodeString class in another namespace and check for ambiguity.
				1807	*/
				1808	namespace bogus {
				1809	class UnicodeString {
				1810	public:
				1811	enum EInvariant { kInvariant };
				1812	UnicodeString() : i(1) {}
				1813	UnicodeString(UBool /isTerminated/, const UChar * /text/, int32_t textLength) : i(textLength) {(void)i;}
				1814	UnicodeString(const char * /src/, int32_t length, enum EInvariant /inv/
				1815	) : i(length) {}
				1816	private:
				1817	int32_t i;
				1818	};
				1819	}
				1820
				1821	void
				1822	UnicodeStringTest::TestNameSpace() {
				1823	// Provoke name collision unless the UnicodeString macros properly
				1824	// qualify the icu::UnicodeString class.
				1825	using namespace bogus;
				1826
				1827	// Use all UnicodeString macros from unistr.h.
				1828	icu::UnicodeString s1=icu::UnicodeString("abc", 3, US_INV);
				1829	icu::UnicodeString s2=UNICODE_STRING("def", 3);
				1830	icu::UnicodeString s3=UNICODE_STRING_SIMPLE("ghi");
				1831
				1832	// Make sure the compiler does not optimize away instantiation of s1, s2, s3.
				1833	icu::UnicodeString s4=s1+s2+s3;
				1834	if(s4.length()!=9) {
				1835	errln("Something wrong with UnicodeString::operator+().");
				1836	}
				1837	}
				1838
				1839	void
				1840	UnicodeStringTest::TestUTF32() {
				1841	// Input string length US_STACKBUF_SIZE to cause overflow of the
				1842	// initially chosen fStackBuffer due to supplementary characters.
				1843	static const UChar32 utf32[] = {
				1844	0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a,
				1845	0x10000, 0x20000, 0xe0000, 0x10ffff
				1846	};
				1847	static const UChar expected_utf16[] = {
				1848	0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a,
				1849	0xd800, 0xdc00, 0xd840, 0xdc00, 0xdb40, 0xdc00, 0xdbff, 0xdfff
				1850	};
				1851	UnicodeString from32 = UnicodeString::fromUTF32(utf32, UPRV_LENGTHOF(utf32));
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1852	UnicodeString expected(false, expected_utf16, UPRV_LENGTHOF(expected_utf16));
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1853	if(from32 != expected) {
				1854	errln("UnicodeString::fromUTF32() did not create the expected string.");
				1855	}
				1856
				1857	static const UChar utf16[] = {
				1858	0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
				1859	};
				1860	static const UChar32 expected_utf32[] = {
				1861	0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0x10000, 0x10ffff
				1862	};
				1863	UChar32 result32[16];
				1864	UErrorCode errorCode = U_ZERO_ERROR;
				1865	int32_t length32 =
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1866	UnicodeString(false, utf16, UPRV_LENGTHOF(utf16)).
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1867	toUTF32(result32, UPRV_LENGTHOF(result32), errorCode);
				1868	if( length32 != UPRV_LENGTHOF(expected_utf32) \|\|
				1869	0 != uprv_memcmp(result32, expected_utf32, length32*4) \|\|
				1870	result32[length32] != 0
				1871	) {
				1872	errln("UnicodeString::toUTF32() did not create the expected string.");
				1873	}
				1874	}
				1875
				1876	class TestCheckedArrayByteSink : public CheckedArrayByteSink {
				1877	public:
				1878	TestCheckedArrayByteSink(char* outbuf, int32_t capacity)
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1879	: CheckedArrayByteSink(outbuf, capacity), calledFlush(false) {}
				1880	virtual void Flush() override { calledFlush = true; }
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1881	UBool calledFlush;
				1882	};
				1883
				1884	void
				1885	UnicodeStringTest::TestUTF8() {
				1886	static const uint8_t utf8[] = {
				1887	// Code points:
				1888	// 0x41, 0xd900,
				1889	// 0x61, 0xdc00,
				1890	// 0x110000, 0x5a,
				1891	// 0x50000, 0x7a,
				1892	// 0x10000, 0x20000,
				1893	// 0xe0000, 0x10ffff
				1894	0x41, 0xed, 0xa4, 0x80,
				1895	0x61, 0xed, 0xb0, 0x80,
				1896	0xf4, 0x90, 0x80, 0x80, 0x5a,
				1897	0xf1, 0x90, 0x80, 0x80, 0x7a,
				1898	0xf0, 0x90, 0x80, 0x80, 0xf0, 0xa0, 0x80, 0x80,
				1899	0xf3, 0xa0, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
				1900	};
				1901	static const UChar expected_utf16[] = {
				1902	0x41, 0xfffd, 0xfffd, 0xfffd,
				1903	0x61, 0xfffd, 0xfffd, 0xfffd,
				1904	0xfffd, 0xfffd, 0xfffd, 0xfffd,0x5a,
				1905	0xd900, 0xdc00, 0x7a,
				1906	0xd800, 0xdc00, 0xd840, 0xdc00,
				1907	0xdb40, 0xdc00, 0xdbff, 0xdfff
				1908	};
				1909	UnicodeString from8 = UnicodeString::fromUTF8(StringPiece((const char *)utf8, (int32_t)sizeof(utf8)));
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1910	UnicodeString expected(false, expected_utf16, UPRV_LENGTHOF(expected_utf16));
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1911
				1912	if(from8 != expected) {
				1913	errln("UnicodeString::fromUTF8(StringPiece) did not create the expected string.");
				1914	}
				1915	std::string utf8_string((const char *)utf8, sizeof(utf8));
				1916	UnicodeString from8b = UnicodeString::fromUTF8(utf8_string);
				1917	if(from8b != expected) {
				1918	errln("UnicodeString::fromUTF8(std::string) did not create the expected string.");
				1919	}
				1920
				1921	static const UChar utf16[] = {
				1922	0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
				1923	};
				1924	static const uint8_t expected_utf8[] = {
				1925	0x41, 0xef, 0xbf, 0xbd, 0x61, 0xef, 0xbf, 0xbd, 0x5a, 0xf1, 0x90, 0x80, 0x80, 0x7a,
				1926	0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
				1927	};
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1928	UnicodeString us(false, utf16, UPRV_LENGTHOF(utf16));
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1929
				1930	char buffer[64];
				1931	TestCheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
				1932	us.toUTF8(sink);
				1933	if( sink.NumberOfBytesWritten() != (int32_t)sizeof(expected_utf8) \|\|
				1934	0 != uprv_memcmp(buffer, expected_utf8, sizeof(expected_utf8))
				1935	) {
				1936	errln("UnicodeString::toUTF8() did not create the expected string.");
				1937	}
				1938	if(!sink.calledFlush) {
				1939	errln("UnicodeString::toUTF8(sink) did not sink.Flush().");
				1940	}
				1941	// Initial contents for testing that toUTF8String() appends.
				1942	std::string result8 = "-->";
				1943	std::string expected8 = "-->" + std::string((const char *)expected_utf8, sizeof(expected_utf8));
				1944	// Use the return value just for testing.
				1945	std::string &result8r = us.toUTF8String(result8);
				1946	if(result8r != expected8 \|\| &result8r != &result8) {
				1947	errln("UnicodeString::toUTF8String() did not create the expected string.");
				1948	}
				1949	}
				1950
				1951	// Test if this compiler supports Return Value Optimization of unnamed temporary objects.
				1952	static UnicodeString wrapUChars(const UChar *uchars) {
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1953	return UnicodeString(true, uchars, -1);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1954	}
				1955
				1956	void
				1957	UnicodeStringTest::TestReadOnlyAlias() {
				1958	UChar uchars[]={ 0x61, 0x62, 0 };
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1959	UnicodeString alias(true, uchars, 2);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1960	if(alias.length()!=2 \|\| alias.getBuffer()!=uchars \|\| alias.getTerminatedBuffer()!=uchars) {
				1961	errln("UnicodeString read-only-aliasing constructor does not behave as expected.");
				1962	return;
				1963	}
				1964	alias.truncate(1);
				1965	if(alias.length()!=1 \|\| alias.getBuffer()!=uchars) {
				1966	errln("UnicodeString(read-only-alias).truncate() did not preserve aliasing as expected.");
				1967	}
				1968	if(alias.getTerminatedBuffer()==uchars) {
				1969	errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
				1970	"did not allocate and copy as expected.");
				1971	}
				1972	if(uchars[1]!=0x62) {
				1973	errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
				1974	"modified the original buffer.");
				1975	}
				1976	if(1!=u_strlen(alias.getTerminatedBuffer())) {
				1977	errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
				1978	"does not return a buffer terminated at the proper length.");
				1979	}
				1980
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1981	alias.setTo(true, uchars, 2);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1982	if(alias.length()!=2 \|\| alias.getBuffer()!=uchars \|\| alias.getTerminatedBuffer()!=uchars) {
				1983	errln("UnicodeString read-only-aliasing setTo() does not behave as expected.");
				1984	return;
				1985	}
				1986	alias.remove();
				1987	if(alias.length()!=0) {
				1988	errln("UnicodeString(read-only-alias).remove() did not work.");
				1989	}
				1990	if(alias.getTerminatedBuffer()==uchars) {
				1991	errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
				1992	"did not un-alias as expected.");
				1993	}
				1994	if(uchars[0]!=0x61) {
				1995	errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
				1996	"modified the original buffer.");
				1997	}
				1998	if(0!=u_strlen(alias.getTerminatedBuffer())) {
				1999	errln("UnicodeString.setTo(read-only-alias).remove().getTerminatedBuffer() "
				2000	"does not return a buffer terminated at length 0.");
				2001	}
				2002
				2003	UnicodeString longString=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789");
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	2004	alias.setTo(false, longString.getBuffer(), longString.length());
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	2005	alias.remove(0, 10);
				2006	if(longString.compare(10, INT32_MAX, alias)!=0 \|\| alias.getBuffer()!=longString.getBuffer()+10) {
				2007	errln("UnicodeString.setTo(read-only-alias).remove(0, 10) did not preserve aliasing as expected.");
				2008	}
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	2009	alias.setTo(false, longString.getBuffer(), longString.length());
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	2010	alias.remove(27, 99);
				2011	if(longString.compare(0, 27, alias)!=0 \|\| alias.getBuffer()!=longString.getBuffer()) {
				2012	errln("UnicodeString.setTo(read-only-alias).remove(27, 99) did not preserve aliasing as expected.");
				2013	}
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	2014	alias.setTo(false, longString.getBuffer(), longString.length());
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	2015	alias.retainBetween(6, 30);
				2016	if(longString.compare(6, 24, alias)!=0 \|\| alias.getBuffer()!=longString.getBuffer()+6) {
				2017	errln("UnicodeString.setTo(read-only-alias).retainBetween(6, 30) did not preserve aliasing as expected.");
				2018	}
				2019
				2020	UChar abc[]={ 0x61, 0x62, 0x63, 0 };
				2021	UBool hasRVO= wrapUChars(abc).getBuffer()==abc;
				2022
				2023	UnicodeString temp;
				2024	temp.fastCopyFrom(longString.tempSubString());
				2025	if(temp!=longString \|\| (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
				2026	errln("UnicodeString.tempSubString() failed");
				2027	}
				2028	temp.fastCopyFrom(longString.tempSubString(-3, 5));
				2029	if(longString.compare(0, 5, temp)!=0 \|\| (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
				2030	errln("UnicodeString.tempSubString(-3, 5) failed");
				2031	}
				2032	temp.fastCopyFrom(longString.tempSubString(17));
				2033	if(longString.compare(17, INT32_MAX, temp)!=0 \|\| (hasRVO && temp.getBuffer()!=longString.getBuffer()+17)) {
				2034	errln("UnicodeString.tempSubString(17) failed");
				2035	}
				2036	temp.fastCopyFrom(longString.tempSubString(99));
				2037	if(!temp.isEmpty()) {
				2038	errln("UnicodeString.tempSubString(99) failed");
				2039	}
				2040	temp.fastCopyFrom(longString.tempSubStringBetween(6));
				2041	if(longString.compare(6, INT32_MAX, temp)!=0 \|\| (hasRVO && temp.getBuffer()!=longString.getBuffer()+6)) {
				2042	errln("UnicodeString.tempSubStringBetween(6) failed");
				2043	}
				2044	temp.fastCopyFrom(longString.tempSubStringBetween(8, 18));
				2045	if(longString.compare(8, 10, temp)!=0 \|\| (hasRVO && temp.getBuffer()!=longString.getBuffer()+8)) {
				2046	errln("UnicodeString.tempSubStringBetween(8, 18) failed");
				2047	}
				2048	UnicodeString bogusString;
				2049	bogusString.setToBogus();
				2050	temp.fastCopyFrom(bogusString.tempSubStringBetween(8, 18));
				2051	if(!temp.isBogus()) {
				2052	errln("UnicodeString.setToBogus().tempSubStringBetween(8, 18) failed");
				2053	}
				2054	}
				2055
				2056	void
				2057	UnicodeStringTest::doTestAppendable(UnicodeString &dest, Appendable &app) {
				2058	static const UChar cde[3]={ 0x63, 0x64, 0x65 };
				2059	static const UChar fg[3]={ 0x66, 0x67, 0 };
				2060	if(!app.reserveAppendCapacity(12)) {
				2061	errln("Appendable.reserve(12) failed");
				2062	}
				2063	app.appendCodeUnit(0x61);
				2064	app.appendCodePoint(0x62);
				2065	app.appendCodePoint(0x50000);
				2066	app.appendString(cde, 3);
				2067	app.appendString(fg, -1);
				2068	UChar scratch[3];
				2069	int32_t capacity=-1;
				2070	UChar *buffer=app.getAppendBuffer(3, 3, scratch, 3, &capacity);
				2071	if(capacity<3) {
				2072	errln("Appendable.getAppendBuffer(min=3) returned capacity=%d<3", (int)capacity);
				2073	return;
				2074	}
				2075	static const UChar hij[3]={ 0x68, 0x69, 0x6a };
				2076	u_memcpy(buffer, hij, 3);
				2077	app.appendString(buffer, 3);
				2078	if(dest!=UNICODE_STRING_SIMPLE("ab\\U00050000cdefghij").unescape()) {
				2079	errln("Appendable.append(...) failed");
				2080	}
				2081	buffer=app.getAppendBuffer(0, 3, scratch, 3, &capacity);
				2082	if(buffer!=NULL \|\| capacity!=0) {
				2083	errln("Appendable.getAppendBuffer(min=0) failed");
				2084	}
				2085	capacity=1;
				2086	buffer=app.getAppendBuffer(3, 3, scratch, 2, &capacity);
				2087	if(buffer!=NULL \|\| capacity!=0) {
				2088	errln("Appendable.getAppendBuffer(scratch<min) failed");
				2089	}
				2090	}
				2091
				2092	class SimpleAppendable : public Appendable {
				2093	public:
				2094	explicit SimpleAppendable(UnicodeString &dest) : str(dest) {}
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	2095	virtual UBool appendCodeUnit(UChar c) override { str.append(c); return true; }
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	2096	SimpleAppendable &reset() { str.remove(); return *this; }
				2097	private:
				2098	UnicodeString &str;
				2099	};
				2100
				2101	void
				2102	UnicodeStringTest::TestAppendable() {
				2103	UnicodeString dest;
				2104	SimpleAppendable app(dest);
				2105	doTestAppendable(dest, app);
				2106	}
				2107
				2108	void
				2109	UnicodeStringTest::TestUnicodeStringImplementsAppendable() {
				2110	UnicodeString dest;
				2111	UnicodeStringAppendable app(dest);
				2112	doTestAppendable(dest, app);
				2113	}
				2114
				2115	void
				2116	UnicodeStringTest::TestSizeofUnicodeString() {
				2117	// See the comments in unistr.h near the declaration of UnicodeString's fields.
				2118	// See the API comments for UNISTR_OBJECT_SIZE.
				2119	size_t sizeofUniStr=sizeof(UnicodeString);
				2120	size_t expected=UNISTR_OBJECT_SIZE;
				2121	if(expected!=sizeofUniStr) {
				2122	// Possible cause: UNISTR_OBJECT_SIZE may not be a multiple of sizeof(pointer),
				2123	// of the compiler might add more internal padding than expected.
				2124	errln("sizeof(UnicodeString)=%d, expected UNISTR_OBJECT_SIZE=%d",
				2125	(int)sizeofUniStr, (int)expected);
				2126	}
				2127	if(sizeofUniStr<32) {
				2128	errln("sizeof(UnicodeString)=%d < 32, probably too small", (int)sizeofUniStr);
				2129	}
				2130	// We assume that the entire UnicodeString object,
				2131	// minus the vtable pointer and 2 bytes for flags and short length,
				2132	// is available for internal storage of UChars.
				2133	int32_t expectedStackBufferLength=((int32_t)UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR;
				2134	UnicodeString s;
				2135	const UChar *emptyBuffer=s.getBuffer();
				2136	for(int32_t i=0; i<expectedStackBufferLength; ++i) {
				2137	s.append((UChar)0x2e);
				2138	}
				2139	const UChar *fullBuffer=s.getBuffer();
				2140	if(fullBuffer!=emptyBuffer) {
				2141	errln("unexpected reallocation when filling with assumed stack buffer size of %d",
				2142	expectedStackBufferLength);
				2143	}
				2144	const UChar *terminatedBuffer=s.getTerminatedBuffer();
				2145	if(terminatedBuffer==emptyBuffer) {
				2146	errln("unexpected keeping stack buffer when overfilling assumed stack buffer size of %d",
				2147	expectedStackBufferLength);
				2148	}
				2149	}
				2150
				2151	// Try to avoid clang -Wself-move warnings from s1 = std::move(s1);
				2152	void moveFrom(UnicodeString &dest, UnicodeString &src) {
				2153	dest = std::move(src);
				2154	}
				2155
				2156	void
				2157	UnicodeStringTest::TestMoveSwap() {
				2158	static const UChar abc[3] = { 0x61, 0x62, 0x63 }; // "abc"
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	2159	UnicodeString s1(false, abc, UPRV_LENGTHOF(abc)); // read-only alias
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	2160	UnicodeString s2(100, 0x7a, 100); // 100 * 'z' should be on the heap
				2161	UnicodeString s3("defg", 4, US_INV); // in stack buffer
				2162	const UChar *p = s2.getBuffer();
				2163	s1.swap(s2);
				2164	if(s1.getBuffer() != p \|\| s1.length() != 100 \|\| s2.getBuffer() != abc \|\| s2.length() != 3) {
				2165	errln("UnicodeString.swap() did not swap");
				2166	}
				2167	swap(s2, s3);
				2168	if(s2 != UNICODE_STRING_SIMPLE("defg") \|\| s3.getBuffer() != abc \|\| s3.length() != 3) {
				2169	errln("swap(UnicodeString) did not swap back");
				2170	}
				2171	UnicodeString s4;
				2172	s4 = std::move(s1);
				2173	if(s4.getBuffer() != p \|\| s4.length() != 100 \|\| !s1.isBogus()) {
				2174	errln("UnicodeString = std::move(heap) did not move");
				2175	}
				2176	UnicodeString s5;
				2177	s5 = std::move(s2);
				2178	if(s5 != UNICODE_STRING_SIMPLE("defg")) {
				2179	errln("UnicodeString = std::move(stack) did not move");
				2180	}
				2181	UnicodeString s6;
				2182	s6 = std::move(s3);
				2183	if(s6.getBuffer() != abc \|\| s6.length() != 3) {
				2184	errln("UnicodeString = std::move(alias) did not move");
				2185	}
				2186	infoln("TestMoveSwap() with rvalue references");
				2187	s1 = static_cast<UnicodeString &&>(s6);
				2188	if(s1.getBuffer() != abc \|\| s1.length() != 3) {
				2189	errln("UnicodeString move assignment operator did not move");
				2190	}
				2191	UnicodeString s7(static_cast<UnicodeString &&>(s4));
				2192	if(s7.getBuffer() != p \|\| s7.length() != 100 \|\| !s4.isBogus()) {
				2193	errln("UnicodeString move constructor did not move");
				2194	}
				2195
				2196	// Move self assignment leaves the object valid but in an undefined state.
				2197	// Do it to make sure there is no crash,
				2198	// but do not check for any particular resulting value.
				2199	moveFrom(s1, s1);
				2200	moveFrom(s2, s2);
				2201	moveFrom(s3, s3);
				2202	moveFrom(s4, s4);
				2203	moveFrom(s5, s5);
				2204	moveFrom(s6, s6);
				2205	moveFrom(s7, s7);
				2206	// Simple copy assignment must work.
				2207	UnicodeString simple = UNICODE_STRING_SIMPLE("simple");
				2208	s1 = s6 = s4 = s7 = simple;
				2209	if(s1 != simple \|\| s4 != simple \|\| s6 != simple \|\| s7 != simple) {
				2210	errln("UnicodeString copy after self-move did not work");
				2211	}
				2212	}
				2213
				2214	void
				2215	UnicodeStringTest::TestUInt16Pointers() {
				2216	static const uint16_t carr[] = { 0x61, 0x62, 0x63, 0 };
				2217	uint16_t arr[4];
				2218
				2219	UnicodeString expected(u"abc");
				2220	assertEquals("abc from pointer", expected, UnicodeString(carr));
				2221	assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	2222	assertEquals("abc from read-only-alias pointer", expected, UnicodeString(true, carr, 3));
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	2223
				2224	UnicodeString alias(arr, 0, 4);
				2225	alias.append(u'a').append(u'b').append(u'c');
				2226	assertEquals("abc from writable alias", expected, alias);
				2227	assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
				2228
				2229	UErrorCode errorCode = U_ZERO_ERROR;
				2230	int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
				2231	assertSuccess(WHERE, errorCode);
				2232	assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
				2233	}
				2234
				2235	void
				2236	UnicodeStringTest::TestWCharPointers() {
				2237	#if U_SIZEOF_WCHAR_T==2
				2238	static const wchar_t carr[] = { 0x61, 0x62, 0x63, 0 };
				2239	wchar_t arr[4];
				2240
				2241	UnicodeString expected(u"abc");
				2242	assertEquals("abc from pointer", expected, UnicodeString(carr));
				2243	assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	2244	assertEquals("abc from read-only-alias pointer", expected, UnicodeString(true, carr, 3));
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	2245
				2246	UnicodeString alias(arr, 0, 4);
				2247	alias.append(u'a').append(u'b').append(u'c');
				2248	assertEquals("abc from writable alias", expected, alias);
				2249	assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
				2250
				2251	UErrorCode errorCode = U_ZERO_ERROR;
				2252	int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
				2253	assertSuccess(WHERE, errorCode);
				2254	assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
				2255	#endif
				2256	}
				2257
				2258	void
				2259	UnicodeStringTest::TestNullPointers() {
				2260	assertTrue("empty from nullptr", UnicodeString(nullptr).isEmpty());
				2261	assertTrue("empty from nullptr+length", UnicodeString(nullptr, 2).isEmpty());
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	2262	assertTrue("empty from read-only-alias nullptr", UnicodeString(true, nullptr, 3).isEmpty());
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	2263
				2264	UnicodeString alias(nullptr, 4, 4); // empty, no alias
				2265	assertTrue("empty from writable alias", alias.isEmpty());
				2266	alias.append(u'a').append(u'b').append(u'c');
				2267	UnicodeString expected(u"abc");
				2268	assertEquals("abc from writable alias", expected, alias);
				2269
				2270	UErrorCode errorCode = U_ZERO_ERROR;
				2271	UnicodeString(u"def").extract(nullptr, 0, errorCode);
				2272	assertEquals("buffer overflow extracting to nullptr", U_BUFFER_OVERFLOW_ERROR, errorCode);
				2273	}
				2274
				2275	void UnicodeStringTest::TestUnicodeStringInsertAppendToSelf() {
				2276	IcuTestErrorCode status(*this, "TestUnicodeStringAppendToSelf");
				2277
				2278	// Test append operation
				2279	UnicodeString str(u"foo ");
				2280	str.append(str);
				2281	str.append(str);
				2282	str.append(str);
				2283	assertEquals("", u"foo foo foo foo foo foo foo foo ", str);
				2284
				2285	// Test append operation with readonly alias to start
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	2286	str = UnicodeString(true, u"foo ", 4);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	2287	str.append(str);
				2288	str.append(str);
				2289	str.append(str);
				2290	assertEquals("", u"foo foo foo foo foo foo foo foo ", str);
				2291
				2292	// Test append operation with aliased substring
				2293	str = u"abcde";
				2294	UnicodeString sub = str.tempSubString(1, 2);
				2295	str.append(sub);
				2296	assertEquals("", u"abcdebc", str);
				2297
				2298	// Test append operation with double-aliased substring
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	2299	str = UnicodeString(true, u"abcde", 5);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	2300	sub = str.tempSubString(1, 2);
				2301	str.append(sub);
				2302	assertEquals("", u"abcdebc", str);
				2303
				2304	// Test insert operation
				2305	str = u"a-*b";
				2306	str.insert(2, str);
				2307	str.insert(4, str);
				2308	str.insert(8, str);
				2309	assertEquals("", u"a-a-a-a-a-a-a-a-bbbbbbbb", str);
				2310
				2311	// Test insert operation with readonly alias to start
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	2312	str = UnicodeString(true, u"a-*b", 4);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	2313	str.insert(2, str);
				2314	str.insert(4, str);
				2315	str.insert(8, str);
				2316	assertEquals("", u"a-a-a-a-a-a-a-a-bbbbbbbb", str);
				2317
				2318	// Test insert operation with aliased substring
				2319	str = u"abcde";
				2320	sub = str.tempSubString(1, 3);
				2321	str.insert(2, sub);
				2322	assertEquals("", u"abbcdcde", str);
				2323
				2324	// Test insert operation with double-aliased substring
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	2325	str = UnicodeString(true, u"abcde", 5);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	2326	sub = str.tempSubString(1, 3);
				2327	str.insert(2, sub);
				2328	assertEquals("", u"abbcdcde", str);
				2329	}
				2330
				2331	void UnicodeStringTest::TestLargeAppend() {
				2332	if(quick) return;
				2333
				2334	IcuTestErrorCode status(*this, "TestLargeAppend");
				2335	// Make a large UnicodeString
				2336	int32_t len = 0xAFFFFFF;
				2337	UnicodeString str;
				2338	char16_t *buf = str.getBuffer(len);
				2339	// A fast way to set buffer to valid Unicode.
				2340	// 4E4E is a valid unicode character
				2341	uprv_memset(buf, 0x4e, len * 2);
				2342	str.releaseBuffer(len);
				2343	UnicodeString dest;
				2344	// Append it 16 times
				2345	// 0xAFFFFFF times 16 is 0xA4FFFFF1,
				2346	// which is greater than INT32_MAX, which is 0x7FFFFFFF.
				2347	int64_t total = 0;
				2348	for (int32_t i = 0; i < 16; i++) {
				2349	dest.append(str);
				2350	total += len;
				2351	if (total <= INT32_MAX) {
				2352	assertFalse("dest is not bogus", dest.isBogus());
				2353	} else {
				2354	assertTrue("dest should be bogus", dest.isBogus());
				2355	}
				2356	}
				2357	dest.remove();
				2358	total = 0;
				2359	for (int32_t i = 0; i < 16; i++) {
				2360	dest.append(str);
				2361	total += len;
				2362	if (total + len <= INT32_MAX) {
				2363	assertFalse("dest is not bogus", dest.isBogus());
				2364	} else if (total <= INT32_MAX) {
				2365	// Check that a string of exactly the maximum size works
				2366	UnicodeString str2;
Frank Tang	d2858cb	2022-04-08 20:34:12 -0700	[diff] [blame]	2367	int32_t remain = static_cast<int32_t>(INT32_MAX - total);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	2368	char16_t *buf2 = str2.getBuffer(remain);
				2369	if (buf2 == nullptr) {
				2370	// if somehow memory allocation fail, return the test
				2371	return;
				2372	}
				2373	uprv_memset(buf2, 0x4e, remain * 2);
				2374	str2.releaseBuffer(remain);
				2375	dest.append(str2);
				2376	total += remain;
				2377	assertEquals("When a string of exactly the maximum size works", (int64_t)INT32_MAX, total);
				2378	assertEquals("When a string of exactly the maximum size works", INT32_MAX, dest.length());
				2379	assertFalse("dest is not bogus", dest.isBogus());
				2380
				2381	// Check that a string size+1 goes bogus
				2382	str2.truncate(1);
				2383	dest.append(str2);
				2384	total++;
				2385	assertTrue("dest should be bogus", dest.isBogus());
				2386	} else {
				2387	assertTrue("dest should be bogus", dest.isBogus());
				2388	}
				2389	}
				2390	}