Blame - source/test/intltest/utxttest.cpp - chromium.googlesource.com/chromium/deps/icu

blob: d0e5ffb571da80b1e566576853653eb3c127972c [file] [log] [blame]

Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1	// © 2016 and later: Unicode, Inc. and others.
				2	// License & terms of use: http://www.unicode.org/copyright.html
				3	/********************************************************************
				4	* COPYRIGHT:
				5	* Copyright (c) 2005-2016, International Business Machines Corporation and
				6	* others. All Rights Reserved.
				7	********************************************************************/
				8	/************************************************************************
				9	* Tests for the UText and UTextIterator text abstraction classes
				10	*
				11	************************************************************************/
				12
				13	#include <string.h>
				14	#include <stdio.h>
				15	#include <stdlib.h>
				16	#include "unicode/utypes.h"
				17	#include "unicode/utext.h"
				18	#include "unicode/utf8.h"
				19	#include "unicode/utf16.h"
				20	#include "unicode/ustring.h"
				21	#include "unicode/uchriter.h"
				22	#include "cmemory.h"
				23	#include "cstr.h"
				24	#include "utxttest.h"
				25
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	26	static UBool gFailed = false;
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	27	static int gTestNum = 0;
				28
				29	// Forward decl
				30	UText openFragmentedUnicodeString(UText ut, UnicodeString s, UErrorCode status);
				31
				32	#define TEST_ASSERT(x) UPRV_BLOCK_MACRO_BEGIN { \
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	33	if ((x)==false) { \
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	34	errln("Test #%d failure in file %s at line %d\n", gTestNum, __FILE__, __LINE__); \
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	35	gFailed = true; \
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	36	} \
				37	} UPRV_BLOCK_MACRO_END
				38
				39
				40	#define TEST_SUCCESS(status) UPRV_BLOCK_MACRO_BEGIN { \
				41	if (U_FAILURE(status)) { \
				42	errln("Test #%d failure in file %s at line %d. Error = \"%s\"\n", \
				43	gTestNum, __FILE__, __LINE__, u_errorName(status)); \
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	44	gFailed = true; \
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	45	} \
				46	} UPRV_BLOCK_MACRO_END
				47
				48	UTextTest::UTextTest() {
				49	}
				50
				51	UTextTest::~UTextTest() {
				52	}
				53
				54
				55	void
				56	UTextTest::runIndexedTest(int32_t index, UBool exec,
				57	const char* &name, char* /par/) {
				58	TESTCASE_AUTO_BEGIN;
				59	TESTCASE_AUTO(TextTest);
				60	TESTCASE_AUTO(ErrorTest);
				61	TESTCASE_AUTO(FreezeTest);
				62	TESTCASE_AUTO(Ticket5560);
				63	TESTCASE_AUTO(Ticket6847);
				64	TESTCASE_AUTO(Ticket10562);
				65	TESTCASE_AUTO(Ticket10983);
				66	TESTCASE_AUTO(Ticket12130);
				67	TESTCASE_AUTO(Ticket13344);
				68	TESTCASE_AUTO_END;
				69	}
				70
				71	//
				72	// Quick and dirty random number generator.
				73	// (don't use library so that results are portable.
				74	static uint32_t m_seed = 1;
				75	static uint32_t m_rand()
				76	{
				77	m_seed = m_seed * 1103515245 + 12345;
				78	return (uint32_t)(m_seed/65536) % 32768;
				79	}
				80
				81
				82	//
				83	// TextTest()
				84	//
				85	// Top Level function for UText testing.
				86	// Specifies the strings to be tested, with the actual testing itself
				87	// being carried out in another function, TestString().
				88	//
				89	void UTextTest::TextTest() {
				90	int32_t i, j;
				91
				92	TestString("abcd\\U00010001xyz");
				93	TestString("");
				94
				95	// Supplementary chars at start or end
				96	TestString("\\U00010001");
				97	TestString("abc\\U00010001");
				98	TestString("\\U00010001abc");
				99
				100	// Test simple strings of lengths 1 to 60, looking for glitches at buffer boundaries
				101	UnicodeString s;
				102	for (i=1; i<60; i++) {
				103	s.truncate(0);
				104	for (j=0; j<i; j++) {
				105	if (j+0x30 == 0x5c) {
				106	// backslash. Needs to be escaped
				107	s.append((UChar)0x5c);
				108	}
				109	s.append(UChar(j+0x30));
				110	}
				111	TestString(s);
				112	}
				113
				114	// Test strings with odd-aligned supplementary chars,
				115	// looking for glitches at buffer boundaries
				116	for (i=1; i<60; i++) {
				117	s.truncate(0);
				118	s.append((UChar)0x41);
				119	for (j=0; j<i; j++) {
				120	s.append(UChar32(j+0x11000));
				121	}
				122	TestString(s);
				123	}
				124
				125	// String of chars of randomly varying size in utf-8 representation.
				126	// Exercise the mapping, and the varying sized buffer.
				127	//
				128	s.truncate(0);
				129	UChar32 c1 = 0;
				130	UChar32 c2 = 0x100;
				131	UChar32 c3 = 0xa000;
				132	UChar32 c4 = 0x11000;
				133	for (i=0; i<1000; i++) {
				134	int len8 = m_rand()%4 + 1;
				135	switch (len8) {
				136	case 1:
				137	c1 = (c1+1)%0x80;
				138	// don't put 0 into string (0 terminated strings for some tests)
				139	// don't put '\', will cause unescape() to fail.
				140	if (c1==0x5c \|\| c1==0) {
				141	c1++;
				142	}
				143	s.append(c1);
				144	break;
				145	case 2:
				146	s.append(c2++);
				147	break;
				148	case 3:
				149	s.append(c3++);
				150	break;
				151	case 4:
				152	s.append(c4++);
				153	break;
				154	}
				155	}
				156	TestString(s);
				157	}
				158
				159
				160	//
				161	// TestString() Run a suite of UText tests on a string.
				162	// The test string is unescaped before use.
				163	//
				164	void UTextTest::TestString(const UnicodeString &s) {
				165	int32_t i;
				166	int32_t j;
				167	UChar32 c;
				168	int32_t cpCount = 0;
				169	UErrorCode status = U_ZERO_ERROR;
				170	UText *ut = NULL;
				171	int32_t saLen;
				172
				173	UnicodeString sa = s.unescape();
				174	saLen = sa.length();
				175
				176	//
				177	// Build up a mapping between code points and UTF-16 code unit indexes.
				178	//
				179	m *cpMap = new m[sa.length() + 1];
				180	j = 0;
				181	for (i=0; i<sa.length(); i=sa.moveIndex32(i, 1)) {
				182	c = sa.char32At(i);
				183	cpMap[j].nativeIdx = i;
				184	cpMap[j].cp = c;
				185	j++;
				186	cpCount++;
				187	}
				188	cpMap[j].nativeIdx = i; // position following the last char in utf-16 string.
				189
				190
				191	// UChar * test, null terminated
				192	status = U_ZERO_ERROR;
				193	UChar *buf = new UChar[saLen+1];
				194	sa.extract(buf, saLen+1, status);
				195	TEST_SUCCESS(status);
				196	ut = utext_openUChars(NULL, buf, -1, &status);
				197	TEST_SUCCESS(status);
				198	TestAccess(sa, ut, cpCount, cpMap);
				199	utext_close(ut);
				200	delete [] buf;
				201
				202	// UChar * test, with length
				203	status = U_ZERO_ERROR;
				204	buf = new UChar[saLen+1];
				205	sa.extract(buf, saLen+1, status);
				206	TEST_SUCCESS(status);
				207	ut = utext_openUChars(NULL, buf, saLen, &status);
				208	TEST_SUCCESS(status);
				209	TestAccess(sa, ut, cpCount, cpMap);
				210	utext_close(ut);
				211	delete [] buf;
				212
				213
				214	// UnicodeString test
				215	status = U_ZERO_ERROR;
				216	ut = utext_openUnicodeString(NULL, &sa, &status);
				217	TEST_SUCCESS(status);
				218	TestAccess(sa, ut, cpCount, cpMap);
				219	TestCMR(sa, ut, cpCount, cpMap, cpMap);
				220	utext_close(ut);
				221
				222
				223	// Const UnicodeString test
				224	status = U_ZERO_ERROR;
				225	ut = utext_openConstUnicodeString(NULL, &sa, &status);
				226	TEST_SUCCESS(status);
				227	TestAccess(sa, ut, cpCount, cpMap);
				228	utext_close(ut);
				229
				230
				231	// Replaceable test. (UnicodeString inherits Replaceable)
				232	status = U_ZERO_ERROR;
				233	ut = utext_openReplaceable(NULL, &sa, &status);
				234	TEST_SUCCESS(status);
				235	TestAccess(sa, ut, cpCount, cpMap);
				236	TestCMR(sa, ut, cpCount, cpMap, cpMap);
				237	utext_close(ut);
				238
				239	// Character Iterator Tests
				240	status = U_ZERO_ERROR;
				241	const UChar *cbuf = sa.getBuffer();
				242	CharacterIterator *ci = new UCharCharacterIterator(cbuf, saLen, status);
				243	TEST_SUCCESS(status);
				244	ut = utext_openCharacterIterator(NULL, ci, &status);
				245	TEST_SUCCESS(status);
				246	TestAccess(sa, ut, cpCount, cpMap);
				247	utext_close(ut);
				248	delete ci;
				249
				250
				251	// Fragmented UnicodeString (Chunk size of one)
				252	//
				253	status = U_ZERO_ERROR;
				254	ut = openFragmentedUnicodeString(NULL, &sa, &status);
				255	TEST_SUCCESS(status);
				256	TestAccess(sa, ut, cpCount, cpMap);
				257	utext_close(ut);
				258
				259	//
				260	// UTF-8 test
				261	//
				262
				263	// Convert the test string from UnicodeString to (char *) in utf-8 format
				264	int32_t u8Len = sa.extract(0, sa.length(), NULL, 0, "utf-8");
				265	char *u8String = new char[u8Len + 1];
				266	sa.extract(0, sa.length(), u8String, u8Len+1, "utf-8");
				267
				268	// Build up the map of code point indices in the utf-8 string
				269	m * u8Map = new m[sa.length() + 1];
				270	i = 0; // native utf-8 index
				271	for (j=0; j<cpCount ; j++) { // code point number
				272	u8Map[j].nativeIdx = i;
				273	U8_NEXT(u8String, i, u8Len, c);
				274	u8Map[j].cp = c;
				275	}
				276	u8Map[cpCount].nativeIdx = u8Len; // position following the last char in utf-8 string.
				277
				278	// Do the test itself
				279	status = U_ZERO_ERROR;
				280	ut = utext_openUTF8(NULL, u8String, -1, &status);
				281	TEST_SUCCESS(status);
				282	TestAccess(sa, ut, cpCount, u8Map);
				283	utext_close(ut);
				284
				285
				286
				287	delete []cpMap;
				288	delete []u8Map;
				289	delete []u8String;
				290	}
				291
				292	// TestCMR test Copy, Move and Replace operations.
				293	// us UnicodeString containing the test text.
				294	// ut UText containing the same test text.
				295	// cpCount number of code points in the test text.
				296	// nativeMap Mapping from code points to native indexes for the UText.
				297	// u16Map Mapping from code points to UTF-16 indexes, for use with the UnicodeString.
				298	//
				299	// This function runs a whole series of operations on each incoming UText.
				300	// The UText is deep-cloned prior to each operation, so that the original UText remains unchanged.
				301	//
				302	void UTextTest::TestCMR(const UnicodeString &us, UText ut, int cpCount, m nativeMap, m *u16Map) {
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	303	TEST_ASSERT(utext_isWritable(ut) == true);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	304
				305	int srcLengthType; // Loop variables for selecting the position and length
				306	int srcPosType; // of the block to operate on within the source text.
				307	int destPosType;
				308
				309	int srcIndex = 0; // Code Point indexes of the block to operate on for
				310	int srcLength = 0; // a specific test.
				311
				312	int destIndex = 0; // Code point index of the destination for a copy/move test.
				313
				314	int32_t nativeStart = 0; // Native unit indexes for a test.
				315	int32_t nativeLimit = 0;
				316	int32_t nativeDest = 0;
				317
				318	int32_t u16Start = 0; // UTF-16 indexes for a test.
				319	int32_t u16Limit = 0; // used when performing the same operation in a Unicode String
				320	int32_t u16Dest = 0;
				321
				322	// Iterate over a whole series of source index, length and a target indexes.
				323	// This is done with code point indexes; these will be later translated to native
				324	// indexes using the cpMap.
				325	for (srcLengthType=1; srcLengthType<=3; srcLengthType++) {
				326	switch (srcLengthType) {
				327	case 1: srcLength = 1; break;
				328	case 2: srcLength = 5; break;
				329	case 3: srcLength = cpCount / 3;
				330	}
				331	for (srcPosType=1; srcPosType<=5; srcPosType++) {
				332	switch (srcPosType) {
				333	case 1: srcIndex = 0; break;
				334	case 2: srcIndex = 1; break;
				335	case 3: srcIndex = cpCount - srcLength; break;
				336	case 4: srcIndex = cpCount - srcLength - 1; break;
				337	case 5: srcIndex = cpCount / 2; break;
				338	}
				339	if (srcIndex < 0 \|\| srcIndex + srcLength > cpCount) {
				340	// filter out bogus test cases -
				341	// those with a source range that falls of an edge of the string.
				342	continue;
				343	}
				344
				345	//
				346	// Copy and move tests.
				347	// iterate over a variety of destination positions.
				348	//
				349	for (destPosType=1; destPosType<=4; destPosType++) {
				350	switch (destPosType) {
				351	case 1: destIndex = 0; break;
				352	case 2: destIndex = 1; break;
				353	case 3: destIndex = srcIndex - 1; break;
				354	case 4: destIndex = srcIndex + srcLength + 1; break;
				355	case 5: destIndex = cpCount-1; break;
				356	case 6: destIndex = cpCount; break;
				357	}
				358	if (destIndex<0 \|\| destIndex>cpCount) {
				359	// filter out bogus test cases.
				360	continue;
				361	}
				362
				363	nativeStart = nativeMap[srcIndex].nativeIdx;
				364	nativeLimit = nativeMap[srcIndex+srcLength].nativeIdx;
				365	nativeDest = nativeMap[destIndex].nativeIdx;
				366
				367	u16Start = u16Map[srcIndex].nativeIdx;
				368	u16Limit = u16Map[srcIndex+srcLength].nativeIdx;
				369	u16Dest = u16Map[destIndex].nativeIdx;
				370
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	371	gFailed = false;
				372	TestCopyMove(us, ut, false,
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	373	nativeStart, nativeLimit, nativeDest,
				374	u16Start, u16Limit, u16Dest);
				375
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	376	TestCopyMove(us, ut, true,
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	377	nativeStart, nativeLimit, nativeDest,
				378	u16Start, u16Limit, u16Dest);
				379
				380	if (gFailed) {
				381	return;
				382	}
				383	}
				384
				385	//
				386	// Replace tests.
				387	//
				388	UnicodeString fullRepString("This is an arbitrary string that will be used as replacement text");
				389	for (int32_t replStrLen=0; replStrLen<20; replStrLen++) {
				390	UnicodeString repStr(fullRepString, 0, replStrLen);
				391	TestReplace(us, ut,
				392	nativeStart, nativeLimit,
				393	u16Start, u16Limit,
				394	repStr);
				395	if (gFailed) {
				396	return;
				397	}
				398	}
				399
				400	}
				401	}
				402
				403	}
				404
				405	//
				406	// TestCopyMove run a single test case for utext_copy.
				407	// Test cases are created in TestCMR and dispatched here for execution.
				408	//
				409	void UTextTest::TestCopyMove(const UnicodeString &us, UText *ut, UBool move,
				410	int32_t nativeStart, int32_t nativeLimit, int32_t nativeDest,
				411	int32_t u16Start, int32_t u16Limit, int32_t u16Dest)
				412	{
				413	UErrorCode status = U_ZERO_ERROR;
				414	UText *targetUT = NULL;
				415	gTestNum++;
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	416	gFailed = false;
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	417
				418	//
				419	// clone the UText. The test will be run in the cloned copy
				420	// so that we don't alter the original.
				421	//
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	422	targetUT = utext_clone(NULL, ut, true, false, &status);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	423	TEST_SUCCESS(status);
				424	UnicodeString targetUS(us); // And copy the reference string.
				425
				426	// do the test operation first in the reference
				427	targetUS.copy(u16Start, u16Limit, u16Dest);
				428	if (move) {
				429	// delete out the source range.
				430	if (u16Limit < u16Dest) {
				431	targetUS.removeBetween(u16Start, u16Limit);
				432	} else {
				433	int32_t amtCopied = u16Limit - u16Start;
				434	targetUS.removeBetween(u16Start+amtCopied, u16Limit+amtCopied);
				435	}
				436	}
				437
				438	// Do the same operation in the UText under test
				439	utext_copy(targetUT, nativeStart, nativeLimit, nativeDest, move, &status);
				440	if (nativeDest > nativeStart && nativeDest < nativeLimit) {
				441	TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
				442	} else {
				443	TEST_SUCCESS(status);
				444
				445	// Compare the results of the two parallel tests
				446	int32_t usi = 0; // UnicodeString position, utf-16 index.
				447	int64_t uti = 0; // UText position, native index.
				448	int32_t cpi; // char32 position (code point index)
				449	UChar32 usc; // code point from Unicode String
				450	UChar32 utc; // code point from UText
				451	utext_setNativeIndex(targetUT, 0);
				452	for (cpi=0; ; cpi++) {
				453	usc = targetUS.char32At(usi);
				454	utc = utext_next32(targetUT);
				455	if (utc < 0) {
				456	break;
				457	}
				458	TEST_ASSERT(uti == usi);
				459	TEST_ASSERT(utc == usc);
				460	usi = targetUS.moveIndex32(usi, 1);
				461	uti = utext_getNativeIndex(targetUT);
				462	if (gFailed) {
				463	goto cleanupAndReturn;
				464	}
				465	}
				466	int64_t expectedNativeLength = utext_nativeLength(ut);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	467	if (move == false) {
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	468	expectedNativeLength += nativeLimit - nativeStart;
				469	}
				470	uti = utext_getNativeIndex(targetUT);
				471	TEST_ASSERT(uti == expectedNativeLength);
				472	}
				473
				474	cleanupAndReturn:
				475	utext_close(targetUT);
				476	}
				477
				478
				479	//
				480	// TestReplace Test a single Replace operation.
				481	//
				482	void UTextTest::TestReplace(
				483	const UnicodeString &us, // reference UnicodeString in which to do the replace
				484	UText *ut, // UnicodeText object under test.
				485	int32_t nativeStart, // Range to be replaced, in UText native units.
				486	int32_t nativeLimit,
				487	int32_t u16Start, // Range to be replaced, in UTF-16 units
				488	int32_t u16Limit, // for use in the reference UnicodeString.
				489	const UnicodeString &repStr) // The replacement string
				490	{
				491	UErrorCode status = U_ZERO_ERROR;
				492	UText *targetUT = NULL;
				493	gTestNum++;
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	494	gFailed = false;
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	495
				496	//
				497	// clone the target UText. The test will be run in the cloned copy
				498	// so that we don't alter the original.
				499	//
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	500	targetUT = utext_clone(NULL, ut, true, false, &status);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	501	TEST_SUCCESS(status);
				502	UnicodeString targetUS(us); // And copy the reference string.
				503
				504	//
				505	// Do the replace operation in the Unicode String, to
				506	// produce a reference result.
				507	//
				508	targetUS.replace(u16Start, u16Limit-u16Start, repStr);
				509
				510	//
				511	// Do the replace on the UText under test
				512	//
				513	const UChar *rs = repStr.getBuffer();
				514	int32_t rsLen = repStr.length();
				515	int32_t actualDelta = utext_replace(targetUT, nativeStart, nativeLimit, rs, rsLen, &status);
				516	int32_t expectedDelta = repStr.length() - (nativeLimit - nativeStart);
				517	TEST_ASSERT(actualDelta == expectedDelta);
				518
				519	//
				520	// Compare the results
				521	//
				522	int32_t usi = 0; // UnicodeString position, utf-16 index.
				523	int64_t uti = 0; // UText position, native index.
				524	int32_t cpi; // char32 position (code point index)
				525	UChar32 usc; // code point from Unicode String
				526	UChar32 utc; // code point from UText
				527	int64_t expectedNativeLength = 0;
				528	utext_setNativeIndex(targetUT, 0);
				529	for (cpi=0; ; cpi++) {
				530	usc = targetUS.char32At(usi);
				531	utc = utext_next32(targetUT);
				532	if (utc < 0) {
				533	break;
				534	}
				535	TEST_ASSERT(uti == usi);
				536	TEST_ASSERT(utc == usc);
				537	usi = targetUS.moveIndex32(usi, 1);
				538	uti = utext_getNativeIndex(targetUT);
				539	if (gFailed) {
				540	goto cleanupAndReturn;
				541	}
				542	}
				543	expectedNativeLength = utext_nativeLength(ut) + expectedDelta;
				544	uti = utext_getNativeIndex(targetUT);
				545	TEST_ASSERT(uti == expectedNativeLength);
				546
				547	cleanupAndReturn:
				548	utext_close(targetUT);
				549	}
				550
				551	//
				552	// TestAccess Test the read only access functions on a UText, including cloning.
				553	// The text is accessed in a variety of ways, and compared with
				554	// the reference UnicodeString.
				555	//
				556	void UTextTest::TestAccess(const UnicodeString &us, UText ut, int cpCount, m cpMap) {
				557	// Run the standard tests on the caller-supplied UText.
				558	TestAccessNoClone(us, ut, cpCount, cpMap);
				559
				560	// Re-run tests on a shallow clone.
				561	utext_setNativeIndex(ut, 0);
				562	UErrorCode status = U_ZERO_ERROR;
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	563	UText shallowClone = utext_clone(NULL, ut, false /deep/, false /readOnly*/, &status);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	564	TEST_SUCCESS(status);
				565	TestAccessNoClone(us, shallowClone, cpCount, cpMap);
				566
				567	//
				568	// Rerun again on a deep clone.
				569	// Note that text providers are not required to provide deep cloning,
				570	// so unsupported errors are ignored.
				571	//
				572	status = U_ZERO_ERROR;
				573	utext_setNativeIndex(shallowClone, 0);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	574	UText *deepClone = utext_clone(NULL, shallowClone, true, false, &status);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	575	utext_close(shallowClone);
				576	if (status != U_UNSUPPORTED_ERROR) {
				577	TEST_SUCCESS(status);
				578	TestAccessNoClone(us, deepClone, cpCount, cpMap);
				579	}
				580	utext_close(deepClone);
				581	}
				582
				583
				584	//
				585	// TestAccessNoClone() Test the read only access functions on a UText.
				586	// The text is accessed in a variety of ways, and compared with
				587	// the reference UnicodeString.
				588	//
				589	void UTextTest::TestAccessNoClone(const UnicodeString &us, UText ut, int cpCount, m cpMap) {
				590	UErrorCode status = U_ZERO_ERROR;
				591	gTestNum++;
				592
				593	//
				594	// Check the length from the UText
				595	//
				596	int64_t expectedLen = cpMap[cpCount].nativeIdx;
				597	int64_t utlen = utext_nativeLength(ut);
				598	TEST_ASSERT(expectedLen == utlen);
				599
				600	//
				601	// Iterate forwards, verify that we get the correct code points
				602	// at the correct native offsets.
				603	//
				604	int i = 0;
				605	int64_t index;
				606	int64_t expectedIndex = 0;
				607	int64_t foundIndex = 0;
				608	UChar32 expectedC;
				609	UChar32 foundC;
				610	int64_t len;
				611
				612	for (i=0; i<cpCount; i++) {
				613	expectedIndex = cpMap[i].nativeIdx;
				614	foundIndex = utext_getNativeIndex(ut);
				615	TEST_ASSERT(expectedIndex == foundIndex);
				616	expectedC = cpMap[i].cp;
				617	foundC = utext_next32(ut);
				618	TEST_ASSERT(expectedC == foundC);
				619	foundIndex = utext_getPreviousNativeIndex(ut);
				620	TEST_ASSERT(expectedIndex == foundIndex);
				621	if (gFailed) {
				622	return;
				623	}
				624	}
				625	foundC = utext_next32(ut);
				626	TEST_ASSERT(foundC == U_SENTINEL);
				627
				628	// Repeat above, using macros
				629	utext_setNativeIndex(ut, 0);
				630	for (i=0; i<cpCount; i++) {
				631	expectedIndex = cpMap[i].nativeIdx;
				632	foundIndex = UTEXT_GETNATIVEINDEX(ut);
				633	TEST_ASSERT(expectedIndex == foundIndex);
				634	expectedC = cpMap[i].cp;
				635	foundC = UTEXT_NEXT32(ut);
				636	TEST_ASSERT(expectedC == foundC);
				637	if (gFailed) {
				638	return;
				639	}
				640	}
				641	foundC = UTEXT_NEXT32(ut);
				642	TEST_ASSERT(foundC == U_SENTINEL);
				643
				644	//
				645	// Forward iteration (above) should have left index at the
				646	// end of the input, which should == length().
				647	//
				648	len = utext_nativeLength(ut);
				649	foundIndex = utext_getNativeIndex(ut);
				650	TEST_ASSERT(len == foundIndex);
				651
				652	//
				653	// Iterate backwards over entire test string
				654	//
				655	len = utext_getNativeIndex(ut);
				656	utext_setNativeIndex(ut, len);
				657	for (i=cpCount-1; i>=0; i--) {
				658	expectedC = cpMap[i].cp;
				659	expectedIndex = cpMap[i].nativeIdx;
				660	int64_t prevIndex = utext_getPreviousNativeIndex(ut);
				661	foundC = utext_previous32(ut);
				662	foundIndex = utext_getNativeIndex(ut);
				663	TEST_ASSERT(expectedIndex == foundIndex);
				664	TEST_ASSERT(expectedC == foundC);
				665	TEST_ASSERT(prevIndex == foundIndex);
				666	if (gFailed) {
				667	return;
				668	}
				669	}
				670
				671	//
				672	// Backwards iteration, above, should have left our iterator
				673	// position at zero, and continued backwards iterationshould fail.
				674	//
				675	foundIndex = utext_getNativeIndex(ut);
				676	TEST_ASSERT(foundIndex == 0);
				677	foundIndex = utext_getPreviousNativeIndex(ut);
				678	TEST_ASSERT(foundIndex == 0);
				679
				680
				681	foundC = utext_previous32(ut);
				682	TEST_ASSERT(foundC == U_SENTINEL);
				683	foundIndex = utext_getNativeIndex(ut);
				684	TEST_ASSERT(foundIndex == 0);
				685	foundIndex = utext_getPreviousNativeIndex(ut);
				686	TEST_ASSERT(foundIndex == 0);
				687
				688
				689	// And again, with the macros
				690	utext_setNativeIndex(ut, len);
				691	for (i=cpCount-1; i>=0; i--) {
				692	expectedC = cpMap[i].cp;
				693	expectedIndex = cpMap[i].nativeIdx;
				694	foundC = UTEXT_PREVIOUS32(ut);
				695	foundIndex = UTEXT_GETNATIVEINDEX(ut);
				696	TEST_ASSERT(expectedIndex == foundIndex);
				697	TEST_ASSERT(expectedC == foundC);
				698	if (gFailed) {
				699	return;
				700	}
				701	}
				702
				703	//
				704	// Backwards iteration, above, should have left our iterator
				705	// position at zero, and continued backwards iterationshould fail.
				706	//
				707	foundIndex = UTEXT_GETNATIVEINDEX(ut);
				708	TEST_ASSERT(foundIndex == 0);
				709
				710	foundC = UTEXT_PREVIOUS32(ut);
				711	TEST_ASSERT(foundC == U_SENTINEL);
				712	foundIndex = UTEXT_GETNATIVEINDEX(ut);
				713	TEST_ASSERT(foundIndex == 0);
				714	if (gFailed) {
				715	return;
				716	}
				717
				718	//
				719	// next32From(), previous32From(), Iterate in a somewhat random order.
				720	//
				721	int cpIndex = 0;
				722	for (i=0; i<cpCount; i++) {
				723	cpIndex = (cpIndex + 9973) % cpCount;
				724	index = cpMap[cpIndex].nativeIdx;
				725	expectedC = cpMap[cpIndex].cp;
				726	foundC = utext_next32From(ut, index);
				727	TEST_ASSERT(expectedC == foundC);
				728	if (gFailed) {
				729	return;
				730	}
				731	}
				732
				733	cpIndex = 0;
				734	for (i=0; i<cpCount; i++) {
				735	cpIndex = (cpIndex + 9973) % cpCount;
				736	index = cpMap[cpIndex+1].nativeIdx;
				737	expectedC = cpMap[cpIndex].cp;
				738	foundC = utext_previous32From(ut, index);
				739	TEST_ASSERT(expectedC == foundC);
				740	if (gFailed) {
				741	return;
				742	}
				743	}
				744
				745
				746	//
				747	// moveIndex(int32_t delta);
				748	//
				749
				750	// Walk through frontwards, incrementing by one
				751	utext_setNativeIndex(ut, 0);
				752	for (i=1; i<=cpCount; i++) {
				753	utext_moveIndex32(ut, 1);
				754	index = utext_getNativeIndex(ut);
				755	expectedIndex = cpMap[i].nativeIdx;
				756	TEST_ASSERT(expectedIndex == index);
				757	index = UTEXT_GETNATIVEINDEX(ut);
				758	TEST_ASSERT(expectedIndex == index);
				759	}
				760
				761	// Walk through frontwards, incrementing by two
				762	utext_setNativeIndex(ut, 0);
				763	for (i=2; i<cpCount; i+=2) {
				764	utext_moveIndex32(ut, 2);
				765	index = utext_getNativeIndex(ut);
				766	expectedIndex = cpMap[i].nativeIdx;
				767	TEST_ASSERT(expectedIndex == index);
				768	index = UTEXT_GETNATIVEINDEX(ut);
				769	TEST_ASSERT(expectedIndex == index);
				770	}
				771
				772	// walk through the string backwards, decrementing by one.
				773	i = cpMap[cpCount].nativeIdx;
				774	utext_setNativeIndex(ut, i);
				775	for (i=cpCount; i>=0; i--) {
				776	expectedIndex = cpMap[i].nativeIdx;
				777	index = utext_getNativeIndex(ut);
				778	TEST_ASSERT(expectedIndex == index);
				779	index = UTEXT_GETNATIVEINDEX(ut);
				780	TEST_ASSERT(expectedIndex == index);
				781	utext_moveIndex32(ut, -1);
				782	}
				783
				784
				785	// walk through backwards, decrementing by three
				786	i = cpMap[cpCount].nativeIdx;
				787	utext_setNativeIndex(ut, i);
				788	for (i=cpCount; i>=0; i-=3) {
				789	expectedIndex = cpMap[i].nativeIdx;
				790	index = utext_getNativeIndex(ut);
				791	TEST_ASSERT(expectedIndex == index);
				792	index = UTEXT_GETNATIVEINDEX(ut);
				793	TEST_ASSERT(expectedIndex == index);
				794	utext_moveIndex32(ut, -3);
				795	}
				796
				797
				798	//
				799	// Extract
				800	//
				801	int bufSize = us.length() + 10;
				802	UChar *buf = new UChar[bufSize];
				803	status = U_ZERO_ERROR;
				804	expectedLen = us.length();
				805	len = utext_extract(ut, 0, utlen, buf, bufSize, &status);
				806	TEST_SUCCESS(status);
				807	TEST_ASSERT(len == expectedLen);
				808	int compareResult = us.compare(buf, -1);
				809	TEST_ASSERT(compareResult == 0);
				810
				811	status = U_ZERO_ERROR;
				812	len = utext_extract(ut, 0, utlen, NULL, 0, &status);
				813	if (utlen == 0) {
				814	TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
				815	} else {
				816	TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
				817	}
				818	TEST_ASSERT(len == expectedLen);
				819
				820	status = U_ZERO_ERROR;
				821	u_memset(buf, 0x5555, bufSize);
				822	len = utext_extract(ut, 0, utlen, buf, 1, &status);
				823	if (us.length() == 0) {
				824	TEST_SUCCESS(status);
				825	TEST_ASSERT(buf[0] == 0);
				826	} else {
				827	// Buf len == 1, extracting a single 16 bit value.
				828	// If the data char is supplementary, it doesn't matter whether the buffer remains unchanged,
				829	// or whether the lead surrogate of the pair is extracted.
				830	// It's a buffer overflow error in either case.
				831	TEST_ASSERT(buf[0] == us.charAt(0) \|\|
				832	(buf[0] == 0x5555 && U_IS_SUPPLEMENTARY(us.char32At(0))));
				833	TEST_ASSERT(buf[1] == 0x5555);
				834	if (us.length() == 1) {
				835	TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
				836	} else {
				837	TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
				838	}
				839	}
				840
				841	delete []buf;
				842	}
				843
				844	//
				845	// ErrorTest() Check various error and edge cases.
				846	//
				847	void UTextTest::ErrorTest()
				848	{
				849	// Close of an uninitialized UText. Shouldn't blow up.
				850	{
				851	UText ut;
				852	memset(&ut, 0, sizeof(UText));
				853	utext_close(&ut);
				854	utext_close(NULL);
				855	}
				856
				857	// Double-close of a UText. Shouldn't blow up. UText should still be usable.
				858	{
				859	UErrorCode status = U_ZERO_ERROR;
				860	UText ut = UTEXT_INITIALIZER;
				861	UnicodeString s("Hello, World");
				862	UText *ut2 = utext_openUnicodeString(&ut, &s, &status);
				863	TEST_SUCCESS(status);
				864	TEST_ASSERT(ut2 == &ut);
				865
				866	UText *ut3 = utext_close(&ut);
				867	TEST_ASSERT(ut3 == &ut);
				868
				869	UText *ut4 = utext_close(&ut);
				870	TEST_ASSERT(ut4 == &ut);
				871
				872	utext_openUnicodeString(&ut, &s, &status);
				873	TEST_SUCCESS(status);
				874	utext_close(&ut);
				875	}
				876
				877	// Re-use of a UText, chaining through each of the types of UText
				878	// (If it doesn't blow up, and doesn't leak, it's probably working fine)
				879	{
				880	UErrorCode status = U_ZERO_ERROR;
				881	UText ut = UTEXT_INITIALIZER;
				882	UText *utp;
				883	UnicodeString s1("Hello, World");
				884	UChar s2[] = {(UChar)0x41, (UChar)0x42, (UChar)0};
				885	const char *s3 = "\x66\x67\x68";
				886
				887	utp = utext_openUnicodeString(&ut, &s1, &status);
				888	TEST_SUCCESS(status);
				889	TEST_ASSERT(utp == &ut);
				890
				891	utp = utext_openConstUnicodeString(&ut, &s1, &status);
				892	TEST_SUCCESS(status);
				893	TEST_ASSERT(utp == &ut);
				894
				895	utp = utext_openUTF8(&ut, s3, -1, &status);
				896	TEST_SUCCESS(status);
				897	TEST_ASSERT(utp == &ut);
				898
				899	utp = utext_openUChars(&ut, s2, -1, &status);
				900	TEST_SUCCESS(status);
				901	TEST_ASSERT(utp == &ut);
				902
				903	utp = utext_close(&ut);
				904	TEST_ASSERT(utp == &ut);
				905
				906	utp = utext_openUnicodeString(&ut, &s1, &status);
				907	TEST_SUCCESS(status);
				908	TEST_ASSERT(utp == &ut);
				909	}
				910
				911	// Invalid parameters on open
				912	//
				913	{
				914	UErrorCode status = U_ZERO_ERROR;
				915	UText ut = UTEXT_INITIALIZER;
				916
				917	utext_openUChars(&ut, NULL, 5, &status);
				918	TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
				919
				920	status = U_ZERO_ERROR;
				921	utext_openUChars(&ut, NULL, -1, &status);
				922	TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
				923
				924	status = U_ZERO_ERROR;
				925	utext_openUTF8(&ut, NULL, 4, &status);
				926	TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
				927
				928	status = U_ZERO_ERROR;
				929	utext_openUTF8(&ut, NULL, -1, &status);
				930	TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
				931	}
				932
				933	//
				934	// UTF-8 with malformed sequences.
				935	// These should come through as the Unicode replacement char, \ufffd
				936	//
				937	{
				938	UErrorCode status = U_ZERO_ERROR;
				939	UText *ut = NULL;
				940	const char *badUTF8 = "\x41\x81\x42\xf0\x81\x81\x43";
				941	UChar32 c;
				942
				943	ut = utext_openUTF8(NULL, badUTF8, -1, &status);
				944	TEST_SUCCESS(status);
				945	c = utext_char32At(ut, 1);
				946	TEST_ASSERT(c == 0xfffd);
				947	c = utext_char32At(ut, 3);
				948	TEST_ASSERT(c == 0xfffd);
				949	c = utext_char32At(ut, 5);
				950	TEST_ASSERT(c == 0xfffd);
				951	c = utext_char32At(ut, 6);
				952	TEST_ASSERT(c == 0x43);
				953
				954	UChar buf[10];
				955	int n = utext_extract(ut, 0, 9, buf, 10, &status);
				956	TEST_SUCCESS(status);
				957	TEST_ASSERT(n==7);
				958	TEST_ASSERT(buf[0] == 0x41);
				959	TEST_ASSERT(buf[1] == 0xfffd);
				960	TEST_ASSERT(buf[2] == 0x42);
				961	TEST_ASSERT(buf[3] == 0xfffd);
				962	TEST_ASSERT(buf[4] == 0xfffd);
				963	TEST_ASSERT(buf[5] == 0xfffd);
				964	TEST_ASSERT(buf[6] == 0x43);
				965	utext_close(ut);
				966	}
				967
				968
				969	//
				970	// isLengthExpensive - does it make the expected transitions after
				971	// getting the length of a nul terminated string?
				972	//
				973	{
				974	UErrorCode status = U_ZERO_ERROR;
				975	UnicodeString sa("Hello, this is a string");
				976	UBool isExpensive;
				977
				978	UChar sb[100];
				979	memset(sb, 0x20, sizeof(sb));
				980	sb[99] = 0;
				981
				982	UText *uta = utext_openUnicodeString(NULL, &sa, &status);
				983	TEST_SUCCESS(status);
				984	isExpensive = utext_isLengthExpensive(uta);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	985	TEST_ASSERT(isExpensive == false);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	986	utext_close(uta);
				987
				988	UText *utb = utext_openUChars(NULL, sb, -1, &status);
				989	TEST_SUCCESS(status);
				990	isExpensive = utext_isLengthExpensive(utb);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	991	TEST_ASSERT(isExpensive == true);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	992	int64_t len = utext_nativeLength(utb);
				993	TEST_ASSERT(len == 99);
				994	isExpensive = utext_isLengthExpensive(utb);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	995	TEST_ASSERT(isExpensive == false);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	996	utext_close(utb);
				997	}
				998
				999	//
				1000	// Index to positions not on code point boundaries.
				1001	//
				1002	{
				1003	const char *u8str = "\xc8\x81\xe1\x82\x83\xf1\x84\x85\x86";
				1004	int32_t startMap[] = { 0, 0, 2, 2, 2, 5, 5, 5, 5, 9, 9};
				1005	int32_t nextMap[] = { 2, 2, 5, 5, 5, 9, 9, 9, 9, 9, 9};
				1006	int32_t prevMap[] = { 0, 0, 0, 0, 0, 2, 2, 2, 2, 5, 5};
				1007	UChar32 c32Map[] = {0x201, 0x201, 0x1083, 0x1083, 0x1083, 0x044146, 0x044146, 0x044146, 0x044146, -1, -1};
				1008	UChar32 pr32Map[] = { -1, -1, 0x201, 0x201, 0x201, 0x1083, 0x1083, 0x1083, 0x1083, 0x044146, 0x044146};
				1009
				1010	// extractLen is the size, in UChars, of what will be extracted between index and index+1.
				1011	// is zero when both index positions lie within the same code point.
				1012	int32_t exLen[] = { 0, 1, 0, 0, 1, 0, 0, 0, 2, 0, 0};
				1013
				1014
				1015	UErrorCode status = U_ZERO_ERROR;
				1016	UText *ut = utext_openUTF8(NULL, u8str, -1, &status);
				1017	TEST_SUCCESS(status);
				1018
				1019	// Check setIndex
				1020	int32_t i;
				1021	int32_t startMapLimit = UPRV_LENGTHOF(startMap);
				1022	for (i=0; i<startMapLimit; i++) {
				1023	utext_setNativeIndex(ut, i);
				1024	int64_t cpIndex = utext_getNativeIndex(ut);
				1025	TEST_ASSERT(cpIndex == startMap[i]);
				1026	cpIndex = UTEXT_GETNATIVEINDEX(ut);
				1027	TEST_ASSERT(cpIndex == startMap[i]);
				1028	}
				1029
				1030	// Check char32At
				1031	for (i=0; i<startMapLimit; i++) {
				1032	UChar32 c32 = utext_char32At(ut, i);
				1033	TEST_ASSERT(c32 == c32Map[i]);
				1034	int64_t cpIndex = utext_getNativeIndex(ut);
				1035	TEST_ASSERT(cpIndex == startMap[i]);
				1036	}
				1037
				1038	// Check utext_next32From
				1039	for (i=0; i<startMapLimit; i++) {
				1040	UChar32 c32 = utext_next32From(ut, i);
				1041	TEST_ASSERT(c32 == c32Map[i]);
				1042	int64_t cpIndex = utext_getNativeIndex(ut);
				1043	TEST_ASSERT(cpIndex == nextMap[i]);
				1044	}
				1045
				1046	// check utext_previous32From
				1047	for (i=0; i<startMapLimit; i++) {
				1048	gTestNum++;
				1049	UChar32 c32 = utext_previous32From(ut, i);
				1050	TEST_ASSERT(c32 == pr32Map[i]);
				1051	int64_t cpIndex = utext_getNativeIndex(ut);
				1052	TEST_ASSERT(cpIndex == prevMap[i]);
				1053	}
				1054
				1055	// check Extract
				1056	// Extract from i to i+1, which may be zero or one code points,
				1057	// depending on whether the indices straddle a cp boundary.
				1058	for (i=0; i<startMapLimit; i++) {
				1059	UChar buf[3];
				1060	status = U_ZERO_ERROR;
				1061	int32_t extractedLen = utext_extract(ut, i, i+1, buf, 3, &status);
				1062	TEST_SUCCESS(status);
				1063	TEST_ASSERT(extractedLen == exLen[i]);
				1064	if (extractedLen > 0) {
				1065	UChar32 c32;
				1066	/* extractedLen-extractedLen == 0 is used to get around a compiler warning. */
				1067	U16_GET(buf, 0, extractedLen-extractedLen, extractedLen, c32);
				1068	TEST_ASSERT(c32 == c32Map[i]);
				1069	}
				1070	}
				1071
				1072	utext_close(ut);
				1073	}
				1074
				1075
				1076	{ // Similar test, with utf16 instead of utf8
				1077	// TODO: merge the common parts of these tests.
				1078
				1079	UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000", -1, US_INV);
				1080	int32_t startMap[] ={ 0, 1, 1, 3, 4, 4, 6, 6};
				1081	int32_t nextMap[] = { 1, 3, 3, 4, 6, 6, 6, 6};
				1082	int32_t prevMap[] = { 0, 0, 0, 1, 3, 3, 4, 4};
				1083	UChar32 c32Map[] = {0x1000, 0x11000, 0x11000, 0x2000, 0x22000, 0x22000, -1, -1};
				1084	UChar32 pr32Map[] = { -1, 0x1000, 0x1000, 0x11000, 0x2000, 0x2000, 0x22000, 0x22000};
				1085	int32_t exLen[] = { 1, 0, 2, 1, 0, 2, 0, 0,};
				1086
				1087	u16str = u16str.unescape();
				1088	UErrorCode status = U_ZERO_ERROR;
				1089	UText *ut = utext_openUnicodeString(NULL, &u16str, &status);
				1090	TEST_SUCCESS(status);
				1091
				1092	int32_t startMapLimit = UPRV_LENGTHOF(startMap);
				1093	int i;
				1094	for (i=0; i<startMapLimit; i++) {
				1095	utext_setNativeIndex(ut, i);
				1096	int64_t cpIndex = utext_getNativeIndex(ut);
				1097	TEST_ASSERT(cpIndex == startMap[i]);
				1098	}
				1099
				1100	// Check char32At
				1101	for (i=0; i<startMapLimit; i++) {
				1102	UChar32 c32 = utext_char32At(ut, i);
				1103	TEST_ASSERT(c32 == c32Map[i]);
				1104	int64_t cpIndex = utext_getNativeIndex(ut);
				1105	TEST_ASSERT(cpIndex == startMap[i]);
				1106	}
				1107
				1108	// Check utext_next32From
				1109	for (i=0; i<startMapLimit; i++) {
				1110	UChar32 c32 = utext_next32From(ut, i);
				1111	TEST_ASSERT(c32 == c32Map[i]);
				1112	int64_t cpIndex = utext_getNativeIndex(ut);
				1113	TEST_ASSERT(cpIndex == nextMap[i]);
				1114	}
				1115
				1116	// check utext_previous32From
				1117	for (i=0; i<startMapLimit; i++) {
				1118	UChar32 c32 = utext_previous32From(ut, i);
				1119	TEST_ASSERT(c32 == pr32Map[i]);
				1120	int64_t cpIndex = utext_getNativeIndex(ut);
				1121	TEST_ASSERT(cpIndex == prevMap[i]);
				1122	}
				1123
				1124	// check Extract
				1125	// Extract from i to i+1, which may be zero or one code points,
				1126	// depending on whether the indices straddle a cp boundary.
				1127	for (i=0; i<startMapLimit; i++) {
				1128	UChar buf[3];
				1129	status = U_ZERO_ERROR;
				1130	int32_t extractedLen = utext_extract(ut, i, i+1, buf, 3, &status);
				1131	TEST_SUCCESS(status);
				1132	TEST_ASSERT(extractedLen == exLen[i]);
				1133	if (extractedLen > 0) {
				1134	UChar32 c32;
				1135	/* extractedLen-extractedLen == 0 is used to get around a compiler warning. */
				1136	U16_GET(buf, 0, extractedLen-extractedLen, extractedLen, c32);
				1137	TEST_ASSERT(c32 == c32Map[i]);
				1138	}
				1139	}
				1140
				1141	utext_close(ut);
				1142	}
				1143
				1144	{ // Similar test, with UText over Replaceable
				1145	// TODO: merge the common parts of these tests.
				1146
				1147	UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000", -1, US_INV);
				1148	int32_t startMap[] ={ 0, 1, 1, 3, 4, 4, 6, 6};
				1149	int32_t nextMap[] = { 1, 3, 3, 4, 6, 6, 6, 6};
				1150	int32_t prevMap[] = { 0, 0, 0, 1, 3, 3, 4, 4};
				1151	UChar32 c32Map[] = {0x1000, 0x11000, 0x11000, 0x2000, 0x22000, 0x22000, -1, -1};
				1152	UChar32 pr32Map[] = { -1, 0x1000, 0x1000, 0x11000, 0x2000, 0x2000, 0x22000, 0x22000};
				1153	int32_t exLen[] = { 1, 0, 2, 1, 0, 2, 0, 0,};
				1154
				1155	u16str = u16str.unescape();
				1156	UErrorCode status = U_ZERO_ERROR;
				1157	UText *ut = utext_openReplaceable(NULL, &u16str, &status);
				1158	TEST_SUCCESS(status);
				1159
				1160	int32_t startMapLimit = UPRV_LENGTHOF(startMap);
				1161	int i;
				1162	for (i=0; i<startMapLimit; i++) {
				1163	utext_setNativeIndex(ut, i);
				1164	int64_t cpIndex = utext_getNativeIndex(ut);
				1165	TEST_ASSERT(cpIndex == startMap[i]);
				1166	}
				1167
				1168	// Check char32At
				1169	for (i=0; i<startMapLimit; i++) {
				1170	UChar32 c32 = utext_char32At(ut, i);
				1171	TEST_ASSERT(c32 == c32Map[i]);
				1172	int64_t cpIndex = utext_getNativeIndex(ut);
				1173	TEST_ASSERT(cpIndex == startMap[i]);
				1174	}
				1175
				1176	// Check utext_next32From
				1177	for (i=0; i<startMapLimit; i++) {
				1178	UChar32 c32 = utext_next32From(ut, i);
				1179	TEST_ASSERT(c32 == c32Map[i]);
				1180	int64_t cpIndex = utext_getNativeIndex(ut);
				1181	TEST_ASSERT(cpIndex == nextMap[i]);
				1182	}
				1183
				1184	// check utext_previous32From
				1185	for (i=0; i<startMapLimit; i++) {
				1186	UChar32 c32 = utext_previous32From(ut, i);
				1187	TEST_ASSERT(c32 == pr32Map[i]);
				1188	int64_t cpIndex = utext_getNativeIndex(ut);
				1189	TEST_ASSERT(cpIndex == prevMap[i]);
				1190	}
				1191
				1192	// check Extract
				1193	// Extract from i to i+1, which may be zero or one code points,
				1194	// depending on whether the indices straddle a cp boundary.
				1195	for (i=0; i<startMapLimit; i++) {
				1196	UChar buf[3];
				1197	status = U_ZERO_ERROR;
				1198	int32_t extractedLen = utext_extract(ut, i, i+1, buf, 3, &status);
				1199	TEST_SUCCESS(status);
				1200	TEST_ASSERT(extractedLen == exLen[i]);
				1201	if (extractedLen > 0) {
				1202	UChar32 c32;
				1203	/* extractedLen-extractedLen == 0 is used to get around a compiler warning. */
				1204	U16_GET(buf, 0, extractedLen-extractedLen, extractedLen, c32);
				1205	TEST_ASSERT(c32 == c32Map[i]);
				1206	}
				1207	}
				1208
				1209	utext_close(ut);
				1210	}
				1211	}
				1212
				1213
				1214	void UTextTest::FreezeTest() {
				1215	// Check isWritable() and freeze() behavior.
				1216	//
				1217
				1218	UnicodeString ustr("Hello, World.");
				1219	const char u8str[] = {char(0x31), (char)0x32, (char)0x33, 0};
				1220	const UChar u16str[] = {(UChar)0x31, (UChar)0x32, (UChar)0x44, 0};
				1221
				1222	UErrorCode status = U_ZERO_ERROR;
				1223	UText *ut = NULL;
				1224	UText *ut2 = NULL;
				1225
				1226	ut = utext_openUTF8(ut, u8str, -1, &status);
				1227	TEST_SUCCESS(status);
				1228	UBool writable = utext_isWritable(ut);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1229	TEST_ASSERT(writable == false);
				1230	utext_copy(ut, 1, 2, 0, true, &status);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1231	TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
				1232
				1233	status = U_ZERO_ERROR;
				1234	ut = utext_openUChars(ut, u16str, -1, &status);
				1235	TEST_SUCCESS(status);
				1236	writable = utext_isWritable(ut);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1237	TEST_ASSERT(writable == false);
				1238	utext_copy(ut, 1, 2, 0, true, &status);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1239	TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
				1240
				1241	status = U_ZERO_ERROR;
				1242	ut = utext_openUnicodeString(ut, &ustr, &status);
				1243	TEST_SUCCESS(status);
				1244	writable = utext_isWritable(ut);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1245	TEST_ASSERT(writable == true);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1246	utext_freeze(ut);
				1247	writable = utext_isWritable(ut);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1248	TEST_ASSERT(writable == false);
				1249	utext_copy(ut, 1, 2, 0, true, &status);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1250	TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
				1251
				1252	status = U_ZERO_ERROR;
				1253	ut = utext_openUnicodeString(ut, &ustr, &status);
				1254	TEST_SUCCESS(status);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1255	ut2 = utext_clone(ut2, ut, false, false, &status); // clone with readonly = false
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1256	TEST_SUCCESS(status);
				1257	writable = utext_isWritable(ut2);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1258	TEST_ASSERT(writable == true);
				1259	ut2 = utext_clone(ut2, ut, false, true, &status); // clone with readonly = true
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1260	TEST_SUCCESS(status);
				1261	writable = utext_isWritable(ut2);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1262	TEST_ASSERT(writable == false);
				1263	utext_copy(ut2, 1, 2, 0, true, &status);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1264	TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
				1265
				1266	status = U_ZERO_ERROR;
				1267	ut = utext_openConstUnicodeString(ut, (const UnicodeString *)&ustr, &status);
				1268	TEST_SUCCESS(status);
				1269	writable = utext_isWritable(ut);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1270	TEST_ASSERT(writable == false);
				1271	utext_copy(ut, 1, 2, 0, true, &status);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1272	TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
				1273
				1274	// Deep Clone of a frozen UText should re-enable writing in the copy.
				1275	status = U_ZERO_ERROR;
				1276	ut = utext_openUnicodeString(ut, &ustr, &status);
				1277	TEST_SUCCESS(status);
				1278	utext_freeze(ut);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1279	ut2 = utext_clone(ut2, ut, true, false, &status); // deep clone
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1280	TEST_SUCCESS(status);
				1281	writable = utext_isWritable(ut2);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1282	TEST_ASSERT(writable == true);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1283
				1284
				1285	// Deep clone of a frozen UText, where the base type is intrinsically non-writable,
				1286	// should NOT enable writing in the copy.
				1287	status = U_ZERO_ERROR;
				1288	ut = utext_openUChars(ut, u16str, -1, &status);
				1289	TEST_SUCCESS(status);
				1290	utext_freeze(ut);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1291	ut2 = utext_clone(ut2, ut, true, false, &status); // deep clone
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1292	TEST_SUCCESS(status);
				1293	writable = utext_isWritable(ut2);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1294	TEST_ASSERT(writable == false);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1295
				1296	// cleanup
				1297	utext_close(ut);
				1298	utext_close(ut2);
				1299	}
				1300
				1301
				1302	//
				1303	// Fragmented UText
				1304	// A UText type that works with a chunk size of 1.
				1305	// Intended to test for edge cases.
				1306	// Input comes from a UnicodeString.
				1307	//
				1308	// ut.b the character. Put into both halves.
				1309	//
				1310
				1311	U_CDECL_BEGIN
				1312	static UBool U_CALLCONV
				1313	fragTextAccess(UText *ut, int64_t index, UBool forward) {
				1314	const UnicodeString us = (const UnicodeString )ut->context;
				1315	UChar c;
				1316	int32_t length = us->length();
				1317	if (forward && index>=0 && index<length) {
				1318	c = us->charAt((int32_t)index);
				1319	ut->b = c \| c<<16;
				1320	ut->chunkOffset = 0;
				1321	ut->chunkLength = 1;
				1322	ut->chunkNativeStart = index;
				1323	ut->chunkNativeLimit = index+1;
				1324	return true;
				1325	}
				1326	if (!forward && index>0 && index <=length) {
				1327	c = us->charAt((int32_t)index-1);
				1328	ut->b = c \| c<<16;
				1329	ut->chunkOffset = 1;
				1330	ut->chunkLength = 1;
				1331	ut->chunkNativeStart = index-1;
				1332	ut->chunkNativeLimit = index;
				1333	return true;
				1334	}
				1335	ut->b = 0;
				1336	ut->chunkOffset = 0;
				1337	ut->chunkLength = 0;
				1338	if (index <= 0) {
				1339	ut->chunkNativeStart = 0;
				1340	ut->chunkNativeLimit = 0;
				1341	} else {
				1342	ut->chunkNativeStart = length;
				1343	ut->chunkNativeLimit = length;
				1344	}
				1345	return false;
				1346	}
				1347
				1348	// Function table to be used with this fragmented text provider.
				1349	// Initialized in the open function.
				1350	static UTextFuncs fragmentFuncs;
				1351
				1352	// Clone function for fragmented text provider.
				1353	// Didn't really want to provide this, but it's easier to provide it than to keep it
				1354	// out of the tests.
				1355	//
				1356	UText *
				1357	cloneFragmentedUnicodeString(UText dest, const UText src, UBool deep, UErrorCode *status) {
				1358	if (U_FAILURE(*status)) {
				1359	return NULL;
				1360	}
				1361	if (deep) {
				1362	*status = U_UNSUPPORTED_ERROR;
				1363	return NULL;
				1364	}
				1365	dest = utext_openUnicodeString(dest, (UnicodeString *)src->context, status);
				1366	utext_setNativeIndex(dest, utext_getNativeIndex(src));
				1367	return dest;
				1368	}
				1369
				1370	U_CDECL_END
				1371
				1372	// Open function for the fragmented text provider.
				1373	UText *
				1374	openFragmentedUnicodeString(UText ut, UnicodeString s, UErrorCode *status) {
				1375	ut = utext_openUnicodeString(ut, s, status);
				1376	if (U_FAILURE(*status)) {
				1377	return ut;
				1378	}
				1379
				1380	// Copy of the function table from the stock UnicodeString UText,
				1381	// and replace the entry for the access function.
				1382	memcpy(&fragmentFuncs, ut->pFuncs, sizeof(fragmentFuncs));
				1383	fragmentFuncs.access = fragTextAccess;
				1384	fragmentFuncs.clone = cloneFragmentedUnicodeString;
				1385	ut->pFuncs = &fragmentFuncs;
				1386
				1387	ut->chunkContents = (UChar *)&ut->b;
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1388	ut->pFuncs->access(ut, 0, true);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1389	return ut;
				1390	}
				1391
				1392	// Regression test for Ticket 5560
				1393	// Clone fails to update chunkContentPointer in the cloned copy.
				1394	// This is only an issue for UText types that work in a local buffer,
				1395	// (UTF-8 wrapper, for example)
				1396	//
				1397	// The test:
				1398	// 1. Create an initial UText
				1399	// 2. Deep clone it. Contents should match original.
				1400	// 3. Reset original to something different.
				1401	// 4. Check that clone contents did not change.
				1402	//
				1403	void UTextTest::Ticket5560() {
				1404	/* The following two strings are in UTF-8 even on EBCDIC platforms. */
				1405	static const char s1[] = {0x41,0x42,0x43,0x44,0x45,0x46,0}; /* "ABCDEF" */
				1406	static const char s2[] = {0x31,0x32,0x33,0x34,0x35,0x36,0}; /* "123456" */
				1407	UErrorCode status = U_ZERO_ERROR;
				1408
				1409	UText ut1 = UTEXT_INITIALIZER;
				1410	UText ut2 = UTEXT_INITIALIZER;
				1411
				1412	utext_openUTF8(&ut1, s1, -1, &status);
				1413	UChar c = utext_next32(&ut1);
				1414	TEST_ASSERT(c == 0x41); // c == 'A'
				1415
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1416	utext_clone(&ut2, &ut1, true, false, &status);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1417	TEST_SUCCESS(status);
				1418	c = utext_next32(&ut2);
				1419	TEST_ASSERT(c == 0x42); // c == 'B'
				1420	c = utext_next32(&ut1);
				1421	TEST_ASSERT(c == 0x42); // c == 'B'
				1422
				1423	utext_openUTF8(&ut1, s2, -1, &status);
				1424	c = utext_next32(&ut1);
				1425	TEST_ASSERT(c == 0x31); // c == '1'
				1426	c = utext_next32(&ut2);
				1427	TEST_ASSERT(c == 0x43); // c == 'C'
				1428
				1429	utext_close(&ut1);
				1430	utext_close(&ut2);
				1431	}
				1432
				1433
				1434	// Test for Ticket 6847
				1435	//
				1436	void UTextTest::Ticket6847() {
				1437	const int STRLEN = 90;
				1438	UChar s[STRLEN+1];
				1439	u_memset(s, 0x41, STRLEN);
				1440	s[STRLEN] = 0;
				1441
				1442	UErrorCode status = U_ZERO_ERROR;
				1443	UText *ut = utext_openUChars(NULL, s, -1, &status);
				1444
				1445	utext_setNativeIndex(ut, 0);
				1446	int32_t count = 0;
				1447	UChar32 c = 0;
				1448	int64_t nativeIndex = UTEXT_GETNATIVEINDEX(ut);
				1449	TEST_ASSERT(nativeIndex == 0);
				1450	while ((c = utext_next32(ut)) != U_SENTINEL) {
				1451	TEST_ASSERT(c == 0x41);
				1452	TEST_ASSERT(count < STRLEN);
				1453	if (count >= STRLEN) {
				1454	break;
				1455	}
				1456	count++;
				1457	nativeIndex = UTEXT_GETNATIVEINDEX(ut);
				1458	TEST_ASSERT(nativeIndex == count);
				1459	}
				1460	TEST_ASSERT(count == STRLEN);
				1461	nativeIndex = UTEXT_GETNATIVEINDEX(ut);
				1462	TEST_ASSERT(nativeIndex == STRLEN);
				1463	utext_close(ut);
				1464	}
				1465
				1466
				1467	void UTextTest::Ticket10562() {
				1468	// Note: failures show as a heap error when the test is run under valgrind.
				1469	UErrorCode status = U_ZERO_ERROR;
				1470
				1471	const char *utf8_string = "\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41";
				1472	UText *utf8Text = utext_openUTF8(NULL, utf8_string, -1, &status);
				1473	TEST_SUCCESS(status);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1474	UText *deepClone = utext_clone(NULL, utf8Text, true, false, &status);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1475	TEST_SUCCESS(status);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1476	UText *shallowClone = utext_clone(NULL, deepClone, false, false, &status);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1477	TEST_SUCCESS(status);
				1478	utext_close(shallowClone);
				1479	utext_close(deepClone);
				1480	utext_close(utf8Text);
				1481
				1482	status = U_ZERO_ERROR;
				1483	UnicodeString usString("Hello, World.");
				1484	UText *usText = utext_openUnicodeString(NULL, &usString, &status);
				1485	TEST_SUCCESS(status);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1486	UText *usDeepClone = utext_clone(NULL, usText, true, false, &status);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1487	TEST_SUCCESS(status);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1488	UText *usShallowClone = utext_clone(NULL, usDeepClone, false, false, &status);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1489	TEST_SUCCESS(status);
				1490	utext_close(usShallowClone);
				1491	utext_close(usDeepClone);
				1492	utext_close(usText);
				1493	}
				1494
				1495
				1496	void UTextTest::Ticket10983() {
				1497	// Note: failure shows as a seg fault when the defect is present.
				1498
				1499	UErrorCode status = U_ZERO_ERROR;
				1500	UnicodeString s("Hello, World");
				1501	UText *ut = utext_openConstUnicodeString(NULL, &s, &status);
				1502	TEST_SUCCESS(status);
				1503
				1504	status = U_INVALID_STATE_ERROR;
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1505	UText *cloned = utext_clone(NULL, ut, true, true, &status);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1506	TEST_ASSERT(cloned == NULL);
				1507	TEST_ASSERT(status == U_INVALID_STATE_ERROR);
				1508
				1509	utext_close(ut);
				1510	}
				1511
				1512	// Ticket 12130 - extract on a UText wrapping a null terminated UChar * string
				1513	// leaves the iteration position set incorrectly when the
				1514	// actual string length is not yet known.
				1515	//
				1516	// The test text needs to be long enough that UText defers getting the length.
				1517
				1518	void UTextTest::Ticket12130() {
				1519	UErrorCode status = U_ZERO_ERROR;
				1520
				1521	const char *text8 =
				1522	"Fundamentally, computers just deal with numbers. They store letters and other characters "
				1523	"by assigning a number for each one. Before Unicode was invented, there were hundreds "
				1524	"of different encoding systems for assigning these numbers. No single encoding could "
				1525	"contain enough characters: for example, the European Union alone requires several "
				1526	"different encodings to cover all its languages. Even for a single language like "
				1527	"English no single encoding was adequate for all the letters, punctuation, and technical "
				1528	"symbols in common use.";
				1529
				1530	UnicodeString str(text8);
				1531	const UChar *ustr = str.getTerminatedBuffer();
				1532	UText ut = UTEXT_INITIALIZER;
				1533	utext_openUChars(&ut, ustr, -1, &status);
				1534	UChar extractBuffer[50];
				1535
				1536	for (int32_t startIdx = 0; startIdx<str.length(); ++startIdx) {
				1537	int32_t endIdx = startIdx + 20;
				1538
				1539	u_memset(extractBuffer, 0, UPRV_LENGTHOF(extractBuffer));
				1540	utext_extract(&ut, startIdx, endIdx, extractBuffer, UPRV_LENGTHOF(extractBuffer), &status);
				1541	if (U_FAILURE(status)) {
				1542	errln("%s:%d %s", __FILE__, __LINE__, u_errorName(status));
				1543	return;
				1544	}
				1545	int64_t ni = utext_getNativeIndex(&ut);
				1546	int64_t expectedni = startIdx + 20;
				1547	if (expectedni > str.length()) {
				1548	expectedni = str.length();
				1549	}
				1550	if (expectedni != ni) {
				1551	errln("%s:%d utext_getNativeIndex() expected %d, got %d", __FILE__, __LINE__, expectedni, ni);
				1552	}
				1553	if (0 != str.tempSubString(startIdx, 20).compare(extractBuffer)) {
				1554	errln("%s:%d utext_extract() failed. expected \"%s\", got \"%s\"",
				1555	__FILE__, __LINE__, CStr(str.tempSubString(startIdx, 20))(), CStr(UnicodeString(extractBuffer))());
				1556	}
				1557	}
				1558	utext_close(&ut);
				1559
				1560	// Similar utext extract, this time with the string length provided to the UText in advance,
				1561	// and a buffer of larger than required capacity.
				1562
				1563	utext_openUChars(&ut, ustr, str.length(), &status);
				1564	for (int32_t startIdx = 0; startIdx<str.length(); ++startIdx) {
				1565	int32_t endIdx = startIdx + 20;
				1566	u_memset(extractBuffer, 0, UPRV_LENGTHOF(extractBuffer));
				1567	utext_extract(&ut, startIdx, endIdx, extractBuffer, UPRV_LENGTHOF(extractBuffer), &status);
				1568	if (U_FAILURE(status)) {
				1569	errln("%s:%d %s", __FILE__, __LINE__, u_errorName(status));
				1570	return;
				1571	}
				1572	int64_t ni = utext_getNativeIndex(&ut);
				1573	int64_t expectedni = startIdx + 20;
				1574	if (expectedni > str.length()) {
				1575	expectedni = str.length();
				1576	}
				1577	if (expectedni != ni) {
				1578	errln("%s:%d utext_getNativeIndex() expected %d, got %d", __FILE__, __LINE__, expectedni, ni);
				1579	}
				1580	if (0 != str.tempSubString(startIdx, 20).compare(extractBuffer)) {
				1581	errln("%s:%d utext_extract() failed. expected \"%s\", got \"%s\"",
				1582	__FILE__, __LINE__, CStr(str.tempSubString(startIdx, 20))(), CStr(UnicodeString(extractBuffer))());
				1583	}
				1584	}
				1585	utext_close(&ut);
				1586	}
				1587
				1588	// Ticket 13344 The macro form of UTEXT_SETNATIVEINDEX failed when target was a trail surrogate
				1589	// of a supplementary character.
				1590
				1591	void UTextTest::Ticket13344() {
				1592	UErrorCode status = U_ZERO_ERROR;
				1593	const char16_t *str = u"abc\U0010abcd xyz";
				1594	LocalUTextPointer ut(utext_openUChars(NULL, str, -1, &status));
				1595
				1596	assertSuccess("UTextTest::Ticket13344-status", status);
				1597	UTEXT_SETNATIVEINDEX(ut.getAlias(), 3);
				1598	assertEquals("UTextTest::Ticket13344-lead", (int64_t)3, utext_getNativeIndex(ut.getAlias()));
				1599	UTEXT_SETNATIVEINDEX(ut.getAlias(), 4);
				1600	assertEquals("UTextTest::Ticket13344-trail", (int64_t)3, utext_getNativeIndex(ut.getAlias()));
				1601	UTEXT_SETNATIVEINDEX(ut.getAlias(), 5);
				1602	assertEquals("UTextTest::Ticket13344-bmp", (int64_t)5, utext_getNativeIndex(ut.getAlias()));
				1603
				1604	utext_setNativeIndex(ut.getAlias(), 3);
				1605	assertEquals("UTextTest::Ticket13344-lead-2", (int64_t)3, utext_getNativeIndex(ut.getAlias()));
				1606	utext_setNativeIndex(ut.getAlias(), 4);
				1607	assertEquals("UTextTest::Ticket13344-trail-2", (int64_t)3, utext_getNativeIndex(ut.getAlias()));
				1608	utext_setNativeIndex(ut.getAlias(), 5);
				1609	assertEquals("UTextTest::Ticket13344-bmp-2", (int64_t)5, utext_getNativeIndex(ut.getAlias()));
				1610	}
				1611