Blame - source/common/unistr.cpp - chromium.googlesource.com/chromium/deps/icu

blob: 077b4d6ef20811d1e67da55dfd3531f22f99a46f [file] [log] [blame]

Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1	// © 2016 and later: Unicode, Inc. and others.
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2	// License & terms of use: http://www.unicode.org/copyright.html
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	3	/*
				4	******************************************************************************
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	5	* Copyright (C) 1999-2016, International Business Machines Corporation and
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	6	* others. All Rights Reserved.
				7	******************************************************************************
				8	*
				9	* File unistr.cpp
				10	*
				11	* Modification History:
				12	*
				13	* Date Name Description
				14	* 09/25/98 stephen Creation.
				15	* 04/20/99 stephen Overhauled per 4/16 code review.
				16	* 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
				17	* 11/18/99 aliu Added handleReplaceBetween() to make inherit from
				18	* Replaceable.
				19	* 06/25/01 grhoten Removed the dependency on iostream
				20	******************************************************************************
				21	*/
				22
				23	#include "unicode/utypes.h"
				24	#include "unicode/appendable.h"
				25	#include "unicode/putil.h"
				26	#include "cstring.h"
				27	#include "cmemory.h"
				28	#include "unicode/ustring.h"
				29	#include "unicode/unistr.h"
				30	#include "unicode/utf.h"
				31	#include "unicode/utf16.h"
				32	#include "uelement.h"
				33	#include "ustr_imp.h"
				34	#include "umutex.h"
				35	#include "uassert.h"
				36
				37	#if 0
				38
				39	#include <iostream>
				40	using namespace std;
				41
				42	//DEBUGGING
				43	void
				44	print(const UnicodeString& s,
				45	const char *name)
				46	{
				47	UChar c;
				48	cout << name << ":\|";
				49	for(int i = 0; i < s.length(); ++i) {
				50	c = s[i];
				51	if(c>= 0x007E \|\| c < 0x0020)
				52	cout << "[0x" << hex << s[i] << "]";
				53	else
				54	cout << (char) s[i];
				55	}
				56	cout << '\|' << endl;
				57	}
				58
				59	void
				60	print(const UChar *s,
				61	int32_t len,
				62	const char *name)
				63	{
				64	UChar c;
				65	cout << name << ":\|";
				66	for(int i = 0; i < len; ++i) {
				67	c = s[i];
				68	if(c>= 0x007E \|\| c < 0x0020)
				69	cout << "[0x" << hex << s[i] << "]";
				70	else
				71	cout << (char) s[i];
				72	}
				73	cout << '\|' << endl;
				74	}
				75	// END DEBUGGING
				76	#endif
				77
				78	// Local function definitions for now
				79
				80	// need to copy areas that may overlap
				81	static
				82	inline void
				83	us_arrayCopy(const UChar *src, int32_t srcStart,
				84	UChar *dst, int32_t dstStart, int32_t count)
				85	{
				86	if(count>0) {
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	87	uprv_memmove(dst+dstStart, src+srcStart, (size_t)countsizeof(src));
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	88	}
				89	}
				90
				91	// u_unescapeAt() callback to get a UChar from a UnicodeString
				92	U_CDECL_BEGIN
				93	static UChar U_CALLCONV
				94	UnicodeString_charAt(int32_t offset, void *context) {
				95	return ((icu::UnicodeString*) context)->charAt(offset);
				96	}
				97	U_CDECL_END
				98
				99	U_NAMESPACE_BEGIN
				100
				101	/* The Replaceable virtual destructor can't be defined in the header
				102	due to how AIX works with multiple definitions of virtual functions.
				103	*/
				104	Replaceable::~Replaceable() {}
				105
				106	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
				107
				108	UnicodeString U_EXPORT2
				109	operator+ (const UnicodeString &s1, const UnicodeString &s2) {
				110	return
				111	UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
				112	append(s1).
				113	append(s2);
				114	}
				115
				116	//========================================
				117	// Reference Counting functions, put at top of file so that optimizing compilers
				118	// have a chance to automatically inline.
				119	//========================================
				120
				121	void
				122	UnicodeString::addRef() {
				123	umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
				124	}
				125
				126	int32_t
				127	UnicodeString::removeRef() {
				128	return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
				129	}
				130
				131	int32_t
				132	UnicodeString::refCount() const {
				133	return umtx_loadAcquire(((u_atomic_int32_t )fUnion.fFields.fArray - 1));
				134	}
				135
				136	void
				137	UnicodeString::releaseArray() {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	138	if((fUnion.fFields.fLengthAndFlags & kRefCounted) && removeRef() == 0) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	139	uprv_free((int32_t *)fUnion.fFields.fArray - 1);
				140	}
				141	}
				142
				143
				144
				145	//========================================
				146	// Constructors
				147	//========================================
				148
				149	// The default constructor is inline in unistr.h.
				150
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	151	UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) {
				152	fUnion.fFields.fLengthAndFlags = 0;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	153	if(count <= 0 \|\| (uint32_t)c > 0x10ffff) {
				154	// just allocate and do not do anything else
				155	allocate(capacity);
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	156	} else if(c <= 0xffff) {
				157	int32_t length = count;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	158	if(capacity < length) {
				159	capacity = length;
				160	}
				161	if(allocate(capacity)) {
				162	UChar *array = getArrayStart();
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	163	UChar unit = (UChar)c;
				164	for(int32_t i = 0; i < length; ++i) {
				165	array[i] = unit;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	166	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	167	setLength(length);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	168	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	169	} else { // supplementary code point, write surrogate pairs
				170	if(count > (INT32_MAX / 2)) {
				171	// We would get more than 2G UChars.
				172	allocate(capacity);
				173	return;
				174	}
				175	int32_t length = count * 2;
				176	if(capacity < length) {
				177	capacity = length;
				178	}
				179	if(allocate(capacity)) {
				180	UChar *array = getArrayStart();
				181	UChar lead = U16_LEAD(c);
				182	UChar trail = U16_TRAIL(c);
				183	for(int32_t i = 0; i < length; i += 2) {
				184	array[i] = lead;
				185	array[i + 1] = trail;
				186	}
				187	setLength(length);
				188	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	189	}
				190	}
				191
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	192	UnicodeString::UnicodeString(UChar ch) {
				193	fUnion.fFields.fLengthAndFlags = kLength1 \| kShortString;
				194	fUnion.fStackFields.fBuffer[0] = ch;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	195	}
				196
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	197	UnicodeString::UnicodeString(UChar32 ch) {
				198	fUnion.fFields.fLengthAndFlags = kShortString;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	199	int32_t i = 0;
				200	UBool isError = FALSE;
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	201	U16_APPEND(fUnion.fStackFields.fBuffer, i, US_STACKBUF_SIZE, ch, isError);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	202	// We test isError so that the compiler does not complain that we don't.
				203	// If isError then i==0 which is what we want anyway.
				204	if(!isError) {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	205	setShortLength(i);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	206	}
				207	}
				208
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	209	UnicodeString::UnicodeString(const UChar *text) {
				210	fUnion.fFields.fLengthAndFlags = kShortString;
				211	doAppend(text, 0, -1);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	212	}
				213
				214	UnicodeString::UnicodeString(const UChar *text,
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	215	int32_t textLength) {
				216	fUnion.fFields.fLengthAndFlags = kShortString;
				217	doAppend(text, 0, textLength);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	218	}
				219
				220	UnicodeString::UnicodeString(UBool isTerminated,
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	221	ConstChar16Ptr textPtr,
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	222	int32_t textLength) {
				223	fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	224	const UChar *text = textPtr;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	225	if(text == NULL) {
				226	// treat as an empty string, do not alias
				227	setToEmpty();
				228	} else if(textLength < -1 \|\|
				229	(textLength == -1 && !isTerminated) \|\|
				230	(textLength >= 0 && isTerminated && text[textLength] != 0)
				231	) {
				232	setToBogus();
				233	} else {
				234	if(textLength == -1) {
				235	// text is terminated, or else it would have failed the above test
				236	textLength = u_strlen(text);
				237	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	238	setArray(const_cast<UChar *>(text), textLength,
				239	isTerminated ? textLength + 1 : textLength);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	240	}
				241	}
				242
				243	UnicodeString::UnicodeString(UChar *buff,
				244	int32_t buffLength,
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	245	int32_t buffCapacity) {
				246	fUnion.fFields.fLengthAndFlags = kWritableAlias;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	247	if(buff == NULL) {
				248	// treat as an empty string, do not alias
				249	setToEmpty();
				250	} else if(buffLength < -1 \|\| buffCapacity < 0 \|\| buffLength > buffCapacity) {
				251	setToBogus();
				252	} else {
				253	if(buffLength == -1) {
				254	// fLength = u_strlen(buff); but do not look beyond buffCapacity
				255	const UChar p = buff, limit = buff + buffCapacity;
				256	while(p != limit && *p != 0) {
				257	++p;
				258	}
				259	buffLength = (int32_t)(p - buff);
				260	}
				261	setArray(buff, buffLength, buffCapacity);
				262	}
				263	}
				264
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	265	UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) {
				266	fUnion.fFields.fLengthAndFlags = kShortString;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	267	if(src==NULL) {
				268	// treat as an empty string
				269	} else {
				270	if(length<0) {
				271	length=(int32_t)uprv_strlen(src);
				272	}
				273	if(cloneArrayIfNeeded(length, length, FALSE)) {
				274	u_charsToUChars(src, getArrayStart(), length);
				275	setLength(length);
				276	} else {
				277	setToBogus();
				278	}
				279	}
				280	}
				281
				282	#if U_CHARSET_IS_UTF8
				283
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	284	UnicodeString::UnicodeString(const char *codepageData) {
				285	fUnion.fFields.fLengthAndFlags = kShortString;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	286	if(codepageData != 0) {
				287	setToUTF8(codepageData);
				288	}
				289	}
				290
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	291	UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) {
				292	fUnion.fFields.fLengthAndFlags = kShortString;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	293	// if there's nothing to convert, do nothing
				294	if(codepageData == 0 \|\| dataLength == 0 \|\| dataLength < -1) {
				295	return;
				296	}
				297	if(dataLength == -1) {
				298	dataLength = (int32_t)uprv_strlen(codepageData);
				299	}
				300	setToUTF8(StringPiece(codepageData, dataLength));
				301	}
				302
				303	// else see unistr_cnv.cpp
				304	#endif
				305
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	306	UnicodeString::UnicodeString(const UnicodeString& that) {
				307	fUnion.fFields.fLengthAndFlags = kShortString;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	308	copyFrom(that);
				309	}
				310
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	311	UnicodeString::UnicodeString(UnicodeString &&src) U_NOEXCEPT {
Frank Tang	69c72a6	2019-04-03 21:41:21 -0700	[diff] [blame]	312	copyFieldsFrom(src, TRUE);
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	313	}
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	314
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	315	UnicodeString::UnicodeString(const UnicodeString& that,
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	316	int32_t srcStart) {
				317	fUnion.fFields.fLengthAndFlags = kShortString;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	318	setTo(that, srcStart);
				319	}
				320
				321	UnicodeString::UnicodeString(const UnicodeString& that,
				322	int32_t srcStart,
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	323	int32_t srcLength) {
				324	fUnion.fFields.fLengthAndFlags = kShortString;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	325	setTo(that, srcStart, srcLength);
				326	}
				327
				328	// Replaceable base class clone() default implementation, does not clone
				329	Replaceable *
				330	Replaceable::clone() const {
				331	return NULL;
				332	}
				333
				334	// UnicodeString overrides clone() with a real implementation
Frank Tang	b869661	2019-10-25 14:58:21 -0700	[diff] [blame]	335	UnicodeString *
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	336	UnicodeString::clone() const {
				337	return new UnicodeString(*this);
				338	}
				339
				340	//========================================
				341	// array allocation
				342	//========================================
				343
namespace {

// Fixed part of the extra capacity added when growing.
const int32_t kGrowSize = 128;

// Upper limit for a string capacity.
// One int32_t reference counter plus capacity UChars must fit into a 32-bit
// size_t (at least when on a 32-bit platform). One more UChar is reserved
// for the NUL terminator, to avoid reallocation in getTerminatedBuffer(),
// and the byte count is rounded up to a multiple of 16.
// Hence capacity is at most (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5.
// (More complicated checks could allow up to 0x7ffffffd without rounding up,
// but that does not seem worth it.)
const int32_t kMaxCapacity = 0x7ffffff5;

// Returns the capacity to request for a string of newLength units:
// newLength plus a quarter of newLength plus kGrowSize, capped at
// kMaxCapacity.
int32_t getGrowCapacity(int32_t newLength) {
  const int32_t extra = (newLength >> 2) + kGrowSize;
  // Compare against the remaining headroom so that the sum is only
  // formed when it cannot exceed kMaxCapacity.
  const int32_t headroom = kMaxCapacity - newLength;
  return (extra <= headroom) ? (newLength + extra) : kMaxCapacity;
}

}  // namespace
				367
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	368	UBool
				369	UnicodeString::allocate(int32_t capacity) {
				370	if(capacity <= US_STACKBUF_SIZE) {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	371	fUnion.fFields.fLengthAndFlags = kShortString;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	372	return TRUE;
				373	}
				374	if(capacity <= kMaxCapacity) {
				375	++capacity; // for the NUL
				376	// Switch to size_t which is unsigned so that we can allocate up to 4GB.
				377	// Reference counter + UChars.
				378	size_t numBytes = sizeof(int32_t) + (size_t)capacity * U_SIZEOF_UCHAR;
				379	// Round up to a multiple of 16.
				380	numBytes = (numBytes + 15) & ~15;
				381	int32_t array = (int32_t ) uprv_malloc(numBytes);
				382	if(array != NULL) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	383	// set initial refCount and point behind the refCount
				384	*array++ = 1;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	385	numBytes -= sizeof(int32_t);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	386
				387	// have fArray point to the first UChar
				388	fUnion.fFields.fArray = (UChar *)array;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	389	fUnion.fFields.fCapacity = (int32_t)(numBytes / U_SIZEOF_UCHAR);
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	390	fUnion.fFields.fLengthAndFlags = kLongString;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	391	return TRUE;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	392	}
				393	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	394	fUnion.fFields.fLengthAndFlags = kIsBogus;
				395	fUnion.fFields.fArray = 0;
				396	fUnion.fFields.fCapacity = 0;
				397	return FALSE;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	398	}
				399
				400	//========================================
				401	// Destructor
				402	//========================================
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	403
				404	#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
				405	static u_atomic_int32_t finalLengthCounts[0x400]; // UnicodeString::kMaxShortLength+1
				406	static u_atomic_int32_t beyondCount(0);
				407
				408	U_CAPI void unistr_printLengths() {
				409	int32_t i;
				410	for(i = 0; i <= 59; ++i) {
				411	printf("%2d, %9d\n", i, (int32_t)finalLengthCounts[i]);
				412	}
				413	int32_t beyond = beyondCount;
				414	for(; i < UPRV_LENGTHOF(finalLengthCounts); ++i) {
				415	beyond += finalLengthCounts[i];
				416	}
				417	printf(">59, %9d\n", beyond);
				418	}
				419	#endif
				420
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	421	UnicodeString::~UnicodeString()
				422	{
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	423	#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
				424	// Count lengths of strings at the end of their lifetime.
				425	// Useful for discussion of a desirable stack buffer size.
				426	// Count the contents length, not the optional NUL terminator nor further capacity.
				427	// Ignore open-buffer strings and strings which alias external storage.
				428	if((fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer\|kReadonlyAlias\|kWritableAlias)) == 0) {
				429	if(hasShortLength()) {
				430	umtx_atomic_inc(finalLengthCounts + getShortLength());
				431	} else {
				432	umtx_atomic_inc(&beyondCount);
				433	}
				434	}
				435	#endif
				436
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	437	releaseArray();
				438	}
				439
				440	//========================================
				441	// Factory methods
				442	//========================================
				443
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	444	UnicodeString UnicodeString::fromUTF8(StringPiece utf8) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	445	UnicodeString result;
				446	result.setToUTF8(utf8);
				447	return result;
				448	}
				449
				450	UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
				451	UnicodeString result;
				452	int32_t capacity;
				453	// Most UTF-32 strings will be BMP-only and result in a same-length
				454	// UTF-16 string. We overestimate the capacity just slightly,
				455	// just in case there are a few supplementary characters.
				456	if(length <= US_STACKBUF_SIZE) {
				457	capacity = US_STACKBUF_SIZE;
				458	} else {
				459	capacity = length + (length >> 4) + 4;
				460	}
				461	do {
				462	UChar *utf16 = result.getBuffer(capacity);
				463	int32_t length16;
				464	UErrorCode errorCode = U_ZERO_ERROR;
				465	u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
				466	utf32, length,
				467	0xfffd, // Substitution character.
				468	NULL, // Don't care about number of substitutions.
				469	&errorCode);
				470	result.releaseBuffer(length16);
				471	if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
				472	capacity = length16 + 1; // +1 for the terminating NUL.
				473	continue;
				474	} else if(U_FAILURE(errorCode)) {
				475	result.setToBogus();
				476	}
				477	break;
				478	} while(TRUE);
				479	return result;
				480	}
				481
				482	//========================================
				483	// Assignment
				484	//========================================
				485
				486	UnicodeString &
				487	UnicodeString::operator=(const UnicodeString &src) {
				488	return copyFrom(src);
				489	}
				490
				491	UnicodeString &
				492	UnicodeString::fastCopyFrom(const UnicodeString &src) {
				493	return copyFrom(src, TRUE);
				494	}
				495
				496	UnicodeString &
				497	UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
				498	// if assigning to ourselves, do nothing
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	499	if(this == &src) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	500	return *this;
				501	}
				502
				503	// is the right side bogus?
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	504	if(src.isBogus()) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	505	setToBogus();
				506	return *this;
				507	}
				508
				509	// delete the current contents
				510	releaseArray();
				511
				512	if(src.isEmpty()) {
				513	// empty string - use the stack buffer
				514	setToEmpty();
				515	return *this;
				516	}
				517
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	518	// fLength>0 and not an "open" src.getBuffer(minCapacity)
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	519	fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
				520	switch(src.fUnion.fFields.fLengthAndFlags & kAllStorageFlags) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	521	case kShortString:
				522	// short string using the stack buffer, do the same
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	523	uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
				524	getShortLength() * U_SIZEOF_UCHAR);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	525	break;
				526	case kLongString:
				527	// src uses a refCounted string buffer, use that buffer with refCount
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	528	// src is const, use a cast - we don't actually change it
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	529	((UnicodeString &)src).addRef();
				530	// copy all fields, share the reference-counted buffer
				531	fUnion.fFields.fArray = src.fUnion.fFields.fArray;
				532	fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	533	if(!hasShortLength()) {
				534	fUnion.fFields.fLength = src.fUnion.fFields.fLength;
				535	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	536	break;
				537	case kReadonlyAlias:
				538	if(fastCopy) {
				539	// src is a readonly alias, do the same
				540	// -> maintain the readonly alias as such
				541	fUnion.fFields.fArray = src.fUnion.fFields.fArray;
				542	fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	543	if(!hasShortLength()) {
				544	fUnion.fFields.fLength = src.fUnion.fFields.fLength;
				545	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	546	break;
				547	}
				548	// else if(!fastCopy) fall through to case kWritableAlias
				549	// -> allocate a new buffer and copy the contents
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	550	U_FALLTHROUGH;
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	551	case kWritableAlias: {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	552	// src is a writable alias; we make a copy of that instead
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	553	int32_t srcLength = src.length();
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	554	if(allocate(srcLength)) {
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	555	u_memcpy(getArrayStart(), src.getArrayStart(), srcLength);
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	556	setLength(srcLength);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	557	break;
				558	}
				559	// if there is not enough memory, then fall through to setting to bogus
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	560	U_FALLTHROUGH;
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	561	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	562	default:
				563	// if src is bogus, set ourselves to bogus
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	564	// do not call setToBogus() here because fArray and flags are not consistent here
				565	fUnion.fFields.fLengthAndFlags = kIsBogus;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	566	fUnion.fFields.fArray = 0;
				567	fUnion.fFields.fCapacity = 0;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	568	break;
				569	}
				570
				571	return *this;
				572	}
				573
Frank Tang	69c72a6	2019-04-03 21:41:21 -0700	[diff] [blame]	574	UnicodeString &UnicodeString::operator=(UnicodeString &&src) U_NOEXCEPT {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	575	// No explicit check for self move assignment, consistent with standard library.
				576	// Self move assignment causes no crash nor leak but might make the object bogus.
				577	releaseArray();
				578	copyFieldsFrom(src, TRUE);
				579	return *this;
				580	}
				581
Frank Tang	69c72a6	2019-04-03 21:41:21 -0700	[diff] [blame]	582	// Same as move assignment except without memory management.
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	583	void UnicodeString::copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT {
				584	int16_t lengthAndFlags = fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
				585	if(lengthAndFlags & kUsingStackBuffer) {
				586	// Short string using the stack buffer, copy the contents.
				587	// Check for self assignment to prevent "overlap in memcpy" warnings,
				588	// although it should be harmless to copy a buffer to itself exactly.
				589	if(this != &src) {
				590	uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
				591	getShortLength() * U_SIZEOF_UCHAR);
				592	}
				593	} else {
				594	// In all other cases, copy all fields.
				595	fUnion.fFields.fArray = src.fUnion.fFields.fArray;
				596	fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
				597	if(!hasShortLength()) {
				598	fUnion.fFields.fLength = src.fUnion.fFields.fLength;
				599	}
				600	if(setSrcToBogus) {
				601	// Set src to bogus without releasing any memory.
				602	src.fUnion.fFields.fLengthAndFlags = kIsBogus;
				603	src.fUnion.fFields.fArray = NULL;
				604	src.fUnion.fFields.fCapacity = 0;
				605	}
				606	}
				607	}
				608
				609	void UnicodeString::swap(UnicodeString &other) U_NOEXCEPT {
				610	UnicodeString temp; // Empty short string: Known not to need releaseArray().
				611	// Copy fields without resetting source values in between.
				612	temp.copyFieldsFrom(*this, FALSE);
				613	this->copyFieldsFrom(other, FALSE);
				614	other.copyFieldsFrom(temp, FALSE);
				615	// Set temp to an empty string so that other's memory is not released twice.
				616	temp.fUnion.fFields.fLengthAndFlags = kShortString;
				617	}
				618
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	619	//========================================
				620	// Miscellaneous operations
				621	//========================================
				622
				623	UnicodeString UnicodeString::unescape() const {
				624	UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	625	if (result.isBogus()) {
				626	return result;
				627	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	628	const UChar *array = getBuffer();
				629	int32_t len = length();
				630	int32_t prev = 0;
				631	for (int32_t i=0;;) {
				632	if (i == len) {
				633	result.append(array, prev, len - prev);
				634	break;
				635	}
				636	if (array[i++] == 0x5C /'\\'/) {
				637	result.append(array, prev, (i - 1) - prev);
				638	UChar32 c = unescapeAt(i); // advances i
				639	if (c < 0) {
				640	result.remove(); // return empty string
				641	break; // invalid escape sequence
				642	}
				643	result.append(c);
				644	prev = i;
				645	}
				646	}
				647	return result;
				648	}
				649
				650	UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
				651	return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
				652	}
				653
				654	//========================================
				655	// Read-only implementation
				656	//========================================
				657	UBool
				658	UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
				659	// Requires: this & text not bogus and have same lengths.
				660	// Byte-wise comparison works for equality regardless of endianness.
				661	return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0;
				662	}
				663
				664	int8_t
				665	UnicodeString::doCompare( int32_t start,
				666	int32_t length,
				667	const UChar *srcChars,
				668	int32_t srcStart,
				669	int32_t srcLength) const
				670	{
				671	// compare illegal string values
				672	if(isBogus()) {
				673	return -1;
				674	}
				675
				676	// pin indices to legal values
				677	pinIndices(start, length);
				678
				679	if(srcChars == NULL) {
				680	// treat const UChar *srcChars==NULL as an empty string
				681	return length == 0 ? 0 : 1;
				682	}
				683
				684	// get the correct pointer
				685	const UChar *chars = getArrayStart();
				686
				687	chars += start;
				688	srcChars += srcStart;
				689
				690	int32_t minLength;
				691	int8_t lengthResult;
				692
				693	// get the srcLength if necessary
				694	if(srcLength < 0) {
				695	srcLength = u_strlen(srcChars + srcStart);
				696	}
				697
				698	// are we comparing different lengths?
				699	if(length != srcLength) {
				700	if(length < srcLength) {
				701	minLength = length;
				702	lengthResult = -1;
				703	} else {
				704	minLength = srcLength;
				705	lengthResult = 1;
				706	}
				707	} else {
				708	minLength = length;
				709	lengthResult = 0;
				710	}
				711
				712	/*
				713	* note that uprv_memcmp() returns an int but we return an int8_t;
				714	* we need to take care not to truncate the result -
				715	* one way to do this is to right-shift the value to
				716	* move the sign bit into the lower 8 bits and making sure that this
				717	* does not become 0 itself
				718	*/
				719
				720	if(minLength > 0 && chars != srcChars) {
				721	int32_t result;
				722
				723	# if U_IS_BIG_ENDIAN
				724	// big-endian: byte comparison works
				725	result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
				726	if(result != 0) {
				727	return (int8_t)(result >> 15 \| 1);
				728	}
				729	# else
				730	// little-endian: compare UChar units
				731	do {
				732	result = ((int32_t)(chars++) - (int32_t)(srcChars++));
				733	if(result != 0) {
				734	return (int8_t)(result >> 15 \| 1);
				735	}
				736	} while(--minLength > 0);
				737	# endif
				738	}
				739	return lengthResult;
				740	}
				741
				742	/* String compare in code point order - doCompare() compares in code unit order. */
				743	int8_t
				744	UnicodeString::doCompareCodePointOrder(int32_t start,
				745	int32_t length,
				746	const UChar *srcChars,
				747	int32_t srcStart,
				748	int32_t srcLength) const
				749	{
				750	// compare illegal string values
				751	// treat const UChar *srcChars==NULL as an empty string
				752	if(isBogus()) {
				753	return -1;
				754	}
				755
				756	// pin indices to legal values
				757	pinIndices(start, length);
				758
				759	if(srcChars == NULL) {
				760	srcStart = srcLength = 0;
				761	}
				762
				763	int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=NULL)?(srcChars + srcStart):NULL, srcLength, FALSE, TRUE);
				764	/* translate the 32-bit result into an 8-bit one */
				765	if(diff!=0) {
				766	return (int8_t)(diff >> 15 \| 1);
				767	} else {
				768	return 0;
				769	}
				770	}
				771
				772	int32_t
				773	UnicodeString::getLength() const {
				774	return length();
				775	}
				776
				777	UChar
				778	UnicodeString::getCharAt(int32_t offset) const {
				779	return charAt(offset);
				780	}
				781
				782	UChar32
				783	UnicodeString::getChar32At(int32_t offset) const {
				784	return char32At(offset);
				785	}
				786
				787	UChar32
				788	UnicodeString::char32At(int32_t offset) const
				789	{
				790	int32_t len = length();
				791	if((uint32_t)offset < (uint32_t)len) {
				792	const UChar *array = getArrayStart();
				793	UChar32 c;
				794	U16_GET(array, 0, offset, len, c);
				795	return c;
				796	} else {
				797	return kInvalidUChar;
				798	}
				799	}
				800
				801	int32_t
				802	UnicodeString::getChar32Start(int32_t offset) const {
				803	if((uint32_t)offset < (uint32_t)length()) {
				804	const UChar *array = getArrayStart();
				805	U16_SET_CP_START(array, 0, offset);
				806	return offset;
				807	} else {
				808	return 0;
				809	}
				810	}
				811
				812	int32_t
				813	UnicodeString::getChar32Limit(int32_t offset) const {
				814	int32_t len = length();
				815	if((uint32_t)offset < (uint32_t)len) {
				816	const UChar *array = getArrayStart();
				817	U16_SET_CP_LIMIT(array, 0, offset, len);
				818	return offset;
				819	} else {
				820	return len;
				821	}
				822	}
				823
				824	int32_t
				825	UnicodeString::countChar32(int32_t start, int32_t length) const {
				826	pinIndices(start, length);
				827	// if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
				828	return u_countChar32(getArrayStart()+start, length);
				829	}
				830
				831	UBool
				832	UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
				833	pinIndices(start, length);
				834	// if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
				835	return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
				836	}
				837
				838	int32_t
				839	UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
				840	// pin index
				841	int32_t len = length();
				842	if(index<0) {
				843	index=0;
				844	} else if(index>len) {
				845	index=len;
				846	}
				847
				848	const UChar *array = getArrayStart();
				849	if(delta>0) {
				850	U16_FWD_N(array, index, len, delta);
				851	} else {
				852	U16_BACK_N(array, 0, index, -delta);
				853	}
				854
				855	return index;
				856	}
				857
				858	void
				859	UnicodeString::doExtract(int32_t start,
				860	int32_t length,
				861	UChar *dst,
				862	int32_t dstStart) const
				863	{
				864	// pin indices to legal values
				865	pinIndices(start, length);
				866
				867	// do not copy anything if we alias dst itself
				868	const UChar *array = getArrayStart();
				869	if(array + start != dst + dstStart) {
				870	us_arrayCopy(array, start, dst, dstStart, length);
				871	}
				872	}
				873
				874	int32_t
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	875	UnicodeString::extract(Char16Ptr dest, int32_t destCapacity,
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	876	UErrorCode &errorCode) const {
				877	int32_t len = length();
				878	if(U_SUCCESS(errorCode)) {
				879	if(isBogus() \|\| destCapacity<0 \|\| (destCapacity>0 && dest==0)) {
				880	errorCode=U_ILLEGAL_ARGUMENT_ERROR;
				881	} else {
				882	const UChar *array = getArrayStart();
				883	if(len>0 && len<=destCapacity && array!=dest) {
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	884	u_memcpy(dest, array, len);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	885	}
				886	return u_terminateUChars(dest, destCapacity, len, &errorCode);
				887	}
				888	}
				889
				890	return len;
				891	}
				892
				893	int32_t
				894	UnicodeString::extract(int32_t start,
				895	int32_t length,
				896	char *target,
				897	int32_t targetCapacity,
				898	enum EInvariant) const
				899	{
				900	// if the arguments are illegal, then do nothing
				901	if(targetCapacity < 0 \|\| (targetCapacity > 0 && target == NULL)) {
				902	return 0;
				903	}
				904
				905	// pin the indices to legal values
				906	pinIndices(start, length);
				907
				908	if(length <= targetCapacity) {
				909	u_UCharsToChars(getArrayStart() + start, target, length);
				910	}
				911	UErrorCode status = U_ZERO_ERROR;
				912	return u_terminateChars(target, targetCapacity, length, &status);
				913	}
				914
				915	UnicodeString
				916	UnicodeString::tempSubString(int32_t start, int32_t len) const {
				917	pinIndices(start, len);
				918	const UChar *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer
				919	if(array==NULL) {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	920	array=fUnion.fStackFields.fBuffer; // anything not NULL because that would make an empty string
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	921	len=-2; // bogus result string
				922	}
				923	return UnicodeString(FALSE, array + start, len);
				924	}
				925
				926	int32_t
				927	UnicodeString::toUTF8(int32_t start, int32_t len,
				928	char *target, int32_t capacity) const {
				929	pinIndices(start, len);
				930	int32_t length8;
				931	UErrorCode errorCode = U_ZERO_ERROR;
				932	u_strToUTF8WithSub(target, capacity, &length8,
				933	getBuffer() + start, len,
				934	0xFFFD, // Standard substitution character.
				935	NULL, // Don't care about number of substitutions.
				936	&errorCode);
				937	return length8;
				938	}
				939
				940	#if U_CHARSET_IS_UTF8
				941
				942	int32_t
				943	UnicodeString::extract(int32_t start, int32_t len,
				944	char *target, uint32_t dstSize) const {
				945	// if the arguments are illegal, then do nothing
				946	if(/dstSize < 0 \|\| /(dstSize > 0 && target == 0)) {
				947	return 0;
				948	}
				949	return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
				950	}
				951
				952	// else see unistr_cnv.cpp
				953	#endif
				954
				955	void
				956	UnicodeString::extractBetween(int32_t start,
				957	int32_t limit,
				958	UnicodeString& target) const {
				959	pinIndex(start);
				960	pinIndex(limit);
				961	doExtract(start, limit - start, target);
				962	}
				963
				964	// When converting from UTF-16 to UTF-8, the result will have at most 3 times
				965	// as many bytes as the source has UChars.
				966	// The "worst cases" are writing systems like Indic, Thai and CJK with
				967	// 3:1 bytes:UChars.
				968	void
				969	UnicodeString::toUTF8(ByteSink &sink) const {
				970	int32_t length16 = length();
				971	if(length16 != 0) {
				972	char stackBuffer[1024];
				973	int32_t capacity = (int32_t)sizeof(stackBuffer);
				974	UBool utf8IsOwned = FALSE;
				975	char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
				976	3*length16,
				977	stackBuffer, capacity,
				978	&capacity);
				979	int32_t length8 = 0;
				980	UErrorCode errorCode = U_ZERO_ERROR;
				981	u_strToUTF8WithSub(utf8, capacity, &length8,
				982	getBuffer(), length16,
				983	0xFFFD, // Standard substitution character.
				984	NULL, // Don't care about number of substitutions.
				985	&errorCode);
				986	if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
				987	utf8 = (char *)uprv_malloc(length8);
				988	if(utf8 != NULL) {
				989	utf8IsOwned = TRUE;
				990	errorCode = U_ZERO_ERROR;
				991	u_strToUTF8WithSub(utf8, length8, &length8,
				992	getBuffer(), length16,
				993	0xFFFD, // Standard substitution character.
				994	NULL, // Don't care about number of substitutions.
				995	&errorCode);
				996	} else {
				997	errorCode = U_MEMORY_ALLOCATION_ERROR;
				998	}
				999	}
				1000	if(U_SUCCESS(errorCode)) {
				1001	sink.Append(utf8, length8);
				1002	sink.Flush();
				1003	}
				1004	if(utf8IsOwned) {
				1005	uprv_free(utf8);
				1006	}
				1007	}
				1008	}
				1009
				1010	int32_t
				1011	UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
				1012	int32_t length32=0;
				1013	if(U_SUCCESS(errorCode)) {
				1014	// getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
				1015	u_strToUTF32WithSub(utf32, capacity, &length32,
				1016	getBuffer(), length(),
				1017	0xfffd, // Substitution character.
				1018	NULL, // Don't care about number of substitutions.
				1019	&errorCode);
				1020	}
				1021	return length32;
				1022	}
				1023
				1024	int32_t
				1025	UnicodeString::indexOf(const UChar *srcChars,
				1026	int32_t srcStart,
				1027	int32_t srcLength,
				1028	int32_t start,
				1029	int32_t length) const
				1030	{
				1031	if(isBogus() \|\| srcChars == 0 \|\| srcStart < 0 \|\| srcLength == 0) {
				1032	return -1;
				1033	}
				1034
				1035	// UnicodeString does not find empty substrings
				1036	if(srcLength < 0 && srcChars[srcStart] == 0) {
				1037	return -1;
				1038	}
				1039
				1040	// get the indices within bounds
				1041	pinIndices(start, length);
				1042
				1043	// find the first occurrence of the substring
				1044	const UChar *array = getArrayStart();
				1045	const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
				1046	if(match == NULL) {
				1047	return -1;
				1048	} else {
				1049	return (int32_t)(match - array);
				1050	}
				1051	}
				1052
				1053	int32_t
				1054	UnicodeString::doIndexOf(UChar c,
				1055	int32_t start,
				1056	int32_t length) const
				1057	{
				1058	// pin indices
				1059	pinIndices(start, length);
				1060
				1061	// find the first occurrence of c
				1062	const UChar *array = getArrayStart();
				1063	const UChar *match = u_memchr(array + start, c, length);
				1064	if(match == NULL) {
				1065	return -1;
				1066	} else {
				1067	return (int32_t)(match - array);
				1068	}
				1069	}
				1070
				1071	int32_t
				1072	UnicodeString::doIndexOf(UChar32 c,
				1073	int32_t start,
				1074	int32_t length) const {
				1075	// pin indices
				1076	pinIndices(start, length);
				1077
				1078	// find the first occurrence of c
				1079	const UChar *array = getArrayStart();
				1080	const UChar *match = u_memchr32(array + start, c, length);
				1081	if(match == NULL) {
				1082	return -1;
				1083	} else {
				1084	return (int32_t)(match - array);
				1085	}
				1086	}
				1087
				1088	int32_t
				1089	UnicodeString::lastIndexOf(const UChar *srcChars,
				1090	int32_t srcStart,
				1091	int32_t srcLength,
				1092	int32_t start,
				1093	int32_t length) const
				1094	{
				1095	if(isBogus() \|\| srcChars == 0 \|\| srcStart < 0 \|\| srcLength == 0) {
				1096	return -1;
				1097	}
				1098
				1099	// UnicodeString does not find empty substrings
				1100	if(srcLength < 0 && srcChars[srcStart] == 0) {
				1101	return -1;
				1102	}
				1103
				1104	// get the indices within bounds
				1105	pinIndices(start, length);
				1106
				1107	// find the last occurrence of the substring
				1108	const UChar *array = getArrayStart();
				1109	const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
				1110	if(match == NULL) {
				1111	return -1;
				1112	} else {
				1113	return (int32_t)(match - array);
				1114	}
				1115	}
				1116
				1117	int32_t
				1118	UnicodeString::doLastIndexOf(UChar c,
				1119	int32_t start,
				1120	int32_t length) const
				1121	{
				1122	if(isBogus()) {
				1123	return -1;
				1124	}
				1125
				1126	// pin indices
				1127	pinIndices(start, length);
				1128
				1129	// find the last occurrence of c
				1130	const UChar *array = getArrayStart();
				1131	const UChar *match = u_memrchr(array + start, c, length);
				1132	if(match == NULL) {
				1133	return -1;
				1134	} else {
				1135	return (int32_t)(match - array);
				1136	}
				1137	}
				1138
				1139	int32_t
				1140	UnicodeString::doLastIndexOf(UChar32 c,
				1141	int32_t start,
				1142	int32_t length) const {
				1143	// pin indices
				1144	pinIndices(start, length);
				1145
				1146	// find the last occurrence of c
				1147	const UChar *array = getArrayStart();
				1148	const UChar *match = u_memrchr32(array + start, c, length);
				1149	if(match == NULL) {
				1150	return -1;
				1151	} else {
				1152	return (int32_t)(match - array);
				1153	}
				1154	}
				1155
				1156	//========================================
				1157	// Write implementation
				1158	//========================================
				1159
				1160	UnicodeString&
				1161	UnicodeString::findAndReplace(int32_t start,
				1162	int32_t length,
				1163	const UnicodeString& oldText,
				1164	int32_t oldStart,
				1165	int32_t oldLength,
				1166	const UnicodeString& newText,
				1167	int32_t newStart,
				1168	int32_t newLength)
				1169	{
				1170	if(isBogus() \|\| oldText.isBogus() \|\| newText.isBogus()) {
				1171	return *this;
				1172	}
				1173
				1174	pinIndices(start, length);
				1175	oldText.pinIndices(oldStart, oldLength);
				1176	newText.pinIndices(newStart, newLength);
				1177
				1178	if(oldLength == 0) {
				1179	return *this;
				1180	}
				1181
				1182	while(length > 0 && length >= oldLength) {
				1183	int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
				1184	if(pos < 0) {
				1185	// no more oldText's here: done
				1186	break;
				1187	} else {
				1188	// we found oldText, replace it by newText and go beyond it
				1189	replace(pos, oldLength, newText, newStart, newLength);
				1190	length -= pos + oldLength - start;
				1191	start = pos + newLength;
				1192	}
				1193	}
				1194
				1195	return *this;
				1196	}
				1197
				1198
				1199	void
				1200	UnicodeString::setToBogus()
				1201	{
				1202	releaseArray();
				1203
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1204	fUnion.fFields.fLengthAndFlags = kIsBogus;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1205	fUnion.fFields.fArray = 0;
				1206	fUnion.fFields.fCapacity = 0;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1207	}
				1208
				1209	// turn a bogus string into an empty one
				1210	void
				1211	UnicodeString::unBogus() {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1212	if(fUnion.fFields.fLengthAndFlags & kIsBogus) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1213	setToEmpty();
				1214	}
				1215	}
				1216
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1217	const char16_t *
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1218	UnicodeString::getTerminatedBuffer() {
				1219	if(!isWritable()) {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1220	return nullptr;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1221	}
				1222	UChar *array = getArrayStart();
				1223	int32_t len = length();
				1224	if(len < getCapacity()) {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1225	if(fUnion.fFields.fLengthAndFlags & kBufferIsReadonly) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1226	// If len<capacity on a read-only alias, then array[len] is
				1227	// either the original NUL (if constructed with (TRUE, s, length))
				1228	// or one of the original string contents characters (if later truncated),
				1229	// therefore we can assume that array[len] is initialized memory.
				1230	if(array[len] == 0) {
				1231	return array;
				1232	}
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1233	} else if(((fUnion.fFields.fLengthAndFlags & kRefCounted) == 0 \|\| refCount() == 1)) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1234	// kRefCounted: Do not write the NUL if the buffer is shared.
				1235	// That is mostly safe, except when the length of one copy was modified
				1236	// without copy-on-write, e.g., via truncate(newLength) or remove(void).
				1237	// Then the NUL would be written into the middle of another copy's string.
				1238
				1239	// Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
				1240	// Do not test if there is a NUL already because it might be uninitialized memory.
				1241	// (That would be safe, but tools like valgrind & Purify would complain.)
				1242	array[len] = 0;
				1243	return array;
				1244	}
				1245	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1246	if(len<INT32_MAX && cloneArrayIfNeeded(len+1)) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1247	array = getArrayStart();
				1248	array[len] = 0;
				1249	return array;
				1250	} else {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1251	return nullptr;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1252	}
				1253	}
				1254
				1255	// setTo() analogous to the readonly-aliasing constructor with the same signature
				1256	UnicodeString &
				1257	UnicodeString::setTo(UBool isTerminated,
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1258	ConstChar16Ptr textPtr,
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1259	int32_t textLength)
				1260	{
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1261	if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1262	// do not modify a string that has an "open" getBuffer(minCapacity)
				1263	return *this;
				1264	}
				1265
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1266	const UChar *text = textPtr;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1267	if(text == NULL) {
				1268	// treat as an empty string, do not alias
				1269	releaseArray();
				1270	setToEmpty();
				1271	return *this;
				1272	}
				1273
				1274	if( textLength < -1 \|\|
				1275	(textLength == -1 && !isTerminated) \|\|
				1276	(textLength >= 0 && isTerminated && text[textLength] != 0)
				1277	) {
				1278	setToBogus();
				1279	return *this;
				1280	}
				1281
				1282	releaseArray();
				1283
				1284	if(textLength == -1) {
				1285	// text is terminated, or else it would have failed the above test
				1286	textLength = u_strlen(text);
				1287	}
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1288	fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1289	setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1290	return *this;
				1291	}
				1292
				1293	// setTo() analogous to the writable-aliasing constructor with the same signature
				1294	UnicodeString &
				1295	UnicodeString::setTo(UChar *buffer,
				1296	int32_t buffLength,
				1297	int32_t buffCapacity) {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1298	if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1299	// do not modify a string that has an "open" getBuffer(minCapacity)
				1300	return *this;
				1301	}
				1302
				1303	if(buffer == NULL) {
				1304	// treat as an empty string, do not alias
				1305	releaseArray();
				1306	setToEmpty();
				1307	return *this;
				1308	}
				1309
				1310	if(buffLength < -1 \|\| buffCapacity < 0 \|\| buffLength > buffCapacity) {
				1311	setToBogus();
				1312	return *this;
				1313	} else if(buffLength == -1) {
				1314	// buffLength = u_strlen(buff); but do not look beyond buffCapacity
				1315	const UChar p = buffer, limit = buffer + buffCapacity;
				1316	while(p != limit && *p != 0) {
				1317	++p;
				1318	}
				1319	buffLength = (int32_t)(p - buffer);
				1320	}
				1321
				1322	releaseArray();
				1323
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1324	fUnion.fFields.fLengthAndFlags = kWritableAlias;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1325	setArray(buffer, buffLength, buffCapacity);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1326	return *this;
				1327	}
				1328
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1329	UnicodeString &UnicodeString::setToUTF8(StringPiece utf8) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1330	unBogus();
				1331	int32_t length = utf8.length();
				1332	int32_t capacity;
				1333	// The UTF-16 string will be at most as long as the UTF-8 string.
				1334	if(length <= US_STACKBUF_SIZE) {
				1335	capacity = US_STACKBUF_SIZE;
				1336	} else {
				1337	capacity = length + 1; // +1 for the terminating NUL.
				1338	}
				1339	UChar *utf16 = getBuffer(capacity);
				1340	int32_t length16;
				1341	UErrorCode errorCode = U_ZERO_ERROR;
				1342	u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
				1343	utf8.data(), length,
				1344	0xfffd, // Substitution character.
				1345	NULL, // Don't care about number of substitutions.
				1346	&errorCode);
				1347	releaseBuffer(length16);
				1348	if(U_FAILURE(errorCode)) {
				1349	setToBogus();
				1350	}
				1351	return *this;
				1352	}
				1353
				1354	UnicodeString&
				1355	UnicodeString::setCharAt(int32_t offset,
				1356	UChar c)
				1357	{
				1358	int32_t len = length();
				1359	if(cloneArrayIfNeeded() && len > 0) {
				1360	if(offset < 0) {
				1361	offset = 0;
				1362	} else if(offset >= len) {
				1363	offset = len - 1;
				1364	}
				1365
				1366	getArrayStart()[offset] = c;
				1367	}
				1368	return *this;
				1369	}
				1370
				1371	UnicodeString&
				1372	UnicodeString::replace(int32_t start,
				1373	int32_t _length,
				1374	UChar32 srcChar) {
				1375	UChar buffer[U16_MAX_LENGTH];
				1376	int32_t count = 0;
				1377	UBool isError = FALSE;
				1378	U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
				1379	// We test isError so that the compiler does not complain that we don't.
				1380	// If isError (srcChar is not a valid code point) then count==0 which means
				1381	// we remove the source segment rather than replacing it with srcChar.
				1382	return doReplace(start, _length, buffer, 0, isError ? 0 : count);
				1383	}
				1384
				1385	UnicodeString&
				1386	UnicodeString::append(UChar32 srcChar) {
				1387	UChar buffer[U16_MAX_LENGTH];
				1388	int32_t _length = 0;
				1389	UBool isError = FALSE;
				1390	U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
				1391	// We test isError so that the compiler does not complain that we don't.
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1392	// If isError then _length==0 which turns the doAppend() into a no-op anyway.
				1393	return isError ? *this : doAppend(buffer, 0, _length);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1394	}
				1395
				1396	UnicodeString&
				1397	UnicodeString::doReplace( int32_t start,
				1398	int32_t length,
				1399	const UnicodeString& src,
				1400	int32_t srcStart,
				1401	int32_t srcLength)
				1402	{
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1403	// pin the indices to legal values
				1404	src.pinIndices(srcStart, srcLength);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1405
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1406	// get the characters from src
				1407	// and replace the range in ourselves with them
				1408	return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1409	}
				1410
				1411	UnicodeString&
				1412	UnicodeString::doReplace(int32_t start,
				1413	int32_t length,
				1414	const UChar *srcChars,
				1415	int32_t srcStart,
				1416	int32_t srcLength)
				1417	{
				1418	if(!isWritable()) {
				1419	return *this;
				1420	}
				1421
				1422	int32_t oldLength = this->length();
				1423
				1424	// optimize (read-only alias).remove(0, start) and .remove(start, end)
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1425	if((fUnion.fFields.fLengthAndFlags&kBufferIsReadonly) && srcLength == 0) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1426	if(start == 0) {
				1427	// remove prefix by adjusting the array pointer
				1428	pinIndex(length);
				1429	fUnion.fFields.fArray += length;
				1430	fUnion.fFields.fCapacity -= length;
				1431	setLength(oldLength - length);
				1432	return *this;
				1433	} else {
				1434	pinIndex(start);
				1435	if(length >= (oldLength - start)) {
				1436	// remove suffix by reducing the length (like truncate())
				1437	setLength(start);
				1438	fUnion.fFields.fCapacity = start; // not NUL-terminated any more
				1439	return *this;
				1440	}
				1441	}
				1442	}
				1443
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1444	if(start == oldLength) {
				1445	return doAppend(srcChars, srcStart, srcLength);
				1446	}
				1447
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1448	if(srcChars == 0) {
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame]	1449	srcLength = 0;
				1450	} else {
				1451	// Perform all remaining operations relative to srcChars + srcStart.
				1452	// From this point forward, do not use srcStart.
				1453	srcChars += srcStart;
				1454	if (srcLength < 0) {
				1455	// get the srcLength if necessary
				1456	srcLength = u_strlen(srcChars);
				1457	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1458	}
				1459
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1460	// pin the indices to legal values
				1461	pinIndices(start, length);
				1462
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1463	// Calculate the size of the string after the replace.
				1464	// Avoid int32_t overflow.
				1465	int32_t newLength = oldLength - length;
				1466	if(srcLength > (INT32_MAX - newLength)) {
				1467	setToBogus();
				1468	return *this;
				1469	}
				1470	newLength += srcLength;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1471
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame]	1472	// Check for insertion into ourself
				1473	const UChar *oldArray = getArrayStart();
				1474	if (isBufferWritable() &&
				1475	oldArray < srcChars + srcLength &&
				1476	srcChars < oldArray + oldLength) {
				1477	// Copy into a new UnicodeString and start over
				1478	UnicodeString copy(srcChars, srcLength);
				1479	if (copy.isBogus()) {
				1480	setToBogus();
				1481	return *this;
				1482	}
				1483	return doReplace(start, length, copy.getArrayStart(), 0, srcLength);
				1484	}
				1485
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1486	// cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy the current contents;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1487	// therefore we need to keep the current fArray
				1488	UChar oldStackBuffer[US_STACKBUF_SIZE];
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1489	if((fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1490	// copy the stack buffer contents because it will be overwritten with
				1491	// fUnion.fFields values
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame]	1492	u_memcpy(oldStackBuffer, oldArray, oldLength);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1493	oldArray = oldStackBuffer;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1494	}
				1495
				1496	// clone our array and allocate a bigger array if needed
				1497	int32_t *bufferToDelete = 0;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1498	if(!cloneArrayIfNeeded(newLength, getGrowCapacity(newLength),
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1499	FALSE, &bufferToDelete)
				1500	) {
				1501	return *this;
				1502	}
				1503
				1504	// now do the replace
				1505
				1506	UChar *newArray = getArrayStart();
				1507	if(newArray != oldArray) {
				1508	// if fArray changed, then we need to copy everything except what will change
				1509	us_arrayCopy(oldArray, 0, newArray, 0, start);
				1510	us_arrayCopy(oldArray, start + length,
				1511	newArray, start + srcLength,
				1512	oldLength - (start + length));
				1513	} else if(length != srcLength) {
				1514	// fArray did not change; copy only the portion that isn't changing, leaving a hole
				1515	us_arrayCopy(oldArray, start + length,
				1516	newArray, start + srcLength,
				1517	oldLength - (start + length));
				1518	}
				1519
				1520	// now fill in the hole with the new string
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame]	1521	us_arrayCopy(srcChars, 0, newArray, start, srcLength);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1522
				1523	setLength(newLength);
				1524
				1525	// delayed delete in case srcChars == fArray when we started, and
				1526	// to keep oldArray alive for the above operations
				1527	if (bufferToDelete) {
				1528	uprv_free(bufferToDelete);
				1529	}
				1530
				1531	return *this;
				1532	}
				1533
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1534	// Versions of doReplace() only for append() variants.
				1535	// doReplace() and doAppend() optimize for different cases.
				1536
				1537	UnicodeString&
				1538	UnicodeString::doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength) {
				1539	if(srcLength == 0) {
				1540	return *this;
				1541	}
				1542
				1543	// pin the indices to legal values
				1544	src.pinIndices(srcStart, srcLength);
				1545	return doAppend(src.getArrayStart(), srcStart, srcLength);
				1546	}
				1547
				1548	UnicodeString&
				1549	UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLength) {
				1550	if(!isWritable() \|\| srcLength == 0 \|\| srcChars == NULL) {
				1551	return *this;
				1552	}
				1553
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame]	1554	// Perform all remaining operations relative to srcChars + srcStart.
				1555	// From this point forward, do not use srcStart.
				1556	srcChars += srcStart;
				1557
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1558	if(srcLength < 0) {
				1559	// get the srcLength if necessary
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame]	1560	if((srcLength = u_strlen(srcChars)) == 0) {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1561	return *this;
				1562	}
				1563	}
				1564
				1565	int32_t oldLength = length();
Frank Tang	9f40209	2020-02-03 10:30:51 -0800	[diff] [blame^]	1566	int32_t newLength;
				1567	if (uprv_add32_overflow(oldLength, srcLength, &newLength)) {
				1568	setToBogus();
				1569	return *this;
				1570	}
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame]	1571
				1572	// Check for append onto ourself
				1573	const UChar* oldArray = getArrayStart();
				1574	if (isBufferWritable() &&
				1575	oldArray < srcChars + srcLength &&
				1576	srcChars < oldArray + oldLength) {
				1577	// Copy into a new UnicodeString and start over
				1578	UnicodeString copy(srcChars, srcLength);
				1579	if (copy.isBogus()) {
				1580	setToBogus();
				1581	return *this;
				1582	}
				1583	return doAppend(copy.getArrayStart(), 0, srcLength);
				1584	}
				1585
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1586	// optimize append() onto a large-enough, owned string
				1587	if((newLength <= getCapacity() && isBufferWritable()) \|\|
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1588	cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1589	UChar *newArray = getArrayStart();
				1590	// Do not copy characters when
				1591	// UChar *buffer=str.getAppendBuffer(...);
				1592	// is followed by
				1593	// str.append(buffer, length);
				1594	// or
				1595	// str.appendString(buffer, length)
				1596	// or similar.
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame]	1597	if(srcChars != newArray + oldLength) {
				1598	us_arrayCopy(srcChars, 0, newArray, oldLength, srcLength);
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1599	}
				1600	setLength(newLength);
				1601	}
				1602	return *this;
				1603	}
				1604
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1605	/**
				1606	* Replaceable API
				1607	*/
				1608	void
				1609	UnicodeString::handleReplaceBetween(int32_t start,
				1610	int32_t limit,
				1611	const UnicodeString& text) {
				1612	replaceBetween(start, limit, text);
				1613	}
				1614
				1615	/**
				1616	* Replaceable API
				1617	*/
				1618	void
				1619	UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
				1620	if (limit <= start) {
				1621	return; // Nothing to do; avoid bogus malloc call
				1622	}
				1623	UChar* text = (UChar) uprv_malloc( sizeof(UChar) (limit - start) );
				1624	// Check to make sure text is not null.
				1625	if (text != NULL) {
				1626	extractBetween(start, limit, text, 0);
				1627	insert(dest, text, 0, limit - start);
				1628	uprv_free(text);
				1629	}
				1630	}
				1631
				1632	/**
				1633	* Replaceable API
				1634	*
				1635	* NOTE: This is for the Replaceable class. There is no rep.cpp,
				1636	* so we implement this function here.
				1637	*/
				1638	UBool Replaceable::hasMetaData() const {
				1639	return TRUE;
				1640	}
				1641
				1642	/**
				1643	* Replaceable API
				1644	*/
				1645	UBool UnicodeString::hasMetaData() const {
				1646	return FALSE;
				1647	}
				1648
				1649	UnicodeString&
				1650	UnicodeString::doReverse(int32_t start, int32_t length) {
				1651	if(length <= 1 \|\| !cloneArrayIfNeeded()) {
				1652	return *this;
				1653	}
				1654
				1655	// pin the indices to legal values
				1656	pinIndices(start, length);
				1657	if(length <= 1) { // pinIndices() might have shrunk the length
				1658	return *this;
				1659	}
				1660
				1661	UChar *left = getArrayStart() + start;
				1662	UChar *right = left + length - 1; // -1 for inclusive boundary (length>=2)
				1663	UChar swap;
				1664	UBool hasSupplementary = FALSE;
				1665
				1666	// Before the loop we know left<right because length>=2.
				1667	do {
				1668	hasSupplementary \|= (UBool)U16_IS_LEAD(swap = *left);
				1669	hasSupplementary \|= (UBool)U16_IS_LEAD(left++ = right);
				1670	*right-- = swap;
				1671	} while(left < right);
				1672	// Make sure to test the middle code unit of an odd-length string.
				1673	// Redundant if the length is even.
				1674	hasSupplementary \|= (UBool)U16_IS_LEAD(*left);
				1675
				1676	/* if there are supplementary code points in the reversed range, then re-swap their surrogates */
				1677	if(hasSupplementary) {
				1678	UChar swap2;
				1679
				1680	left = getArrayStart() + start;
				1681	right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
				1682	while(left < right) {
				1683	if(U16_IS_TRAIL(swap = left) && U16_IS_LEAD(swap2 = (left + 1))) {
				1684	*left++ = swap2;
				1685	*left++ = swap;
				1686	} else {
				1687	++left;
				1688	}
				1689	}
				1690	}
				1691
				1692	return *this;
				1693	}
				1694
				1695	UBool
				1696	UnicodeString::padLeading(int32_t targetLength,
				1697	UChar padChar)
				1698	{
				1699	int32_t oldLength = length();
				1700	if(oldLength >= targetLength \|\| !cloneArrayIfNeeded(targetLength)) {
				1701	return FALSE;
				1702	} else {
				1703	// move contents up by padding width
				1704	UChar *array = getArrayStart();
				1705	int32_t start = targetLength - oldLength;
				1706	us_arrayCopy(array, 0, array, start, oldLength);
				1707
				1708	// fill in padding character
				1709	while(--start >= 0) {
				1710	array[start] = padChar;
				1711	}
				1712	setLength(targetLength);
				1713	return TRUE;
				1714	}
				1715	}
				1716
				1717	UBool
				1718	UnicodeString::padTrailing(int32_t targetLength,
				1719	UChar padChar)
				1720	{
				1721	int32_t oldLength = length();
				1722	if(oldLength >= targetLength \|\| !cloneArrayIfNeeded(targetLength)) {
				1723	return FALSE;
				1724	} else {
				1725	// fill in padding character
				1726	UChar *array = getArrayStart();
				1727	int32_t length = targetLength;
				1728	while(--length >= oldLength) {
				1729	array[length] = padChar;
				1730	}
				1731	setLength(targetLength);
				1732	return TRUE;
				1733	}
				1734	}
				1735
				1736	//========================================
				1737	// Hashing
				1738	//========================================
				1739	int32_t
				1740	UnicodeString::doHashCode() const
				1741	{
				1742	/* Delegate hash computation to uhash. This makes UnicodeString
				1743	* hashing consistent with UChar* hashing. */
				1744	int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
				1745	if (hashCode == kInvalidHashCode) {
				1746	hashCode = kEmptyHashCode;
				1747	}
				1748	return hashCode;
				1749	}
				1750
				1751	//========================================
				1752	// External Buffer
				1753	//========================================
				1754
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1755	char16_t *
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1756	UnicodeString::getBuffer(int32_t minCapacity) {
				1757	if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1758	fUnion.fFields.fLengthAndFlags\|=kOpenGetBuffer;
				1759	setZeroLength();
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1760	return getArrayStart();
				1761	} else {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1762	return nullptr;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1763	}
				1764	}
				1765
				1766	void
				1767	UnicodeString::releaseBuffer(int32_t newLength) {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1768	if(fUnion.fFields.fLengthAndFlags&kOpenGetBuffer && newLength>=-1) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1769	// set the new fLength
				1770	int32_t capacity=getCapacity();
				1771	if(newLength==-1) {
				1772	// the new length is the string length, capped by fCapacity
				1773	const UChar array=getArrayStart(), p=array, *limit=array+capacity;
				1774	while(p<limit && *p!=0) {
				1775	++p;
				1776	}
				1777	newLength=(int32_t)(p-array);
				1778	} else if(newLength>capacity) {
				1779	newLength=capacity;
				1780	}
				1781	setLength(newLength);
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1782	fUnion.fFields.fLengthAndFlags&=~kOpenGetBuffer;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1783	}
				1784	}
				1785
				1786	//========================================
				1787	// Miscellaneous
				1788	//========================================
				1789	UBool
				1790	UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
				1791	int32_t growCapacity,
				1792	UBool doCopyArray,
				1793	int32_t **pBufferToDelete,
				1794	UBool forceClone) {
				1795	// default parameters need to be static, therefore
				1796	// the defaults are -1 to have convenience defaults
				1797	if(newCapacity == -1) {
				1798	newCapacity = getCapacity();
				1799	}
				1800
				1801	// while a getBuffer(minCapacity) is "open",
				1802	// prevent any modifications of the string by returning FALSE here
				1803	// if the string is bogus, then only an assignment or similar can revive it
				1804	if(!isWritable()) {
				1805	return FALSE;
				1806	}
				1807
				1808	/*
				1809	* We need to make a copy of the array if
				1810	* the buffer is read-only, or
				1811	* the buffer is refCounted (shared), and refCount>1, or
				1812	* the buffer is too small.
				1813	* Return FALSE if memory could not be allocated.
				1814	*/
				1815	if(forceClone \|\|
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1816	fUnion.fFields.fLengthAndFlags & kBufferIsReadonly \|\|
				1817	(fUnion.fFields.fLengthAndFlags & kRefCounted && refCount() > 1) \|\|
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1818	newCapacity > getCapacity()
				1819	) {
				1820	// check growCapacity for default value and use of the stack buffer
				1821	if(growCapacity < 0) {
				1822	growCapacity = newCapacity;
				1823	} else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
				1824	growCapacity = US_STACKBUF_SIZE;
				1825	}
				1826
				1827	// save old values
				1828	UChar oldStackBuffer[US_STACKBUF_SIZE];
				1829	UChar *oldArray;
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1830	int32_t oldLength = length();
				1831	int16_t flags = fUnion.fFields.fLengthAndFlags;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1832
				1833	if(flags&kUsingStackBuffer) {
				1834	U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */
				1835	if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
				1836	// copy the stack buffer contents because it will be overwritten with
				1837	// fUnion.fFields values
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1838	us_arrayCopy(fUnion.fStackFields.fBuffer, 0, oldStackBuffer, 0, oldLength);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1839	oldArray = oldStackBuffer;
				1840	} else {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1841	oldArray = NULL; // no need to copy from the stack buffer to itself
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1842	}
				1843	} else {
				1844	oldArray = fUnion.fFields.fArray;
				1845	U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */
				1846	}
				1847
				1848	// allocate a new array
				1849	if(allocate(growCapacity) \|\|
				1850	(newCapacity < growCapacity && allocate(newCapacity))
				1851	) {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1852	if(doCopyArray) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1853	// copy the contents
				1854	// do not copy more than what fits - it may be smaller than before
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1855	int32_t minLength = oldLength;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1856	newCapacity = getCapacity();
				1857	if(newCapacity < minLength) {
				1858	minLength = newCapacity;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1859	}
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1860	if(oldArray != NULL) {
				1861	us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
				1862	}
				1863	setLength(minLength);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1864	} else {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1865	setZeroLength();
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1866	}
				1867
				1868	// release the old array
				1869	if(flags & kRefCounted) {
				1870	// the array is refCounted; decrement and release if 0
				1871	u_atomic_int32_t pRefCount = ((u_atomic_int32_t )oldArray - 1);
				1872	if(umtx_atomic_dec(pRefCount) == 0) {
				1873	if(pBufferToDelete == 0) {
				1874	// Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
				1875	// is defined as volatile. (Volatile has useful non-standard behavior
				1876	// with this compiler.)
				1877	uprv_free((void *)pRefCount);
				1878	} else {
				1879	// the caller requested to delete it himself
				1880	pBufferToDelete = (int32_t )pRefCount;
				1881	}
				1882	}
				1883	}
				1884	} else {
				1885	// not enough memory for growCapacity and not even for the smaller newCapacity
				1886	// reset the old values for setToBogus() to release the array
				1887	if(!(flags&kUsingStackBuffer)) {
				1888	fUnion.fFields.fArray = oldArray;
				1889	}
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1890	fUnion.fFields.fLengthAndFlags = flags;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1891	setToBogus();
				1892	return FALSE;
				1893	}
				1894	}
				1895	return TRUE;
				1896	}
				1897
				1898	// UnicodeStringAppendable ------------------------------------------------- ***
				1899
				1900	UnicodeStringAppendable::~UnicodeStringAppendable() {}
				1901
				1902	UBool
				1903	UnicodeStringAppendable::appendCodeUnit(UChar c) {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1904	return str.doAppend(&c, 0, 1).isWritable();
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1905	}
				1906
				1907	UBool
				1908	UnicodeStringAppendable::appendCodePoint(UChar32 c) {
				1909	UChar buffer[U16_MAX_LENGTH];
				1910	int32_t cLength = 0;
				1911	UBool isError = FALSE;
				1912	U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1913	return !isError && str.doAppend(buffer, 0, cLength).isWritable();
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1914	}
				1915
				1916	UBool
				1917	UnicodeStringAppendable::appendString(const UChar *s, int32_t length) {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1918	return str.doAppend(s, 0, length).isWritable();
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1919	}
				1920
				1921	UBool
				1922	UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
				1923	return str.cloneArrayIfNeeded(str.length() + appendCapacity);
				1924	}
				1925
				1926	UChar *
				1927	UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
				1928	int32_t desiredCapacityHint,
				1929	UChar *scratch, int32_t scratchCapacity,
				1930	int32_t *resultCapacity) {
				1931	if(minCapacity < 1 \|\| scratchCapacity < minCapacity) {
				1932	*resultCapacity = 0;
				1933	return NULL;
				1934	}
				1935	int32_t oldLength = str.length();
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1936	if(minCapacity <= (kMaxCapacity - oldLength) &&
				1937	desiredCapacityHint <= (kMaxCapacity - oldLength) &&
				1938	str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1939	*resultCapacity = str.getCapacity() - oldLength;
				1940	return str.getArrayStart() + oldLength;
				1941	}
				1942	*resultCapacity = scratchCapacity;
				1943	return scratch;
				1944	}
				1945
				1946	U_NAMESPACE_END
				1947
				1948	U_NAMESPACE_USE
				1949
				1950	U_CAPI int32_t U_EXPORT2
				1951	uhash_hashUnicodeString(const UElement key) {
				1952	const UnicodeString str = (const UnicodeString) key.pointer;
				1953	return (str == NULL) ? 0 : str->hashCode();
				1954	}
				1955
				1956	// Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
				1957	// does not depend on hashtable code.
				1958	U_CAPI UBool U_EXPORT2
				1959	uhash_compareUnicodeString(const UElement key1, const UElement key2) {
				1960	const UnicodeString str1 = (const UnicodeString) key1.pointer;
				1961	const UnicodeString str2 = (const UnicodeString) key2.pointer;
				1962	if (str1 == str2) {
				1963	return TRUE;
				1964	}
				1965	if (str1 == NULL \|\| str2 == NULL) {
				1966	return FALSE;
				1967	}
				1968	return str1 == str2;
				1969	}
				1970
				1971	#ifdef U_STATIC_IMPLEMENTATION
				1972	/*
				1973	This should never be called. It is defined here to make sure that the
				1974	virtual vector deleting destructor is defined within unistr.cpp.
				1975	The vector deleting destructor is already a part of UObject,
				1976	but defining it here makes sure that it is included with this object file.
				1977	This makes sure that static library dependencies are kept to a minimum.
				1978	*/
				1979	static void uprv_UnicodeStringDummy(void) {
				1980	delete [] (new UnicodeString[2]);
				1981	}
				1982	#endif