Blame - source/common/unistr.cpp - chromium.googlesource.com/chromium/deps/icu

blob: c8b6c0a3a46319219b7e9d15a220dc5930786401 [file] [log] [blame]

Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1	// © 2016 and later: Unicode, Inc. and others.
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2	// License & terms of use: http://www.unicode.org/copyright.html
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	3	/*
				4	******************************************************************************
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	5	* Copyright (C) 1999-2016, International Business Machines Corporation and
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	6	* others. All Rights Reserved.
				7	******************************************************************************
				8	*
				9	* File unistr.cpp
				10	*
				11	* Modification History:
				12	*
				13	* Date Name Description
				14	* 09/25/98 stephen Creation.
				15	* 04/20/99 stephen Overhauled per 4/16 code review.
				16	* 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
				17	* 11/18/99 aliu Added handleReplaceBetween() to make inherit from
				18	* Replaceable.
				19	* 06/25/01 grhoten Removed the dependency on iostream
				20	******************************************************************************
				21	*/
				22
				23	#include "unicode/utypes.h"
				24	#include "unicode/appendable.h"
				25	#include "unicode/putil.h"
				26	#include "cstring.h"
				27	#include "cmemory.h"
				28	#include "unicode/ustring.h"
				29	#include "unicode/unistr.h"
				30	#include "unicode/utf.h"
				31	#include "unicode/utf16.h"
				32	#include "uelement.h"
				33	#include "ustr_imp.h"
				34	#include "umutex.h"
				35	#include "uassert.h"
				36
				37	#if 0
				38
				39	#include <iostream>
				40	using namespace std;
				41
				42	//DEBUGGING
				43	void
				44	print(const UnicodeString& s,
				45	const char *name)
				46	{
				47	UChar c;
				48	cout << name << ":\|";
				49	for(int i = 0; i < s.length(); ++i) {
				50	c = s[i];
				51	if(c>= 0x007E \|\| c < 0x0020)
				52	cout << "[0x" << hex << s[i] << "]";
				53	else
				54	cout << (char) s[i];
				55	}
				56	cout << '\|' << endl;
				57	}
				58
				59	void
				60	print(const UChar *s,
				61	int32_t len,
				62	const char *name)
				63	{
				64	UChar c;
				65	cout << name << ":\|";
				66	for(int i = 0; i < len; ++i) {
				67	c = s[i];
				68	if(c>= 0x007E \|\| c < 0x0020)
				69	cout << "[0x" << hex << s[i] << "]";
				70	else
				71	cout << (char) s[i];
				72	}
				73	cout << '\|' << endl;
				74	}
				75	// END DEBUGGING
				76	#endif
				77
				78	// Local function definitions for now
				79
				80	// need to copy areas that may overlap
				81	static
				82	inline void
				83	us_arrayCopy(const UChar *src, int32_t srcStart,
				84	UChar *dst, int32_t dstStart, int32_t count)
				85	{
				86	if(count>0) {
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	87	uprv_memmove(dst+dstStart, src+srcStart, (size_t)countsizeof(src));
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	88	}
				89	}
				90
				91	// u_unescapeAt() callback to get a UChar from a UnicodeString
				92	U_CDECL_BEGIN
				93	static UChar U_CALLCONV
				94	UnicodeString_charAt(int32_t offset, void *context) {
				95	return ((icu::UnicodeString*) context)->charAt(offset);
				96	}
				97	U_CDECL_END
				98
				99	U_NAMESPACE_BEGIN
				100
				101	/* The Replaceable virtual destructor can't be defined in the header
				102	due to how AIX works with multiple definitions of virtual functions.
				103	*/
				104	Replaceable::~Replaceable() {}
				105
				106	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
				107
				108	UnicodeString U_EXPORT2
				109	operator+ (const UnicodeString &s1, const UnicodeString &s2) {
				110	return
				111	UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
				112	append(s1).
				113	append(s2);
				114	}
				115
				116	//========================================
				117	// Reference Counting functions, put at top of file so that optimizing compilers
				118	// have a chance to automatically inline.
				119	//========================================
				120
				121	void
				122	UnicodeString::addRef() {
				123	umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
				124	}
				125
				126	int32_t
				127	UnicodeString::removeRef() {
				128	return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
				129	}
				130
				131	int32_t
				132	UnicodeString::refCount() const {
				133	return umtx_loadAcquire(((u_atomic_int32_t )fUnion.fFields.fArray - 1));
				134	}
				135
				136	void
				137	UnicodeString::releaseArray() {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	138	if((fUnion.fFields.fLengthAndFlags & kRefCounted) && removeRef() == 0) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	139	uprv_free((int32_t *)fUnion.fFields.fArray - 1);
				140	}
				141	}
				142
				143
				144
				145	//========================================
				146	// Constructors
				147	//========================================
				148
				149	// The default constructor is inline in unistr.h.
				150
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	151	UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) {
				152	fUnion.fFields.fLengthAndFlags = 0;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	153	if(count <= 0 \|\| (uint32_t)c > 0x10ffff) {
				154	// just allocate and do not do anything else
				155	allocate(capacity);
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	156	} else if(c <= 0xffff) {
				157	int32_t length = count;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	158	if(capacity < length) {
				159	capacity = length;
				160	}
				161	if(allocate(capacity)) {
				162	UChar *array = getArrayStart();
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	163	UChar unit = (UChar)c;
				164	for(int32_t i = 0; i < length; ++i) {
				165	array[i] = unit;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	166	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	167	setLength(length);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	168	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	169	} else { // supplementary code point, write surrogate pairs
				170	if(count > (INT32_MAX / 2)) {
				171	// We would get more than 2G UChars.
				172	allocate(capacity);
				173	return;
				174	}
				175	int32_t length = count * 2;
				176	if(capacity < length) {
				177	capacity = length;
				178	}
				179	if(allocate(capacity)) {
				180	UChar *array = getArrayStart();
				181	UChar lead = U16_LEAD(c);
				182	UChar trail = U16_TRAIL(c);
				183	for(int32_t i = 0; i < length; i += 2) {
				184	array[i] = lead;
				185	array[i + 1] = trail;
				186	}
				187	setLength(length);
				188	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	189	}
				190	}
				191
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	192	UnicodeString::UnicodeString(UChar ch) {
				193	fUnion.fFields.fLengthAndFlags = kLength1 \| kShortString;
				194	fUnion.fStackFields.fBuffer[0] = ch;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	195	}
				196
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	197	UnicodeString::UnicodeString(UChar32 ch) {
				198	fUnion.fFields.fLengthAndFlags = kShortString;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	199	int32_t i = 0;
				200	UBool isError = FALSE;
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	201	U16_APPEND(fUnion.fStackFields.fBuffer, i, US_STACKBUF_SIZE, ch, isError);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	202	// We test isError so that the compiler does not complain that we don't.
				203	// If isError then i==0 which is what we want anyway.
				204	if(!isError) {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	205	setShortLength(i);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	206	}
				207	}
				208
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	209	UnicodeString::UnicodeString(const UChar *text) {
				210	fUnion.fFields.fLengthAndFlags = kShortString;
				211	doAppend(text, 0, -1);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	212	}
				213
				214	UnicodeString::UnicodeString(const UChar *text,
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	215	int32_t textLength) {
				216	fUnion.fFields.fLengthAndFlags = kShortString;
				217	doAppend(text, 0, textLength);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	218	}
				219
				220	UnicodeString::UnicodeString(UBool isTerminated,
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	221	ConstChar16Ptr textPtr,
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	222	int32_t textLength) {
				223	fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	224	const UChar *text = textPtr;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	225	if(text == NULL) {
				226	// treat as an empty string, do not alias
				227	setToEmpty();
				228	} else if(textLength < -1 \|\|
				229	(textLength == -1 && !isTerminated) \|\|
				230	(textLength >= 0 && isTerminated && text[textLength] != 0)
				231	) {
				232	setToBogus();
				233	} else {
				234	if(textLength == -1) {
				235	// text is terminated, or else it would have failed the above test
				236	textLength = u_strlen(text);
				237	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	238	setArray(const_cast<UChar *>(text), textLength,
				239	isTerminated ? textLength + 1 : textLength);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	240	}
				241	}
				242
				243	UnicodeString::UnicodeString(UChar *buff,
				244	int32_t buffLength,
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	245	int32_t buffCapacity) {
				246	fUnion.fFields.fLengthAndFlags = kWritableAlias;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	247	if(buff == NULL) {
				248	// treat as an empty string, do not alias
				249	setToEmpty();
				250	} else if(buffLength < -1 \|\| buffCapacity < 0 \|\| buffLength > buffCapacity) {
				251	setToBogus();
				252	} else {
				253	if(buffLength == -1) {
				254	// fLength = u_strlen(buff); but do not look beyond buffCapacity
				255	const UChar p = buff, limit = buff + buffCapacity;
				256	while(p != limit && *p != 0) {
				257	++p;
				258	}
				259	buffLength = (int32_t)(p - buff);
				260	}
				261	setArray(buff, buffLength, buffCapacity);
				262	}
				263	}
				264
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	265	UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) {
				266	fUnion.fFields.fLengthAndFlags = kShortString;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	267	if(src==NULL) {
				268	// treat as an empty string
				269	} else {
				270	if(length<0) {
				271	length=(int32_t)uprv_strlen(src);
				272	}
				273	if(cloneArrayIfNeeded(length, length, FALSE)) {
				274	u_charsToUChars(src, getArrayStart(), length);
				275	setLength(length);
				276	} else {
				277	setToBogus();
				278	}
				279	}
				280	}
				281
				282	#if U_CHARSET_IS_UTF8
				283
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	284	UnicodeString::UnicodeString(const char *codepageData) {
				285	fUnion.fFields.fLengthAndFlags = kShortString;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	286	if(codepageData != 0) {
				287	setToUTF8(codepageData);
				288	}
				289	}
				290
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	291	UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) {
				292	fUnion.fFields.fLengthAndFlags = kShortString;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	293	// if there's nothing to convert, do nothing
				294	if(codepageData == 0 \|\| dataLength == 0 \|\| dataLength < -1) {
				295	return;
				296	}
				297	if(dataLength == -1) {
				298	dataLength = (int32_t)uprv_strlen(codepageData);
				299	}
				300	setToUTF8(StringPiece(codepageData, dataLength));
				301	}
				302
				303	// else see unistr_cnv.cpp
				304	#endif
				305
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	306	UnicodeString::UnicodeString(const UnicodeString& that) {
				307	fUnion.fFields.fLengthAndFlags = kShortString;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	308	copyFrom(that);
				309	}
				310
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	311	UnicodeString::UnicodeString(UnicodeString &&src) U_NOEXCEPT {
				312	fUnion.fFields.fLengthAndFlags = kShortString;
				313	moveFrom(src);
				314	}
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	315
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	316	UnicodeString::UnicodeString(const UnicodeString& that,
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	317	int32_t srcStart) {
				318	fUnion.fFields.fLengthAndFlags = kShortString;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	319	setTo(that, srcStart);
				320	}
				321
				322	UnicodeString::UnicodeString(const UnicodeString& that,
				323	int32_t srcStart,
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	324	int32_t srcLength) {
				325	fUnion.fFields.fLengthAndFlags = kShortString;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	326	setTo(that, srcStart, srcLength);
				327	}
				328
				329	// Replaceable base class clone() default implementation, does not clone
				330	Replaceable *
				331	Replaceable::clone() const {
				332	return NULL;
				333	}
				334
				335	// UnicodeString overrides clone() with a real implementation
				336	Replaceable *
				337	UnicodeString::clone() const {
				338	return new UnicodeString(*this);
				339	}
				340
				341	//========================================
				342	// array allocation
				343	//========================================
				344
namespace {

// Fixed part of the growth increment, in UChars.
const int32_t kGrowSize = 128;

// Largest supported capacity.
// One int32_t reference counter plus capacity UChars must fit into a 32-bit
// size_t (at least on a 32-bit platform). One more UChar is added for the
// NUL terminator, to avoid reallocation in getTerminatedBuffer(), and the
// byte count is rounded up to a multiple of 16.
// Hence capacity <= (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5.
// (More complicated checks could reach 0x7ffffffd without rounding up,
// but that does not seem worth it.)
const int32_t kMaxCapacity = 0x7ffffff5;

// Returns the capacity to allocate for a string of newLength units:
// newLength plus a quarter of it plus kGrowSize, capped at kMaxCapacity.
int32_t getGrowCapacity(int32_t newLength) {
  const int32_t increment = (newLength >> 2) + kGrowSize;
  const int32_t headroom = kMaxCapacity - newLength;
  return (increment <= headroom) ? (newLength + increment) : kMaxCapacity;
}

}  // namespace
				368
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	369	UBool
				370	UnicodeString::allocate(int32_t capacity) {
				371	if(capacity <= US_STACKBUF_SIZE) {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	372	fUnion.fFields.fLengthAndFlags = kShortString;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	373	return TRUE;
				374	}
				375	if(capacity <= kMaxCapacity) {
				376	++capacity; // for the NUL
				377	// Switch to size_t which is unsigned so that we can allocate up to 4GB.
				378	// Reference counter + UChars.
				379	size_t numBytes = sizeof(int32_t) + (size_t)capacity * U_SIZEOF_UCHAR;
				380	// Round up to a multiple of 16.
				381	numBytes = (numBytes + 15) & ~15;
				382	int32_t array = (int32_t ) uprv_malloc(numBytes);
				383	if(array != NULL) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	384	// set initial refCount and point behind the refCount
				385	*array++ = 1;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	386	numBytes -= sizeof(int32_t);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	387
				388	// have fArray point to the first UChar
				389	fUnion.fFields.fArray = (UChar *)array;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	390	fUnion.fFields.fCapacity = (int32_t)(numBytes / U_SIZEOF_UCHAR);
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	391	fUnion.fFields.fLengthAndFlags = kLongString;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	392	return TRUE;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	393	}
				394	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	395	fUnion.fFields.fLengthAndFlags = kIsBogus;
				396	fUnion.fFields.fArray = 0;
				397	fUnion.fFields.fCapacity = 0;
				398	return FALSE;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	399	}
				400
				401	//========================================
				402	// Destructor
				403	//========================================
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	404
				405	#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
				406	static u_atomic_int32_t finalLengthCounts[0x400]; // UnicodeString::kMaxShortLength+1
				407	static u_atomic_int32_t beyondCount(0);
				408
				409	U_CAPI void unistr_printLengths() {
				410	int32_t i;
				411	for(i = 0; i <= 59; ++i) {
				412	printf("%2d, %9d\n", i, (int32_t)finalLengthCounts[i]);
				413	}
				414	int32_t beyond = beyondCount;
				415	for(; i < UPRV_LENGTHOF(finalLengthCounts); ++i) {
				416	beyond += finalLengthCounts[i];
				417	}
				418	printf(">59, %9d\n", beyond);
				419	}
				420	#endif
				421
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	422	UnicodeString::~UnicodeString()
				423	{
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	424	#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
				425	// Count lengths of strings at the end of their lifetime.
				426	// Useful for discussion of a desirable stack buffer size.
				427	// Count the contents length, not the optional NUL terminator nor further capacity.
				428	// Ignore open-buffer strings and strings which alias external storage.
				429	if((fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer\|kReadonlyAlias\|kWritableAlias)) == 0) {
				430	if(hasShortLength()) {
				431	umtx_atomic_inc(finalLengthCounts + getShortLength());
				432	} else {
				433	umtx_atomic_inc(&beyondCount);
				434	}
				435	}
				436	#endif
				437
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	438	releaseArray();
				439	}
				440
				441	//========================================
				442	// Factory methods
				443	//========================================
				444
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	445	UnicodeString UnicodeString::fromUTF8(StringPiece utf8) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	446	UnicodeString result;
				447	result.setToUTF8(utf8);
				448	return result;
				449	}
				450
				451	UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
				452	UnicodeString result;
				453	int32_t capacity;
				454	// Most UTF-32 strings will be BMP-only and result in a same-length
				455	// UTF-16 string. We overestimate the capacity just slightly,
				456	// just in case there are a few supplementary characters.
				457	if(length <= US_STACKBUF_SIZE) {
				458	capacity = US_STACKBUF_SIZE;
				459	} else {
				460	capacity = length + (length >> 4) + 4;
				461	}
				462	do {
				463	UChar *utf16 = result.getBuffer(capacity);
				464	int32_t length16;
				465	UErrorCode errorCode = U_ZERO_ERROR;
				466	u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
				467	utf32, length,
				468	0xfffd, // Substitution character.
				469	NULL, // Don't care about number of substitutions.
				470	&errorCode);
				471	result.releaseBuffer(length16);
				472	if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
				473	capacity = length16 + 1; // +1 for the terminating NUL.
				474	continue;
				475	} else if(U_FAILURE(errorCode)) {
				476	result.setToBogus();
				477	}
				478	break;
				479	} while(TRUE);
				480	return result;
				481	}
				482
				483	//========================================
				484	// Assignment
				485	//========================================
				486
				487	UnicodeString &
				488	UnicodeString::operator=(const UnicodeString &src) {
				489	return copyFrom(src);
				490	}
				491
				492	UnicodeString &
				493	UnicodeString::fastCopyFrom(const UnicodeString &src) {
				494	return copyFrom(src, TRUE);
				495	}
				496
				497	UnicodeString &
				498	UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
				499	// if assigning to ourselves, do nothing
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	500	if(this == &src) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	501	return *this;
				502	}
				503
				504	// is the right side bogus?
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	505	if(src.isBogus()) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	506	setToBogus();
				507	return *this;
				508	}
				509
				510	// delete the current contents
				511	releaseArray();
				512
				513	if(src.isEmpty()) {
				514	// empty string - use the stack buffer
				515	setToEmpty();
				516	return *this;
				517	}
				518
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	519	// fLength>0 and not an "open" src.getBuffer(minCapacity)
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	520	fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
				521	switch(src.fUnion.fFields.fLengthAndFlags & kAllStorageFlags) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	522	case kShortString:
				523	// short string using the stack buffer, do the same
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	524	uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
				525	getShortLength() * U_SIZEOF_UCHAR);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	526	break;
				527	case kLongString:
				528	// src uses a refCounted string buffer, use that buffer with refCount
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	529	// src is const, use a cast - we don't actually change it
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	530	((UnicodeString &)src).addRef();
				531	// copy all fields, share the reference-counted buffer
				532	fUnion.fFields.fArray = src.fUnion.fFields.fArray;
				533	fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	534	if(!hasShortLength()) {
				535	fUnion.fFields.fLength = src.fUnion.fFields.fLength;
				536	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	537	break;
				538	case kReadonlyAlias:
				539	if(fastCopy) {
				540	// src is a readonly alias, do the same
				541	// -> maintain the readonly alias as such
				542	fUnion.fFields.fArray = src.fUnion.fFields.fArray;
				543	fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	544	if(!hasShortLength()) {
				545	fUnion.fFields.fLength = src.fUnion.fFields.fLength;
				546	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	547	break;
				548	}
				549	// else if(!fastCopy) fall through to case kWritableAlias
				550	// -> allocate a new buffer and copy the contents
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	551	U_FALLTHROUGH;
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	552	case kWritableAlias: {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	553	// src is a writable alias; we make a copy of that instead
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	554	int32_t srcLength = src.length();
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	555	if(allocate(srcLength)) {
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	556	u_memcpy(getArrayStart(), src.getArrayStart(), srcLength);
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	557	setLength(srcLength);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	558	break;
				559	}
				560	// if there is not enough memory, then fall through to setting to bogus
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	561	U_FALLTHROUGH;
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	562	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	563	default:
				564	// if src is bogus, set ourselves to bogus
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	565	// do not call setToBogus() here because fArray and flags are not consistent here
				566	fUnion.fFields.fLengthAndFlags = kIsBogus;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	567	fUnion.fFields.fArray = 0;
				568	fUnion.fFields.fCapacity = 0;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	569	break;
				570	}
				571
				572	return *this;
				573	}
				574
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	575	UnicodeString &UnicodeString::moveFrom(UnicodeString &src) U_NOEXCEPT {
				576	// No explicit check for self move assignment, consistent with standard library.
				577	// Self move assignment causes no crash nor leak but might make the object bogus.
				578	releaseArray();
				579	copyFieldsFrom(src, TRUE);
				580	return *this;
				581	}
				582
				583	// Same as moveFrom() except without memory management.
				584	void UnicodeString::copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT {
				585	int16_t lengthAndFlags = fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
				586	if(lengthAndFlags & kUsingStackBuffer) {
				587	// Short string using the stack buffer, copy the contents.
				588	// Check for self assignment to prevent "overlap in memcpy" warnings,
				589	// although it should be harmless to copy a buffer to itself exactly.
				590	if(this != &src) {
				591	uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
				592	getShortLength() * U_SIZEOF_UCHAR);
				593	}
				594	} else {
				595	// In all other cases, copy all fields.
				596	fUnion.fFields.fArray = src.fUnion.fFields.fArray;
				597	fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
				598	if(!hasShortLength()) {
				599	fUnion.fFields.fLength = src.fUnion.fFields.fLength;
				600	}
				601	if(setSrcToBogus) {
				602	// Set src to bogus without releasing any memory.
				603	src.fUnion.fFields.fLengthAndFlags = kIsBogus;
				604	src.fUnion.fFields.fArray = NULL;
				605	src.fUnion.fFields.fCapacity = 0;
				606	}
				607	}
				608	}
				609
				610	void UnicodeString::swap(UnicodeString &other) U_NOEXCEPT {
				611	UnicodeString temp; // Empty short string: Known not to need releaseArray().
				612	// Copy fields without resetting source values in between.
				613	temp.copyFieldsFrom(*this, FALSE);
				614	this->copyFieldsFrom(other, FALSE);
				615	other.copyFieldsFrom(temp, FALSE);
				616	// Set temp to an empty string so that other's memory is not released twice.
				617	temp.fUnion.fFields.fLengthAndFlags = kShortString;
				618	}
				619
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	620	//========================================
				621	// Miscellaneous operations
				622	//========================================
				623
				624	UnicodeString UnicodeString::unescape() const {
				625	UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	626	if (result.isBogus()) {
				627	return result;
				628	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	629	const UChar *array = getBuffer();
				630	int32_t len = length();
				631	int32_t prev = 0;
				632	for (int32_t i=0;;) {
				633	if (i == len) {
				634	result.append(array, prev, len - prev);
				635	break;
				636	}
				637	if (array[i++] == 0x5C /'\\'/) {
				638	result.append(array, prev, (i - 1) - prev);
				639	UChar32 c = unescapeAt(i); // advances i
				640	if (c < 0) {
				641	result.remove(); // return empty string
				642	break; // invalid escape sequence
				643	}
				644	result.append(c);
				645	prev = i;
				646	}
				647	}
				648	return result;
				649	}
				650
				651	UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
				652	return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
				653	}
				654
				655	//========================================
				656	// Read-only implementation
				657	//========================================
				658	UBool
				659	UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
				660	// Requires: this & text not bogus and have same lengths.
				661	// Byte-wise comparison works for equality regardless of endianness.
				662	return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0;
				663	}
				664
				665	int8_t
				666	UnicodeString::doCompare( int32_t start,
				667	int32_t length,
				668	const UChar *srcChars,
				669	int32_t srcStart,
				670	int32_t srcLength) const
				671	{
				672	// compare illegal string values
				673	if(isBogus()) {
				674	return -1;
				675	}
				676
				677	// pin indices to legal values
				678	pinIndices(start, length);
				679
				680	if(srcChars == NULL) {
				681	// treat const UChar *srcChars==NULL as an empty string
				682	return length == 0 ? 0 : 1;
				683	}
				684
				685	// get the correct pointer
				686	const UChar *chars = getArrayStart();
				687
				688	chars += start;
				689	srcChars += srcStart;
				690
				691	int32_t minLength;
				692	int8_t lengthResult;
				693
				694	// get the srcLength if necessary
				695	if(srcLength < 0) {
				696	srcLength = u_strlen(srcChars + srcStart);
				697	}
				698
				699	// are we comparing different lengths?
				700	if(length != srcLength) {
				701	if(length < srcLength) {
				702	minLength = length;
				703	lengthResult = -1;
				704	} else {
				705	minLength = srcLength;
				706	lengthResult = 1;
				707	}
				708	} else {
				709	minLength = length;
				710	lengthResult = 0;
				711	}
				712
				713	/*
				714	* note that uprv_memcmp() returns an int but we return an int8_t;
				715	* we need to take care not to truncate the result -
				716	* one way to do this is to right-shift the value to
				717	* move the sign bit into the lower 8 bits and making sure that this
				718	* does not become 0 itself
				719	*/
				720
				721	if(minLength > 0 && chars != srcChars) {
				722	int32_t result;
				723
				724	# if U_IS_BIG_ENDIAN
				725	// big-endian: byte comparison works
				726	result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
				727	if(result != 0) {
				728	return (int8_t)(result >> 15 \| 1);
				729	}
				730	# else
				731	// little-endian: compare UChar units
				732	do {
				733	result = ((int32_t)(chars++) - (int32_t)(srcChars++));
				734	if(result != 0) {
				735	return (int8_t)(result >> 15 \| 1);
				736	}
				737	} while(--minLength > 0);
				738	# endif
				739	}
				740	return lengthResult;
				741	}
				742
				743	/* String compare in code point order - doCompare() compares in code unit order. */
				744	int8_t
				745	UnicodeString::doCompareCodePointOrder(int32_t start,
				746	int32_t length,
				747	const UChar *srcChars,
				748	int32_t srcStart,
				749	int32_t srcLength) const
				750	{
				751	// compare illegal string values
				752	// treat const UChar *srcChars==NULL as an empty string
				753	if(isBogus()) {
				754	return -1;
				755	}
				756
				757	// pin indices to legal values
				758	pinIndices(start, length);
				759
				760	if(srcChars == NULL) {
				761	srcStart = srcLength = 0;
				762	}
				763
				764	int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=NULL)?(srcChars + srcStart):NULL, srcLength, FALSE, TRUE);
				765	/* translate the 32-bit result into an 8-bit one */
				766	if(diff!=0) {
				767	return (int8_t)(diff >> 15 \| 1);
				768	} else {
				769	return 0;
				770	}
				771	}
				772
				773	int32_t
				774	UnicodeString::getLength() const {
				775	return length();
				776	}
				777
				778	UChar
				779	UnicodeString::getCharAt(int32_t offset) const {
				780	return charAt(offset);
				781	}
				782
				783	UChar32
				784	UnicodeString::getChar32At(int32_t offset) const {
				785	return char32At(offset);
				786	}
				787
				788	UChar32
				789	UnicodeString::char32At(int32_t offset) const
				790	{
				791	int32_t len = length();
				792	if((uint32_t)offset < (uint32_t)len) {
				793	const UChar *array = getArrayStart();
				794	UChar32 c;
				795	U16_GET(array, 0, offset, len, c);
				796	return c;
				797	} else {
				798	return kInvalidUChar;
				799	}
				800	}
				801
				802	int32_t
				803	UnicodeString::getChar32Start(int32_t offset) const {
				804	if((uint32_t)offset < (uint32_t)length()) {
				805	const UChar *array = getArrayStart();
				806	U16_SET_CP_START(array, 0, offset);
				807	return offset;
				808	} else {
				809	return 0;
				810	}
				811	}
				812
				813	int32_t
				814	UnicodeString::getChar32Limit(int32_t offset) const {
				815	int32_t len = length();
				816	if((uint32_t)offset < (uint32_t)len) {
				817	const UChar *array = getArrayStart();
				818	U16_SET_CP_LIMIT(array, 0, offset, len);
				819	return offset;
				820	} else {
				821	return len;
				822	}
				823	}
				824
				825	int32_t
				826	UnicodeString::countChar32(int32_t start, int32_t length) const {
				827	pinIndices(start, length);
				828	// if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
				829	return u_countChar32(getArrayStart()+start, length);
				830	}
				831
				832	UBool
				833	UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
				834	pinIndices(start, length);
				835	// if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
				836	return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
				837	}
				838
				839	int32_t
				840	UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
				841	// pin index
				842	int32_t len = length();
				843	if(index<0) {
				844	index=0;
				845	} else if(index>len) {
				846	index=len;
				847	}
				848
				849	const UChar *array = getArrayStart();
				850	if(delta>0) {
				851	U16_FWD_N(array, index, len, delta);
				852	} else {
				853	U16_BACK_N(array, 0, index, -delta);
				854	}
				855
				856	return index;
				857	}
				858
				859	void
				860	UnicodeString::doExtract(int32_t start,
				861	int32_t length,
				862	UChar *dst,
				863	int32_t dstStart) const
				864	{
				865	// pin indices to legal values
				866	pinIndices(start, length);
				867
				868	// do not copy anything if we alias dst itself
				869	const UChar *array = getArrayStart();
				870	if(array + start != dst + dstStart) {
				871	us_arrayCopy(array, start, dst, dstStart, length);
				872	}
				873	}
				874
				875	int32_t
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	876	UnicodeString::extract(Char16Ptr dest, int32_t destCapacity,
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	877	UErrorCode &errorCode) const {
				878	int32_t len = length();
				879	if(U_SUCCESS(errorCode)) {
				880	if(isBogus() \|\| destCapacity<0 \|\| (destCapacity>0 && dest==0)) {
				881	errorCode=U_ILLEGAL_ARGUMENT_ERROR;
				882	} else {
				883	const UChar *array = getArrayStart();
				884	if(len>0 && len<=destCapacity && array!=dest) {
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	885	u_memcpy(dest, array, len);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	886	}
				887	return u_terminateUChars(dest, destCapacity, len, &errorCode);
				888	}
				889	}
				890
				891	return len;
				892	}
				893
				894	int32_t
				895	UnicodeString::extract(int32_t start,
				896	int32_t length,
				897	char *target,
				898	int32_t targetCapacity,
				899	enum EInvariant) const
				900	{
				901	// if the arguments are illegal, then do nothing
				902	if(targetCapacity < 0 \|\| (targetCapacity > 0 && target == NULL)) {
				903	return 0;
				904	}
				905
				906	// pin the indices to legal values
				907	pinIndices(start, length);
				908
				909	if(length <= targetCapacity) {
				910	u_UCharsToChars(getArrayStart() + start, target, length);
				911	}
				912	UErrorCode status = U_ZERO_ERROR;
				913	return u_terminateChars(target, targetCapacity, length, &status);
				914	}
				915
				916	UnicodeString
				917	UnicodeString::tempSubString(int32_t start, int32_t len) const {
				918	pinIndices(start, len);
				919	const UChar *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer
				920	if(array==NULL) {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	921	array=fUnion.fStackFields.fBuffer; // anything not NULL because that would make an empty string
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	922	len=-2; // bogus result string
				923	}
				924	return UnicodeString(FALSE, array + start, len);
				925	}
				926
				927	int32_t
				928	UnicodeString::toUTF8(int32_t start, int32_t len,
				929	char *target, int32_t capacity) const {
				930	pinIndices(start, len);
				931	int32_t length8;
				932	UErrorCode errorCode = U_ZERO_ERROR;
				933	u_strToUTF8WithSub(target, capacity, &length8,
				934	getBuffer() + start, len,
				935	0xFFFD, // Standard substitution character.
				936	NULL, // Don't care about number of substitutions.
				937	&errorCode);
				938	return length8;
				939	}
				940
				941	#if U_CHARSET_IS_UTF8
				942
				943	int32_t
				944	UnicodeString::extract(int32_t start, int32_t len,
				945	char *target, uint32_t dstSize) const {
				946	// if the arguments are illegal, then do nothing
				947	if(/dstSize < 0 \|\| /(dstSize > 0 && target == 0)) {
				948	return 0;
				949	}
				950	return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
				951	}
				952
				953	// else see unistr_cnv.cpp
				954	#endif
				955
				956	void
				957	UnicodeString::extractBetween(int32_t start,
				958	int32_t limit,
				959	UnicodeString& target) const {
				960	pinIndex(start);
				961	pinIndex(limit);
				962	doExtract(start, limit - start, target);
				963	}
				964
				965	// When converting from UTF-16 to UTF-8, the result will have at most 3 times
				966	// as many bytes as the source has UChars.
				967	// The "worst cases" are writing systems like Indic, Thai and CJK with
				968	// 3:1 bytes:UChars.
				969	void
				970	UnicodeString::toUTF8(ByteSink &sink) const {
				971	int32_t length16 = length();
				972	if(length16 != 0) {
				973	char stackBuffer[1024];
				974	int32_t capacity = (int32_t)sizeof(stackBuffer);
				975	UBool utf8IsOwned = FALSE;
				976	char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
				977	3*length16,
				978	stackBuffer, capacity,
				979	&capacity);
				980	int32_t length8 = 0;
				981	UErrorCode errorCode = U_ZERO_ERROR;
				982	u_strToUTF8WithSub(utf8, capacity, &length8,
				983	getBuffer(), length16,
				984	0xFFFD, // Standard substitution character.
				985	NULL, // Don't care about number of substitutions.
				986	&errorCode);
				987	if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
				988	utf8 = (char *)uprv_malloc(length8);
				989	if(utf8 != NULL) {
				990	utf8IsOwned = TRUE;
				991	errorCode = U_ZERO_ERROR;
				992	u_strToUTF8WithSub(utf8, length8, &length8,
				993	getBuffer(), length16,
				994	0xFFFD, // Standard substitution character.
				995	NULL, // Don't care about number of substitutions.
				996	&errorCode);
				997	} else {
				998	errorCode = U_MEMORY_ALLOCATION_ERROR;
				999	}
				1000	}
				1001	if(U_SUCCESS(errorCode)) {
				1002	sink.Append(utf8, length8);
				1003	sink.Flush();
				1004	}
				1005	if(utf8IsOwned) {
				1006	uprv_free(utf8);
				1007	}
				1008	}
				1009	}
				1010
				1011	int32_t
				1012	UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
				1013	int32_t length32=0;
				1014	if(U_SUCCESS(errorCode)) {
				1015	// getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
				1016	u_strToUTF32WithSub(utf32, capacity, &length32,
				1017	getBuffer(), length(),
				1018	0xfffd, // Substitution character.
				1019	NULL, // Don't care about number of substitutions.
				1020	&errorCode);
				1021	}
				1022	return length32;
				1023	}
				1024
				1025	int32_t
				1026	UnicodeString::indexOf(const UChar *srcChars,
				1027	int32_t srcStart,
				1028	int32_t srcLength,
				1029	int32_t start,
				1030	int32_t length) const
				1031	{
				1032	if(isBogus() \|\| srcChars == 0 \|\| srcStart < 0 \|\| srcLength == 0) {
				1033	return -1;
				1034	}
				1035
				1036	// UnicodeString does not find empty substrings
				1037	if(srcLength < 0 && srcChars[srcStart] == 0) {
				1038	return -1;
				1039	}
				1040
				1041	// get the indices within bounds
				1042	pinIndices(start, length);
				1043
				1044	// find the first occurrence of the substring
				1045	const UChar *array = getArrayStart();
				1046	const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
				1047	if(match == NULL) {
				1048	return -1;
				1049	} else {
				1050	return (int32_t)(match - array);
				1051	}
				1052	}
				1053
				1054	int32_t
				1055	UnicodeString::doIndexOf(UChar c,
				1056	int32_t start,
				1057	int32_t length) const
				1058	{
				1059	// pin indices
				1060	pinIndices(start, length);
				1061
				1062	// find the first occurrence of c
				1063	const UChar *array = getArrayStart();
				1064	const UChar *match = u_memchr(array + start, c, length);
				1065	if(match == NULL) {
				1066	return -1;
				1067	} else {
				1068	return (int32_t)(match - array);
				1069	}
				1070	}
				1071
				1072	int32_t
				1073	UnicodeString::doIndexOf(UChar32 c,
				1074	int32_t start,
				1075	int32_t length) const {
				1076	// pin indices
				1077	pinIndices(start, length);
				1078
				1079	// find the first occurrence of c
				1080	const UChar *array = getArrayStart();
				1081	const UChar *match = u_memchr32(array + start, c, length);
				1082	if(match == NULL) {
				1083	return -1;
				1084	} else {
				1085	return (int32_t)(match - array);
				1086	}
				1087	}
				1088
				1089	int32_t
				1090	UnicodeString::lastIndexOf(const UChar *srcChars,
				1091	int32_t srcStart,
				1092	int32_t srcLength,
				1093	int32_t start,
				1094	int32_t length) const
				1095	{
				1096	if(isBogus() \|\| srcChars == 0 \|\| srcStart < 0 \|\| srcLength == 0) {
				1097	return -1;
				1098	}
				1099
				1100	// UnicodeString does not find empty substrings
				1101	if(srcLength < 0 && srcChars[srcStart] == 0) {
				1102	return -1;
				1103	}
				1104
				1105	// get the indices within bounds
				1106	pinIndices(start, length);
				1107
				1108	// find the last occurrence of the substring
				1109	const UChar *array = getArrayStart();
				1110	const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
				1111	if(match == NULL) {
				1112	return -1;
				1113	} else {
				1114	return (int32_t)(match - array);
				1115	}
				1116	}
				1117
				1118	int32_t
				1119	UnicodeString::doLastIndexOf(UChar c,
				1120	int32_t start,
				1121	int32_t length) const
				1122	{
				1123	if(isBogus()) {
				1124	return -1;
				1125	}
				1126
				1127	// pin indices
				1128	pinIndices(start, length);
				1129
				1130	// find the last occurrence of c
				1131	const UChar *array = getArrayStart();
				1132	const UChar *match = u_memrchr(array + start, c, length);
				1133	if(match == NULL) {
				1134	return -1;
				1135	} else {
				1136	return (int32_t)(match - array);
				1137	}
				1138	}
				1139
				1140	int32_t
				1141	UnicodeString::doLastIndexOf(UChar32 c,
				1142	int32_t start,
				1143	int32_t length) const {
				1144	// pin indices
				1145	pinIndices(start, length);
				1146
				1147	// find the last occurrence of c
				1148	const UChar *array = getArrayStart();
				1149	const UChar *match = u_memrchr32(array + start, c, length);
				1150	if(match == NULL) {
				1151	return -1;
				1152	} else {
				1153	return (int32_t)(match - array);
				1154	}
				1155	}
				1156
				1157	//========================================
				1158	// Write implementation
				1159	//========================================
				1160
				1161	UnicodeString&
				1162	UnicodeString::findAndReplace(int32_t start,
				1163	int32_t length,
				1164	const UnicodeString& oldText,
				1165	int32_t oldStart,
				1166	int32_t oldLength,
				1167	const UnicodeString& newText,
				1168	int32_t newStart,
				1169	int32_t newLength)
				1170	{
				1171	if(isBogus() \|\| oldText.isBogus() \|\| newText.isBogus()) {
				1172	return *this;
				1173	}
				1174
				1175	pinIndices(start, length);
				1176	oldText.pinIndices(oldStart, oldLength);
				1177	newText.pinIndices(newStart, newLength);
				1178
				1179	if(oldLength == 0) {
				1180	return *this;
				1181	}
				1182
				1183	while(length > 0 && length >= oldLength) {
				1184	int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
				1185	if(pos < 0) {
				1186	// no more oldText's here: done
				1187	break;
				1188	} else {
				1189	// we found oldText, replace it by newText and go beyond it
				1190	replace(pos, oldLength, newText, newStart, newLength);
				1191	length -= pos + oldLength - start;
				1192	start = pos + newLength;
				1193	}
				1194	}
				1195
				1196	return *this;
				1197	}
				1198
				1199
				1200	void
				1201	UnicodeString::setToBogus()
				1202	{
				1203	releaseArray();
				1204
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1205	fUnion.fFields.fLengthAndFlags = kIsBogus;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1206	fUnion.fFields.fArray = 0;
				1207	fUnion.fFields.fCapacity = 0;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1208	}
				1209
				1210	// turn a bogus string into an empty one
				1211	void
				1212	UnicodeString::unBogus() {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1213	if(fUnion.fFields.fLengthAndFlags & kIsBogus) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1214	setToEmpty();
				1215	}
				1216	}
				1217
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1218	const char16_t *
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1219	UnicodeString::getTerminatedBuffer() {
				1220	if(!isWritable()) {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1221	return nullptr;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1222	}
				1223	UChar *array = getArrayStart();
				1224	int32_t len = length();
				1225	if(len < getCapacity()) {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1226	if(fUnion.fFields.fLengthAndFlags & kBufferIsReadonly) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1227	// If len<capacity on a read-only alias, then array[len] is
				1228	// either the original NUL (if constructed with (TRUE, s, length))
				1229	// or one of the original string contents characters (if later truncated),
				1230	// therefore we can assume that array[len] is initialized memory.
				1231	if(array[len] == 0) {
				1232	return array;
				1233	}
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1234	} else if(((fUnion.fFields.fLengthAndFlags & kRefCounted) == 0 \|\| refCount() == 1)) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1235	// kRefCounted: Do not write the NUL if the buffer is shared.
				1236	// That is mostly safe, except when the length of one copy was modified
				1237	// without copy-on-write, e.g., via truncate(newLength) or remove(void).
				1238	// Then the NUL would be written into the middle of another copy's string.
				1239
				1240	// Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
				1241	// Do not test if there is a NUL already because it might be uninitialized memory.
				1242	// (That would be safe, but tools like valgrind & Purify would complain.)
				1243	array[len] = 0;
				1244	return array;
				1245	}
				1246	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1247	if(len<INT32_MAX && cloneArrayIfNeeded(len+1)) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1248	array = getArrayStart();
				1249	array[len] = 0;
				1250	return array;
				1251	} else {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1252	return nullptr;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1253	}
				1254	}
				1255
				1256	// setTo() analogous to the readonly-aliasing constructor with the same signature
				1257	UnicodeString &
				1258	UnicodeString::setTo(UBool isTerminated,
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1259	ConstChar16Ptr textPtr,
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1260	int32_t textLength)
				1261	{
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1262	if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1263	// do not modify a string that has an "open" getBuffer(minCapacity)
				1264	return *this;
				1265	}
				1266
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1267	const UChar *text = textPtr;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1268	if(text == NULL) {
				1269	// treat as an empty string, do not alias
				1270	releaseArray();
				1271	setToEmpty();
				1272	return *this;
				1273	}
				1274
				1275	if( textLength < -1 \|\|
				1276	(textLength == -1 && !isTerminated) \|\|
				1277	(textLength >= 0 && isTerminated && text[textLength] != 0)
				1278	) {
				1279	setToBogus();
				1280	return *this;
				1281	}
				1282
				1283	releaseArray();
				1284
				1285	if(textLength == -1) {
				1286	// text is terminated, or else it would have failed the above test
				1287	textLength = u_strlen(text);
				1288	}
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1289	fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1290	setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1291	return *this;
				1292	}
				1293
				1294	// setTo() analogous to the writable-aliasing constructor with the same signature
				1295	UnicodeString &
				1296	UnicodeString::setTo(UChar *buffer,
				1297	int32_t buffLength,
				1298	int32_t buffCapacity) {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1299	if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1300	// do not modify a string that has an "open" getBuffer(minCapacity)
				1301	return *this;
				1302	}
				1303
				1304	if(buffer == NULL) {
				1305	// treat as an empty string, do not alias
				1306	releaseArray();
				1307	setToEmpty();
				1308	return *this;
				1309	}
				1310
				1311	if(buffLength < -1 \|\| buffCapacity < 0 \|\| buffLength > buffCapacity) {
				1312	setToBogus();
				1313	return *this;
				1314	} else if(buffLength == -1) {
				1315	// buffLength = u_strlen(buff); but do not look beyond buffCapacity
				1316	const UChar p = buffer, limit = buffer + buffCapacity;
				1317	while(p != limit && *p != 0) {
				1318	++p;
				1319	}
				1320	buffLength = (int32_t)(p - buffer);
				1321	}
				1322
				1323	releaseArray();
				1324
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1325	fUnion.fFields.fLengthAndFlags = kWritableAlias;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1326	setArray(buffer, buffLength, buffCapacity);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1327	return *this;
				1328	}
				1329
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1330	UnicodeString &UnicodeString::setToUTF8(StringPiece utf8) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1331	unBogus();
				1332	int32_t length = utf8.length();
				1333	int32_t capacity;
				1334	// The UTF-16 string will be at most as long as the UTF-8 string.
				1335	if(length <= US_STACKBUF_SIZE) {
				1336	capacity = US_STACKBUF_SIZE;
				1337	} else {
				1338	capacity = length + 1; // +1 for the terminating NUL.
				1339	}
				1340	UChar *utf16 = getBuffer(capacity);
				1341	int32_t length16;
				1342	UErrorCode errorCode = U_ZERO_ERROR;
				1343	u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
				1344	utf8.data(), length,
				1345	0xfffd, // Substitution character.
				1346	NULL, // Don't care about number of substitutions.
				1347	&errorCode);
				1348	releaseBuffer(length16);
				1349	if(U_FAILURE(errorCode)) {
				1350	setToBogus();
				1351	}
				1352	return *this;
				1353	}
				1354
				1355	UnicodeString&
				1356	UnicodeString::setCharAt(int32_t offset,
				1357	UChar c)
				1358	{
				1359	int32_t len = length();
				1360	if(cloneArrayIfNeeded() && len > 0) {
				1361	if(offset < 0) {
				1362	offset = 0;
				1363	} else if(offset >= len) {
				1364	offset = len - 1;
				1365	}
				1366
				1367	getArrayStart()[offset] = c;
				1368	}
				1369	return *this;
				1370	}
				1371
				1372	UnicodeString&
				1373	UnicodeString::replace(int32_t start,
				1374	int32_t _length,
				1375	UChar32 srcChar) {
				1376	UChar buffer[U16_MAX_LENGTH];
				1377	int32_t count = 0;
				1378	UBool isError = FALSE;
				1379	U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
				1380	// We test isError so that the compiler does not complain that we don't.
				1381	// If isError (srcChar is not a valid code point) then count==0 which means
				1382	// we remove the source segment rather than replacing it with srcChar.
				1383	return doReplace(start, _length, buffer, 0, isError ? 0 : count);
				1384	}
				1385
				1386	UnicodeString&
				1387	UnicodeString::append(UChar32 srcChar) {
				1388	UChar buffer[U16_MAX_LENGTH];
				1389	int32_t _length = 0;
				1390	UBool isError = FALSE;
				1391	U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
				1392	// We test isError so that the compiler does not complain that we don't.
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1393	// If isError then _length==0 which turns the doAppend() into a no-op anyway.
				1394	return isError ? *this : doAppend(buffer, 0, _length);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1395	}
				1396
				1397	UnicodeString&
				1398	UnicodeString::doReplace( int32_t start,
				1399	int32_t length,
				1400	const UnicodeString& src,
				1401	int32_t srcStart,
				1402	int32_t srcLength)
				1403	{
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1404	// pin the indices to legal values
				1405	src.pinIndices(srcStart, srcLength);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1406
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1407	// get the characters from src
				1408	// and replace the range in ourselves with them
				1409	return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1410	}
				1411
				1412	UnicodeString&
				1413	UnicodeString::doReplace(int32_t start,
				1414	int32_t length,
				1415	const UChar *srcChars,
				1416	int32_t srcStart,
				1417	int32_t srcLength)
				1418	{
				1419	if(!isWritable()) {
				1420	return *this;
				1421	}
				1422
				1423	int32_t oldLength = this->length();
				1424
				1425	// optimize (read-only alias).remove(0, start) and .remove(start, end)
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1426	if((fUnion.fFields.fLengthAndFlags&kBufferIsReadonly) && srcLength == 0) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1427	if(start == 0) {
				1428	// remove prefix by adjusting the array pointer
				1429	pinIndex(length);
				1430	fUnion.fFields.fArray += length;
				1431	fUnion.fFields.fCapacity -= length;
				1432	setLength(oldLength - length);
				1433	return *this;
				1434	} else {
				1435	pinIndex(start);
				1436	if(length >= (oldLength - start)) {
				1437	// remove suffix by reducing the length (like truncate())
				1438	setLength(start);
				1439	fUnion.fFields.fCapacity = start; // not NUL-terminated any more
				1440	return *this;
				1441	}
				1442	}
				1443	}
				1444
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1445	if(start == oldLength) {
				1446	return doAppend(srcChars, srcStart, srcLength);
				1447	}
				1448
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1449	if(srcChars == 0) {
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame^]	1450	srcLength = 0;
				1451	} else {
				1452	// Perform all remaining operations relative to srcChars + srcStart.
				1453	// From this point forward, do not use srcStart.
				1454	srcChars += srcStart;
				1455	if (srcLength < 0) {
				1456	// get the srcLength if necessary
				1457	srcLength = u_strlen(srcChars);
				1458	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1459	}
				1460
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1461	// pin the indices to legal values
				1462	pinIndices(start, length);
				1463
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1464	// Calculate the size of the string after the replace.
				1465	// Avoid int32_t overflow.
				1466	int32_t newLength = oldLength - length;
				1467	if(srcLength > (INT32_MAX - newLength)) {
				1468	setToBogus();
				1469	return *this;
				1470	}
				1471	newLength += srcLength;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1472
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame^]	1473	// Check for insertion into ourself
				1474	const UChar *oldArray = getArrayStart();
				1475	if (isBufferWritable() &&
				1476	oldArray < srcChars + srcLength &&
				1477	srcChars < oldArray + oldLength) {
				1478	// Copy into a new UnicodeString and start over
				1479	UnicodeString copy(srcChars, srcLength);
				1480	if (copy.isBogus()) {
				1481	setToBogus();
				1482	return *this;
				1483	}
				1484	return doReplace(start, length, copy.getArrayStart(), 0, srcLength);
				1485	}
				1486
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1487	// cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy the current contents;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1488	// therefore we need to keep the current fArray
				1489	UChar oldStackBuffer[US_STACKBUF_SIZE];
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1490	if((fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1491	// copy the stack buffer contents because it will be overwritten with
				1492	// fUnion.fFields values
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame^]	1493	u_memcpy(oldStackBuffer, oldArray, oldLength);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1494	oldArray = oldStackBuffer;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1495	}
				1496
				1497	// clone our array and allocate a bigger array if needed
				1498	int32_t *bufferToDelete = 0;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1499	if(!cloneArrayIfNeeded(newLength, getGrowCapacity(newLength),
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1500	FALSE, &bufferToDelete)
				1501	) {
				1502	return *this;
				1503	}
				1504
				1505	// now do the replace
				1506
				1507	UChar *newArray = getArrayStart();
				1508	if(newArray != oldArray) {
				1509	// if fArray changed, then we need to copy everything except what will change
				1510	us_arrayCopy(oldArray, 0, newArray, 0, start);
				1511	us_arrayCopy(oldArray, start + length,
				1512	newArray, start + srcLength,
				1513	oldLength - (start + length));
				1514	} else if(length != srcLength) {
				1515	// fArray did not change; copy only the portion that isn't changing, leaving a hole
				1516	us_arrayCopy(oldArray, start + length,
				1517	newArray, start + srcLength,
				1518	oldLength - (start + length));
				1519	}
				1520
				1521	// now fill in the hole with the new string
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame^]	1522	us_arrayCopy(srcChars, 0, newArray, start, srcLength);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1523
				1524	setLength(newLength);
				1525
				1526	// delayed delete in case srcChars == fArray when we started, and
				1527	// to keep oldArray alive for the above operations
				1528	if (bufferToDelete) {
				1529	uprv_free(bufferToDelete);
				1530	}
				1531
				1532	return *this;
				1533	}
				1534
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1535	// Versions of doReplace() only for append() variants.
				1536	// doReplace() and doAppend() optimize for different cases.
				1537
				1538	UnicodeString&
				1539	UnicodeString::doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength) {
				1540	if(srcLength == 0) {
				1541	return *this;
				1542	}
				1543
				1544	// pin the indices to legal values
				1545	src.pinIndices(srcStart, srcLength);
				1546	return doAppend(src.getArrayStart(), srcStart, srcLength);
				1547	}
				1548
				1549	UnicodeString&
				1550	UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLength) {
				1551	if(!isWritable() \|\| srcLength == 0 \|\| srcChars == NULL) {
				1552	return *this;
				1553	}
				1554
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame^]	1555	// Perform all remaining operations relative to srcChars + srcStart.
				1556	// From this point forward, do not use srcStart.
				1557	srcChars += srcStart;
				1558
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1559	if(srcLength < 0) {
				1560	// get the srcLength if necessary
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame^]	1561	if((srcLength = u_strlen(srcChars)) == 0) {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1562	return *this;
				1563	}
				1564	}
				1565
				1566	int32_t oldLength = length();
				1567	int32_t newLength = oldLength + srcLength;
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame^]	1568
				1569	// Check for append onto ourself
				1570	const UChar* oldArray = getArrayStart();
				1571	if (isBufferWritable() &&
				1572	oldArray < srcChars + srcLength &&
				1573	srcChars < oldArray + oldLength) {
				1574	// Copy into a new UnicodeString and start over
				1575	UnicodeString copy(srcChars, srcLength);
				1576	if (copy.isBogus()) {
				1577	setToBogus();
				1578	return *this;
				1579	}
				1580	return doAppend(copy.getArrayStart(), 0, srcLength);
				1581	}
				1582
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1583	// optimize append() onto a large-enough, owned string
				1584	if((newLength <= getCapacity() && isBufferWritable()) \|\|
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1585	cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1586	UChar *newArray = getArrayStart();
				1587	// Do not copy characters when
				1588	// UChar *buffer=str.getAppendBuffer(...);
				1589	// is followed by
				1590	// str.append(buffer, length);
				1591	// or
				1592	// str.appendString(buffer, length)
				1593	// or similar.
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame^]	1594	if(srcChars != newArray + oldLength) {
				1595	us_arrayCopy(srcChars, 0, newArray, oldLength, srcLength);
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1596	}
				1597	setLength(newLength);
				1598	}
				1599	return *this;
				1600	}
				1601
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1602	/**
				1603	* Replaceable API
				1604	*/
				1605	void
				1606	UnicodeString::handleReplaceBetween(int32_t start,
				1607	int32_t limit,
				1608	const UnicodeString& text) {
				1609	replaceBetween(start, limit, text);
				1610	}
				1611
				1612	/**
				1613	* Replaceable API
				1614	*/
				1615	void
				1616	UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
				1617	if (limit <= start) {
				1618	return; // Nothing to do; avoid bogus malloc call
				1619	}
				1620	UChar* text = (UChar) uprv_malloc( sizeof(UChar) (limit - start) );
				1621	// Check to make sure text is not null.
				1622	if (text != NULL) {
				1623	extractBetween(start, limit, text, 0);
				1624	insert(dest, text, 0, limit - start);
				1625	uprv_free(text);
				1626	}
				1627	}
				1628
				1629	/**
				1630	* Replaceable API
				1631	*
				1632	* NOTE: This is for the Replaceable class. There is no rep.cpp,
				1633	* so we implement this function here.
				1634	*/
				1635	UBool Replaceable::hasMetaData() const {
				1636	return TRUE;
				1637	}
				1638
				1639	/**
				1640	* Replaceable API
				1641	*/
				1642	UBool UnicodeString::hasMetaData() const {
				1643	return FALSE;
				1644	}
				1645
				1646	UnicodeString&
				1647	UnicodeString::doReverse(int32_t start, int32_t length) {
				1648	if(length <= 1 \|\| !cloneArrayIfNeeded()) {
				1649	return *this;
				1650	}
				1651
				1652	// pin the indices to legal values
				1653	pinIndices(start, length);
				1654	if(length <= 1) { // pinIndices() might have shrunk the length
				1655	return *this;
				1656	}
				1657
				1658	UChar *left = getArrayStart() + start;
				1659	UChar *right = left + length - 1; // -1 for inclusive boundary (length>=2)
				1660	UChar swap;
				1661	UBool hasSupplementary = FALSE;
				1662
				1663	// Before the loop we know left<right because length>=2.
				1664	do {
				1665	hasSupplementary \|= (UBool)U16_IS_LEAD(swap = *left);
				1666	hasSupplementary \|= (UBool)U16_IS_LEAD(left++ = right);
				1667	*right-- = swap;
				1668	} while(left < right);
				1669	// Make sure to test the middle code unit of an odd-length string.
				1670	// Redundant if the length is even.
				1671	hasSupplementary \|= (UBool)U16_IS_LEAD(*left);
				1672
				1673	/* if there are supplementary code points in the reversed range, then re-swap their surrogates */
				1674	if(hasSupplementary) {
				1675	UChar swap2;
				1676
				1677	left = getArrayStart() + start;
				1678	right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
				1679	while(left < right) {
				1680	if(U16_IS_TRAIL(swap = left) && U16_IS_LEAD(swap2 = (left + 1))) {
				1681	*left++ = swap2;
				1682	*left++ = swap;
				1683	} else {
				1684	++left;
				1685	}
				1686	}
				1687	}
				1688
				1689	return *this;
				1690	}
				1691
				1692	UBool
				1693	UnicodeString::padLeading(int32_t targetLength,
				1694	UChar padChar)
				1695	{
				1696	int32_t oldLength = length();
				1697	if(oldLength >= targetLength \|\| !cloneArrayIfNeeded(targetLength)) {
				1698	return FALSE;
				1699	} else {
				1700	// move contents up by padding width
				1701	UChar *array = getArrayStart();
				1702	int32_t start = targetLength - oldLength;
				1703	us_arrayCopy(array, 0, array, start, oldLength);
				1704
				1705	// fill in padding character
				1706	while(--start >= 0) {
				1707	array[start] = padChar;
				1708	}
				1709	setLength(targetLength);
				1710	return TRUE;
				1711	}
				1712	}
				1713
				1714	UBool
				1715	UnicodeString::padTrailing(int32_t targetLength,
				1716	UChar padChar)
				1717	{
				1718	int32_t oldLength = length();
				1719	if(oldLength >= targetLength \|\| !cloneArrayIfNeeded(targetLength)) {
				1720	return FALSE;
				1721	} else {
				1722	// fill in padding character
				1723	UChar *array = getArrayStart();
				1724	int32_t length = targetLength;
				1725	while(--length >= oldLength) {
				1726	array[length] = padChar;
				1727	}
				1728	setLength(targetLength);
				1729	return TRUE;
				1730	}
				1731	}
				1732
				1733	//========================================
				1734	// Hashing
				1735	//========================================
				1736	int32_t
				1737	UnicodeString::doHashCode() const
				1738	{
				1739	/* Delegate hash computation to uhash. This makes UnicodeString
				1740	* hashing consistent with UChar* hashing. */
				1741	int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
				1742	if (hashCode == kInvalidHashCode) {
				1743	hashCode = kEmptyHashCode;
				1744	}
				1745	return hashCode;
				1746	}
				1747
				1748	//========================================
				1749	// External Buffer
				1750	//========================================
				1751
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1752	char16_t *
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1753	UnicodeString::getBuffer(int32_t minCapacity) {
				1754	if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1755	fUnion.fFields.fLengthAndFlags\|=kOpenGetBuffer;
				1756	setZeroLength();
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1757	return getArrayStart();
				1758	} else {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1759	return nullptr;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1760	}
				1761	}
				1762
				1763	void
				1764	UnicodeString::releaseBuffer(int32_t newLength) {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1765	if(fUnion.fFields.fLengthAndFlags&kOpenGetBuffer && newLength>=-1) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1766	// set the new fLength
				1767	int32_t capacity=getCapacity();
				1768	if(newLength==-1) {
				1769	// the new length is the string length, capped by fCapacity
				1770	const UChar array=getArrayStart(), p=array, *limit=array+capacity;
				1771	while(p<limit && *p!=0) {
				1772	++p;
				1773	}
				1774	newLength=(int32_t)(p-array);
				1775	} else if(newLength>capacity) {
				1776	newLength=capacity;
				1777	}
				1778	setLength(newLength);
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1779	fUnion.fFields.fLengthAndFlags&=~kOpenGetBuffer;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1780	}
				1781	}
				1782
				1783	//========================================
				1784	// Miscellaneous
				1785	//========================================
				1786	UBool
				1787	UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
				1788	int32_t growCapacity,
				1789	UBool doCopyArray,
				1790	int32_t **pBufferToDelete,
				1791	UBool forceClone) {
				1792	// default parameters need to be static, therefore
				1793	// the defaults are -1 to have convenience defaults
				1794	if(newCapacity == -1) {
				1795	newCapacity = getCapacity();
				1796	}
				1797
				1798	// while a getBuffer(minCapacity) is "open",
				1799	// prevent any modifications of the string by returning FALSE here
				1800	// if the string is bogus, then only an assignment or similar can revive it
				1801	if(!isWritable()) {
				1802	return FALSE;
				1803	}
				1804
				1805	/*
				1806	* We need to make a copy of the array if
				1807	* the buffer is read-only, or
				1808	* the buffer is refCounted (shared), and refCount>1, or
				1809	* the buffer is too small.
				1810	* Return FALSE if memory could not be allocated.
				1811	*/
				1812	if(forceClone \|\|
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1813	fUnion.fFields.fLengthAndFlags & kBufferIsReadonly \|\|
				1814	(fUnion.fFields.fLengthAndFlags & kRefCounted && refCount() > 1) \|\|
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1815	newCapacity > getCapacity()
				1816	) {
				1817	// check growCapacity for default value and use of the stack buffer
				1818	if(growCapacity < 0) {
				1819	growCapacity = newCapacity;
				1820	} else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
				1821	growCapacity = US_STACKBUF_SIZE;
				1822	}
				1823
				1824	// save old values
				1825	UChar oldStackBuffer[US_STACKBUF_SIZE];
				1826	UChar *oldArray;
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1827	int32_t oldLength = length();
				1828	int16_t flags = fUnion.fFields.fLengthAndFlags;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1829
				1830	if(flags&kUsingStackBuffer) {
				1831	U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */
				1832	if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
				1833	// copy the stack buffer contents because it will be overwritten with
				1834	// fUnion.fFields values
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1835	us_arrayCopy(fUnion.fStackFields.fBuffer, 0, oldStackBuffer, 0, oldLength);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1836	oldArray = oldStackBuffer;
				1837	} else {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1838	oldArray = NULL; // no need to copy from the stack buffer to itself
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1839	}
				1840	} else {
				1841	oldArray = fUnion.fFields.fArray;
				1842	U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */
				1843	}
				1844
				1845	// allocate a new array
				1846	if(allocate(growCapacity) \|\|
				1847	(newCapacity < growCapacity && allocate(newCapacity))
				1848	) {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1849	if(doCopyArray) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1850	// copy the contents
				1851	// do not copy more than what fits - it may be smaller than before
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1852	int32_t minLength = oldLength;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1853	newCapacity = getCapacity();
				1854	if(newCapacity < minLength) {
				1855	minLength = newCapacity;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1856	}
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1857	if(oldArray != NULL) {
				1858	us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
				1859	}
				1860	setLength(minLength);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1861	} else {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1862	setZeroLength();
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1863	}
				1864
				1865	// release the old array
				1866	if(flags & kRefCounted) {
				1867	// the array is refCounted; decrement and release if 0
				1868	u_atomic_int32_t pRefCount = ((u_atomic_int32_t )oldArray - 1);
				1869	if(umtx_atomic_dec(pRefCount) == 0) {
				1870	if(pBufferToDelete == 0) {
				1871	// Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
				1872	// is defined as volatile. (Volatile has useful non-standard behavior
				1873	// with this compiler.)
				1874	uprv_free((void *)pRefCount);
				1875	} else {
				1876	// the caller requested to delete it himself
				1877	pBufferToDelete = (int32_t )pRefCount;
				1878	}
				1879	}
				1880	}
				1881	} else {
				1882	// not enough memory for growCapacity and not even for the smaller newCapacity
				1883	// reset the old values for setToBogus() to release the array
				1884	if(!(flags&kUsingStackBuffer)) {
				1885	fUnion.fFields.fArray = oldArray;
				1886	}
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1887	fUnion.fFields.fLengthAndFlags = flags;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1888	setToBogus();
				1889	return FALSE;
				1890	}
				1891	}
				1892	return TRUE;
				1893	}
				1894
				1895	// UnicodeStringAppendable ------------------------------------------------- ***
				1896
				1897	UnicodeStringAppendable::~UnicodeStringAppendable() {}
				1898
				1899	UBool
				1900	UnicodeStringAppendable::appendCodeUnit(UChar c) {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1901	return str.doAppend(&c, 0, 1).isWritable();
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1902	}
				1903
				1904	UBool
				1905	UnicodeStringAppendable::appendCodePoint(UChar32 c) {
				1906	UChar buffer[U16_MAX_LENGTH];
				1907	int32_t cLength = 0;
				1908	UBool isError = FALSE;
				1909	U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1910	return !isError && str.doAppend(buffer, 0, cLength).isWritable();
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1911	}
				1912
				1913	UBool
				1914	UnicodeStringAppendable::appendString(const UChar *s, int32_t length) {
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	1915	return str.doAppend(s, 0, length).isWritable();
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1916	}
				1917
				1918	UBool
				1919	UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
				1920	return str.cloneArrayIfNeeded(str.length() + appendCapacity);
				1921	}
				1922
				1923	UChar *
				1924	UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
				1925	int32_t desiredCapacityHint,
				1926	UChar *scratch, int32_t scratchCapacity,
				1927	int32_t *resultCapacity) {
				1928	if(minCapacity < 1 \|\| scratchCapacity < minCapacity) {
				1929	*resultCapacity = 0;
				1930	return NULL;
				1931	}
				1932	int32_t oldLength = str.length();
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1933	if(minCapacity <= (kMaxCapacity - oldLength) &&
				1934	desiredCapacityHint <= (kMaxCapacity - oldLength) &&
				1935	str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1936	*resultCapacity = str.getCapacity() - oldLength;
				1937	return str.getArrayStart() + oldLength;
				1938	}
				1939	*resultCapacity = scratchCapacity;
				1940	return scratch;
				1941	}
				1942
				1943	U_NAMESPACE_END
				1944
				1945	U_NAMESPACE_USE
				1946
				1947	U_CAPI int32_t U_EXPORT2
				1948	uhash_hashUnicodeString(const UElement key) {
				1949	const UnicodeString str = (const UnicodeString) key.pointer;
				1950	return (str == NULL) ? 0 : str->hashCode();
				1951	}
				1952
				1953	// Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
				1954	// does not depend on hashtable code.
				1955	U_CAPI UBool U_EXPORT2
				1956	uhash_compareUnicodeString(const UElement key1, const UElement key2) {
				1957	const UnicodeString str1 = (const UnicodeString) key1.pointer;
				1958	const UnicodeString str2 = (const UnicodeString) key2.pointer;
				1959	if (str1 == str2) {
				1960	return TRUE;
				1961	}
				1962	if (str1 == NULL \|\| str2 == NULL) {
				1963	return FALSE;
				1964	}
				1965	return str1 == str2;
				1966	}
				1967
				1968	#ifdef U_STATIC_IMPLEMENTATION
				1969	/*
				1970	This should never be called. It is defined here to make sure that the
				1971	virtual vector deleting destructor is defined within unistr.cpp.
				1972	The vector deleting destructor is already a part of UObject,
				1973	but defining it here makes sure that it is included with this object file.
				1974	This makes sure that static library dependencies are kept to a minimum.
				1975	*/
				1976	static void uprv_UnicodeStringDummy(void) {
				1977	delete [] (new UnicodeString[2]);
				1978	}
				1979	#endif