blob: c8b6c0a3a46319219b7e9d15a220dc5930786401 [file] [log] [blame]
Jungshik Shin87232d82017-05-13 21:10:13 -07001// © 2016 and later: Unicode, Inc. and others.
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002// License & terms of use: http://www.unicode.org/copyright.html
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00003/*
4******************************************************************************
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07005* Copyright (C) 1999-2016, International Business Machines Corporation and
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00006* others. All Rights Reserved.
7******************************************************************************
8*
9* File unistr.cpp
10*
11* Modification History:
12*
13* Date Name Description
14* 09/25/98 stephen Creation.
15* 04/20/99 stephen Overhauled per 4/16 code review.
16* 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
17* 11/18/99 aliu Added handleReplaceBetween() to make inherit from
18* Replaceable.
19* 06/25/01 grhoten Removed the dependency on iostream
20******************************************************************************
21*/
22
23#include "unicode/utypes.h"
24#include "unicode/appendable.h"
25#include "unicode/putil.h"
26#include "cstring.h"
27#include "cmemory.h"
28#include "unicode/ustring.h"
29#include "unicode/unistr.h"
30#include "unicode/utf.h"
31#include "unicode/utf16.h"
32#include "uelement.h"
33#include "ustr_imp.h"
34#include "umutex.h"
35#include "uassert.h"
36
37#if 0
38
39#include <iostream>
40using namespace std;
41
42//DEBUGGING
43void
44print(const UnicodeString& s,
45 const char *name)
46{
47 UChar c;
48 cout << name << ":|";
49 for(int i = 0; i < s.length(); ++i) {
50 c = s[i];
51 if(c>= 0x007E || c < 0x0020)
52 cout << "[0x" << hex << s[i] << "]";
53 else
54 cout << (char) s[i];
55 }
56 cout << '|' << endl;
57}
58
59void
60print(const UChar *s,
61 int32_t len,
62 const char *name)
63{
64 UChar c;
65 cout << name << ":|";
66 for(int i = 0; i < len; ++i) {
67 c = s[i];
68 if(c>= 0x007E || c < 0x0020)
69 cout << "[0x" << hex << s[i] << "]";
70 else
71 cout << (char) s[i];
72 }
73 cout << '|' << endl;
74}
75// END DEBUGGING
76#endif
77
78// Local function definitions for now
79
80// need to copy areas that may overlap
81static
82inline void
83us_arrayCopy(const UChar *src, int32_t srcStart,
84 UChar *dst, int32_t dstStart, int32_t count)
85{
86 if(count>0) {
Jungshik Shin5feb9ad2016-10-21 12:52:48 -070087 uprv_memmove(dst+dstStart, src+srcStart, (size_t)count*sizeof(*src));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000088 }
89}
90
91// u_unescapeAt() callback to get a UChar from a UnicodeString
92U_CDECL_BEGIN
93static UChar U_CALLCONV
94UnicodeString_charAt(int32_t offset, void *context) {
95 return ((icu::UnicodeString*) context)->charAt(offset);
96}
97U_CDECL_END
98
99U_NAMESPACE_BEGIN
100
101/* The Replaceable virtual destructor can't be defined in the header
102 due to how AIX works with multiple definitions of virtual functions.
103*/
104Replaceable::~Replaceable() {}
105
106UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
107
108UnicodeString U_EXPORT2
109operator+ (const UnicodeString &s1, const UnicodeString &s2) {
110 return
111 UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
112 append(s1).
113 append(s2);
114}
115
116//========================================
117// Reference Counting functions, put at top of file so that optimizing compilers
118// have a chance to automatically inline.
119//========================================
120
121void
122UnicodeString::addRef() {
123 umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
124}
125
126int32_t
127UnicodeString::removeRef() {
128 return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
129}
130
131int32_t
132UnicodeString::refCount() const {
133 return umtx_loadAcquire(*((u_atomic_int32_t *)fUnion.fFields.fArray - 1));
134}
135
136void
137UnicodeString::releaseArray() {
Jungshik Shin70f82502016-01-29 00:32:36 -0800138 if((fUnion.fFields.fLengthAndFlags & kRefCounted) && removeRef() == 0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000139 uprv_free((int32_t *)fUnion.fFields.fArray - 1);
140 }
141}
142
143
144
145//========================================
146// Constructors
147//========================================
148
149// The default constructor is inline in unistr.h.
150
Jungshik Shin70f82502016-01-29 00:32:36 -0800151UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) {
152 fUnion.fFields.fLengthAndFlags = 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000153 if(count <= 0 || (uint32_t)c > 0x10ffff) {
154 // just allocate and do not do anything else
155 allocate(capacity);
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700156 } else if(c <= 0xffff) {
157 int32_t length = count;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000158 if(capacity < length) {
159 capacity = length;
160 }
161 if(allocate(capacity)) {
162 UChar *array = getArrayStart();
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700163 UChar unit = (UChar)c;
164 for(int32_t i = 0; i < length; ++i) {
165 array[i] = unit;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000166 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700167 setLength(length);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000168 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700169 } else { // supplementary code point, write surrogate pairs
170 if(count > (INT32_MAX / 2)) {
171 // We would get more than 2G UChars.
172 allocate(capacity);
173 return;
174 }
175 int32_t length = count * 2;
176 if(capacity < length) {
177 capacity = length;
178 }
179 if(allocate(capacity)) {
180 UChar *array = getArrayStart();
181 UChar lead = U16_LEAD(c);
182 UChar trail = U16_TRAIL(c);
183 for(int32_t i = 0; i < length; i += 2) {
184 array[i] = lead;
185 array[i + 1] = trail;
186 }
187 setLength(length);
188 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000189 }
190}
191
Jungshik Shin70f82502016-01-29 00:32:36 -0800192UnicodeString::UnicodeString(UChar ch) {
193 fUnion.fFields.fLengthAndFlags = kLength1 | kShortString;
194 fUnion.fStackFields.fBuffer[0] = ch;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000195}
196
Jungshik Shin70f82502016-01-29 00:32:36 -0800197UnicodeString::UnicodeString(UChar32 ch) {
198 fUnion.fFields.fLengthAndFlags = kShortString;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000199 int32_t i = 0;
200 UBool isError = FALSE;
Jungshik Shin70f82502016-01-29 00:32:36 -0800201 U16_APPEND(fUnion.fStackFields.fBuffer, i, US_STACKBUF_SIZE, ch, isError);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000202 // We test isError so that the compiler does not complain that we don't.
203 // If isError then i==0 which is what we want anyway.
204 if(!isError) {
Jungshik Shin70f82502016-01-29 00:32:36 -0800205 setShortLength(i);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000206 }
207}
208
Jungshik Shin70f82502016-01-29 00:32:36 -0800209UnicodeString::UnicodeString(const UChar *text) {
210 fUnion.fFields.fLengthAndFlags = kShortString;
211 doAppend(text, 0, -1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000212}
213
214UnicodeString::UnicodeString(const UChar *text,
Jungshik Shin70f82502016-01-29 00:32:36 -0800215 int32_t textLength) {
216 fUnion.fFields.fLengthAndFlags = kShortString;
217 doAppend(text, 0, textLength);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000218}
219
220UnicodeString::UnicodeString(UBool isTerminated,
Jungshik Shin87232d82017-05-13 21:10:13 -0700221 ConstChar16Ptr textPtr,
Jungshik Shin70f82502016-01-29 00:32:36 -0800222 int32_t textLength) {
223 fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
Jungshik Shin87232d82017-05-13 21:10:13 -0700224 const UChar *text = textPtr;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000225 if(text == NULL) {
226 // treat as an empty string, do not alias
227 setToEmpty();
228 } else if(textLength < -1 ||
229 (textLength == -1 && !isTerminated) ||
230 (textLength >= 0 && isTerminated && text[textLength] != 0)
231 ) {
232 setToBogus();
233 } else {
234 if(textLength == -1) {
235 // text is terminated, or else it would have failed the above test
236 textLength = u_strlen(text);
237 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700238 setArray(const_cast<UChar *>(text), textLength,
239 isTerminated ? textLength + 1 : textLength);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000240 }
241}
242
243UnicodeString::UnicodeString(UChar *buff,
244 int32_t buffLength,
Jungshik Shin70f82502016-01-29 00:32:36 -0800245 int32_t buffCapacity) {
246 fUnion.fFields.fLengthAndFlags = kWritableAlias;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000247 if(buff == NULL) {
248 // treat as an empty string, do not alias
249 setToEmpty();
250 } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
251 setToBogus();
252 } else {
253 if(buffLength == -1) {
254 // fLength = u_strlen(buff); but do not look beyond buffCapacity
255 const UChar *p = buff, *limit = buff + buffCapacity;
256 while(p != limit && *p != 0) {
257 ++p;
258 }
259 buffLength = (int32_t)(p - buff);
260 }
261 setArray(buff, buffLength, buffCapacity);
262 }
263}
264
Jungshik Shin70f82502016-01-29 00:32:36 -0800265UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) {
266 fUnion.fFields.fLengthAndFlags = kShortString;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000267 if(src==NULL) {
268 // treat as an empty string
269 } else {
270 if(length<0) {
271 length=(int32_t)uprv_strlen(src);
272 }
273 if(cloneArrayIfNeeded(length, length, FALSE)) {
274 u_charsToUChars(src, getArrayStart(), length);
275 setLength(length);
276 } else {
277 setToBogus();
278 }
279 }
280}
281
282#if U_CHARSET_IS_UTF8
283
Jungshik Shin70f82502016-01-29 00:32:36 -0800284UnicodeString::UnicodeString(const char *codepageData) {
285 fUnion.fFields.fLengthAndFlags = kShortString;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000286 if(codepageData != 0) {
287 setToUTF8(codepageData);
288 }
289}
290
Jungshik Shin70f82502016-01-29 00:32:36 -0800291UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) {
292 fUnion.fFields.fLengthAndFlags = kShortString;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000293 // if there's nothing to convert, do nothing
294 if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
295 return;
296 }
297 if(dataLength == -1) {
298 dataLength = (int32_t)uprv_strlen(codepageData);
299 }
300 setToUTF8(StringPiece(codepageData, dataLength));
301}
302
303// else see unistr_cnv.cpp
304#endif
305
Jungshik Shin70f82502016-01-29 00:32:36 -0800306UnicodeString::UnicodeString(const UnicodeString& that) {
307 fUnion.fFields.fLengthAndFlags = kShortString;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000308 copyFrom(that);
309}
310
Jungshik Shin70f82502016-01-29 00:32:36 -0800311UnicodeString::UnicodeString(UnicodeString &&src) U_NOEXCEPT {
312 fUnion.fFields.fLengthAndFlags = kShortString;
313 moveFrom(src);
314}
Jungshik Shin70f82502016-01-29 00:32:36 -0800315
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000316UnicodeString::UnicodeString(const UnicodeString& that,
Jungshik Shin70f82502016-01-29 00:32:36 -0800317 int32_t srcStart) {
318 fUnion.fFields.fLengthAndFlags = kShortString;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000319 setTo(that, srcStart);
320}
321
322UnicodeString::UnicodeString(const UnicodeString& that,
323 int32_t srcStart,
Jungshik Shin70f82502016-01-29 00:32:36 -0800324 int32_t srcLength) {
325 fUnion.fFields.fLengthAndFlags = kShortString;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000326 setTo(that, srcStart, srcLength);
327}
328
329// Replaceable base class clone() default implementation, does not clone
330Replaceable *
331Replaceable::clone() const {
332 return NULL;
333}
334
335// UnicodeString overrides clone() with a real implementation
336Replaceable *
337UnicodeString::clone() const {
338 return new UnicodeString(*this);
339}
340
341//========================================
342// array allocation
343//========================================
344
namespace {

// Fixed part of the growth increment computed by getGrowCapacity().
const int32_t kGrowSize = 128;

// Upper bound for a heap buffer capacity.
// The bytes for one int32_t reference counter plus capacity UChars must fit
// into a 32-bit size_t (at least on a 32-bit platform). One more UChar is
// added for the NUL terminator (avoids reallocation in getTerminatedBuffer()),
// and the byte count is rounded up to a multiple of 16.
// Hence capacity <= (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5.
// (More complicated checks could allow up to 0x7ffffffd without rounding up,
// but that does not seem worth it.)
const int32_t kMaxCapacity = 0x7ffffff5;

// Returns newLength plus a quarter of newLength plus kGrowSize,
// or kMaxCapacity if that sum would exceed kMaxCapacity.
int32_t getGrowCapacity(int32_t newLength) {
  const int32_t increment = (newLength >> 2) + kGrowSize;
  const int32_t headroom = kMaxCapacity - newLength;
  return (increment <= headroom) ? (newLength + increment) : kMaxCapacity;
}

}  // namespace
368
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000369UBool
370UnicodeString::allocate(int32_t capacity) {
371 if(capacity <= US_STACKBUF_SIZE) {
Jungshik Shin70f82502016-01-29 00:32:36 -0800372 fUnion.fFields.fLengthAndFlags = kShortString;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700373 return TRUE;
374 }
375 if(capacity <= kMaxCapacity) {
376 ++capacity; // for the NUL
377 // Switch to size_t which is unsigned so that we can allocate up to 4GB.
378 // Reference counter + UChars.
379 size_t numBytes = sizeof(int32_t) + (size_t)capacity * U_SIZEOF_UCHAR;
380 // Round up to a multiple of 16.
381 numBytes = (numBytes + 15) & ~15;
382 int32_t *array = (int32_t *) uprv_malloc(numBytes);
383 if(array != NULL) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000384 // set initial refCount and point behind the refCount
385 *array++ = 1;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700386 numBytes -= sizeof(int32_t);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000387
388 // have fArray point to the first UChar
389 fUnion.fFields.fArray = (UChar *)array;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700390 fUnion.fFields.fCapacity = (int32_t)(numBytes / U_SIZEOF_UCHAR);
Jungshik Shin70f82502016-01-29 00:32:36 -0800391 fUnion.fFields.fLengthAndFlags = kLongString;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700392 return TRUE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000393 }
394 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700395 fUnion.fFields.fLengthAndFlags = kIsBogus;
396 fUnion.fFields.fArray = 0;
397 fUnion.fFields.fCapacity = 0;
398 return FALSE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000399}
400
401//========================================
402// Destructor
403//========================================
Jungshik Shin70f82502016-01-29 00:32:36 -0800404
405#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
406static u_atomic_int32_t finalLengthCounts[0x400]; // UnicodeString::kMaxShortLength+1
407static u_atomic_int32_t beyondCount(0);
408
409U_CAPI void unistr_printLengths() {
410 int32_t i;
411 for(i = 0; i <= 59; ++i) {
412 printf("%2d, %9d\n", i, (int32_t)finalLengthCounts[i]);
413 }
414 int32_t beyond = beyondCount;
415 for(; i < UPRV_LENGTHOF(finalLengthCounts); ++i) {
416 beyond += finalLengthCounts[i];
417 }
418 printf(">59, %9d\n", beyond);
419}
420#endif
421
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000422UnicodeString::~UnicodeString()
423{
Jungshik Shin70f82502016-01-29 00:32:36 -0800424#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
425 // Count lengths of strings at the end of their lifetime.
426 // Useful for discussion of a desirable stack buffer size.
427 // Count the contents length, not the optional NUL terminator nor further capacity.
428 // Ignore open-buffer strings and strings which alias external storage.
429 if((fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kReadonlyAlias|kWritableAlias)) == 0) {
430 if(hasShortLength()) {
431 umtx_atomic_inc(finalLengthCounts + getShortLength());
432 } else {
433 umtx_atomic_inc(&beyondCount);
434 }
435 }
436#endif
437
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000438 releaseArray();
439}
440
441//========================================
442// Factory methods
443//========================================
444
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700445UnicodeString UnicodeString::fromUTF8(StringPiece utf8) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000446 UnicodeString result;
447 result.setToUTF8(utf8);
448 return result;
449}
450
451UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
452 UnicodeString result;
453 int32_t capacity;
454 // Most UTF-32 strings will be BMP-only and result in a same-length
455 // UTF-16 string. We overestimate the capacity just slightly,
456 // just in case there are a few supplementary characters.
457 if(length <= US_STACKBUF_SIZE) {
458 capacity = US_STACKBUF_SIZE;
459 } else {
460 capacity = length + (length >> 4) + 4;
461 }
462 do {
463 UChar *utf16 = result.getBuffer(capacity);
464 int32_t length16;
465 UErrorCode errorCode = U_ZERO_ERROR;
466 u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
467 utf32, length,
468 0xfffd, // Substitution character.
469 NULL, // Don't care about number of substitutions.
470 &errorCode);
471 result.releaseBuffer(length16);
472 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
473 capacity = length16 + 1; // +1 for the terminating NUL.
474 continue;
475 } else if(U_FAILURE(errorCode)) {
476 result.setToBogus();
477 }
478 break;
479 } while(TRUE);
480 return result;
481}
482
483//========================================
484// Assignment
485//========================================
486
487UnicodeString &
488UnicodeString::operator=(const UnicodeString &src) {
489 return copyFrom(src);
490}
491
492UnicodeString &
493UnicodeString::fastCopyFrom(const UnicodeString &src) {
494 return copyFrom(src, TRUE);
495}
496
497UnicodeString &
498UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
499 // if assigning to ourselves, do nothing
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800500 if(this == &src) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000501 return *this;
502 }
503
504 // is the right side bogus?
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800505 if(src.isBogus()) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000506 setToBogus();
507 return *this;
508 }
509
510 // delete the current contents
511 releaseArray();
512
513 if(src.isEmpty()) {
514 // empty string - use the stack buffer
515 setToEmpty();
516 return *this;
517 }
518
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000519 // fLength>0 and not an "open" src.getBuffer(minCapacity)
Jungshik Shin70f82502016-01-29 00:32:36 -0800520 fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
521 switch(src.fUnion.fFields.fLengthAndFlags & kAllStorageFlags) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000522 case kShortString:
523 // short string using the stack buffer, do the same
Jungshik Shin70f82502016-01-29 00:32:36 -0800524 uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
525 getShortLength() * U_SIZEOF_UCHAR);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000526 break;
527 case kLongString:
528 // src uses a refCounted string buffer, use that buffer with refCount
Jungshik Shin70f82502016-01-29 00:32:36 -0800529 // src is const, use a cast - we don't actually change it
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000530 ((UnicodeString &)src).addRef();
531 // copy all fields, share the reference-counted buffer
532 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
533 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
Jungshik Shin70f82502016-01-29 00:32:36 -0800534 if(!hasShortLength()) {
535 fUnion.fFields.fLength = src.fUnion.fFields.fLength;
536 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000537 break;
538 case kReadonlyAlias:
539 if(fastCopy) {
540 // src is a readonly alias, do the same
541 // -> maintain the readonly alias as such
542 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
543 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
Jungshik Shin70f82502016-01-29 00:32:36 -0800544 if(!hasShortLength()) {
545 fUnion.fFields.fLength = src.fUnion.fFields.fLength;
546 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000547 break;
548 }
549 // else if(!fastCopy) fall through to case kWritableAlias
550 // -> allocate a new buffer and copy the contents
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700551 U_FALLTHROUGH;
Jungshik Shin70f82502016-01-29 00:32:36 -0800552 case kWritableAlias: {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000553 // src is a writable alias; we make a copy of that instead
Jungshik Shin70f82502016-01-29 00:32:36 -0800554 int32_t srcLength = src.length();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000555 if(allocate(srcLength)) {
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700556 u_memcpy(getArrayStart(), src.getArrayStart(), srcLength);
Jungshik Shin70f82502016-01-29 00:32:36 -0800557 setLength(srcLength);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000558 break;
559 }
560 // if there is not enough memory, then fall through to setting to bogus
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700561 U_FALLTHROUGH;
Jungshik Shin70f82502016-01-29 00:32:36 -0800562 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000563 default:
564 // if src is bogus, set ourselves to bogus
Jungshik Shin70f82502016-01-29 00:32:36 -0800565 // do not call setToBogus() here because fArray and flags are not consistent here
566 fUnion.fFields.fLengthAndFlags = kIsBogus;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000567 fUnion.fFields.fArray = 0;
568 fUnion.fFields.fCapacity = 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000569 break;
570 }
571
572 return *this;
573}
574
Jungshik Shin70f82502016-01-29 00:32:36 -0800575UnicodeString &UnicodeString::moveFrom(UnicodeString &src) U_NOEXCEPT {
576 // No explicit check for self move assignment, consistent with standard library.
577 // Self move assignment causes no crash nor leak but might make the object bogus.
578 releaseArray();
579 copyFieldsFrom(src, TRUE);
580 return *this;
581}
582
583// Same as moveFrom() except without memory management.
584void UnicodeString::copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT {
585 int16_t lengthAndFlags = fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
586 if(lengthAndFlags & kUsingStackBuffer) {
587 // Short string using the stack buffer, copy the contents.
588 // Check for self assignment to prevent "overlap in memcpy" warnings,
589 // although it should be harmless to copy a buffer to itself exactly.
590 if(this != &src) {
591 uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
592 getShortLength() * U_SIZEOF_UCHAR);
593 }
594 } else {
595 // In all other cases, copy all fields.
596 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
597 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
598 if(!hasShortLength()) {
599 fUnion.fFields.fLength = src.fUnion.fFields.fLength;
600 }
601 if(setSrcToBogus) {
602 // Set src to bogus without releasing any memory.
603 src.fUnion.fFields.fLengthAndFlags = kIsBogus;
604 src.fUnion.fFields.fArray = NULL;
605 src.fUnion.fFields.fCapacity = 0;
606 }
607 }
608}
609
610void UnicodeString::swap(UnicodeString &other) U_NOEXCEPT {
611 UnicodeString temp; // Empty short string: Known not to need releaseArray().
612 // Copy fields without resetting source values in between.
613 temp.copyFieldsFrom(*this, FALSE);
614 this->copyFieldsFrom(other, FALSE);
615 other.copyFieldsFrom(temp, FALSE);
616 // Set temp to an empty string so that other's memory is not released twice.
617 temp.fUnion.fFields.fLengthAndFlags = kShortString;
618}
619
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000620//========================================
621// Miscellaneous operations
622//========================================
623
624UnicodeString UnicodeString::unescape() const {
625 UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
Jungshik Shin70f82502016-01-29 00:32:36 -0800626 if (result.isBogus()) {
627 return result;
628 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000629 const UChar *array = getBuffer();
630 int32_t len = length();
631 int32_t prev = 0;
632 for (int32_t i=0;;) {
633 if (i == len) {
634 result.append(array, prev, len - prev);
635 break;
636 }
637 if (array[i++] == 0x5C /*'\\'*/) {
638 result.append(array, prev, (i - 1) - prev);
639 UChar32 c = unescapeAt(i); // advances i
640 if (c < 0) {
641 result.remove(); // return empty string
642 break; // invalid escape sequence
643 }
644 result.append(c);
645 prev = i;
646 }
647 }
648 return result;
649}
650
651UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
652 return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
653}
654
655//========================================
656// Read-only implementation
657//========================================
658UBool
659UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
660 // Requires: this & text not bogus and have same lengths.
661 // Byte-wise comparison works for equality regardless of endianness.
662 return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0;
663}
664
665int8_t
666UnicodeString::doCompare( int32_t start,
667 int32_t length,
668 const UChar *srcChars,
669 int32_t srcStart,
670 int32_t srcLength) const
671{
672 // compare illegal string values
673 if(isBogus()) {
674 return -1;
675 }
676
677 // pin indices to legal values
678 pinIndices(start, length);
679
680 if(srcChars == NULL) {
681 // treat const UChar *srcChars==NULL as an empty string
682 return length == 0 ? 0 : 1;
683 }
684
685 // get the correct pointer
686 const UChar *chars = getArrayStart();
687
688 chars += start;
689 srcChars += srcStart;
690
691 int32_t minLength;
692 int8_t lengthResult;
693
694 // get the srcLength if necessary
695 if(srcLength < 0) {
696 srcLength = u_strlen(srcChars + srcStart);
697 }
698
699 // are we comparing different lengths?
700 if(length != srcLength) {
701 if(length < srcLength) {
702 minLength = length;
703 lengthResult = -1;
704 } else {
705 minLength = srcLength;
706 lengthResult = 1;
707 }
708 } else {
709 minLength = length;
710 lengthResult = 0;
711 }
712
713 /*
714 * note that uprv_memcmp() returns an int but we return an int8_t;
715 * we need to take care not to truncate the result -
716 * one way to do this is to right-shift the value to
717 * move the sign bit into the lower 8 bits and making sure that this
718 * does not become 0 itself
719 */
720
721 if(minLength > 0 && chars != srcChars) {
722 int32_t result;
723
724# if U_IS_BIG_ENDIAN
725 // big-endian: byte comparison works
726 result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
727 if(result != 0) {
728 return (int8_t)(result >> 15 | 1);
729 }
730# else
731 // little-endian: compare UChar units
732 do {
733 result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
734 if(result != 0) {
735 return (int8_t)(result >> 15 | 1);
736 }
737 } while(--minLength > 0);
738# endif
739 }
740 return lengthResult;
741}
742
743/* String compare in code point order - doCompare() compares in code unit order. */
744int8_t
745UnicodeString::doCompareCodePointOrder(int32_t start,
746 int32_t length,
747 const UChar *srcChars,
748 int32_t srcStart,
749 int32_t srcLength) const
750{
751 // compare illegal string values
752 // treat const UChar *srcChars==NULL as an empty string
753 if(isBogus()) {
754 return -1;
755 }
756
757 // pin indices to legal values
758 pinIndices(start, length);
759
760 if(srcChars == NULL) {
761 srcStart = srcLength = 0;
762 }
763
764 int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=NULL)?(srcChars + srcStart):NULL, srcLength, FALSE, TRUE);
765 /* translate the 32-bit result into an 8-bit one */
766 if(diff!=0) {
767 return (int8_t)(diff >> 15 | 1);
768 } else {
769 return 0;
770 }
771}
772
773int32_t
774UnicodeString::getLength() const {
775 return length();
776}
777
778UChar
779UnicodeString::getCharAt(int32_t offset) const {
780 return charAt(offset);
781}
782
783UChar32
784UnicodeString::getChar32At(int32_t offset) const {
785 return char32At(offset);
786}
787
788UChar32
789UnicodeString::char32At(int32_t offset) const
790{
791 int32_t len = length();
792 if((uint32_t)offset < (uint32_t)len) {
793 const UChar *array = getArrayStart();
794 UChar32 c;
795 U16_GET(array, 0, offset, len, c);
796 return c;
797 } else {
798 return kInvalidUChar;
799 }
800}
801
802int32_t
803UnicodeString::getChar32Start(int32_t offset) const {
804 if((uint32_t)offset < (uint32_t)length()) {
805 const UChar *array = getArrayStart();
806 U16_SET_CP_START(array, 0, offset);
807 return offset;
808 } else {
809 return 0;
810 }
811}
812
813int32_t
814UnicodeString::getChar32Limit(int32_t offset) const {
815 int32_t len = length();
816 if((uint32_t)offset < (uint32_t)len) {
817 const UChar *array = getArrayStart();
818 U16_SET_CP_LIMIT(array, 0, offset, len);
819 return offset;
820 } else {
821 return len;
822 }
823}
824
825int32_t
826UnicodeString::countChar32(int32_t start, int32_t length) const {
827 pinIndices(start, length);
828 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
829 return u_countChar32(getArrayStart()+start, length);
830}
831
832UBool
833UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
834 pinIndices(start, length);
835 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
836 return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
837}
838
839int32_t
840UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
841 // pin index
842 int32_t len = length();
843 if(index<0) {
844 index=0;
845 } else if(index>len) {
846 index=len;
847 }
848
849 const UChar *array = getArrayStart();
850 if(delta>0) {
851 U16_FWD_N(array, index, len, delta);
852 } else {
853 U16_BACK_N(array, 0, index, -delta);
854 }
855
856 return index;
857}
858
859void
860UnicodeString::doExtract(int32_t start,
861 int32_t length,
862 UChar *dst,
863 int32_t dstStart) const
864{
865 // pin indices to legal values
866 pinIndices(start, length);
867
868 // do not copy anything if we alias dst itself
869 const UChar *array = getArrayStart();
870 if(array + start != dst + dstStart) {
871 us_arrayCopy(array, start, dst, dstStart, length);
872 }
873}
874
875int32_t
Jungshik Shin87232d82017-05-13 21:10:13 -0700876UnicodeString::extract(Char16Ptr dest, int32_t destCapacity,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000877 UErrorCode &errorCode) const {
878 int32_t len = length();
879 if(U_SUCCESS(errorCode)) {
880 if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
881 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
882 } else {
883 const UChar *array = getArrayStart();
884 if(len>0 && len<=destCapacity && array!=dest) {
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700885 u_memcpy(dest, array, len);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000886 }
887 return u_terminateUChars(dest, destCapacity, len, &errorCode);
888 }
889 }
890
891 return len;
892}
893
894int32_t
895UnicodeString::extract(int32_t start,
896 int32_t length,
897 char *target,
898 int32_t targetCapacity,
899 enum EInvariant) const
900{
901 // if the arguments are illegal, then do nothing
902 if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
903 return 0;
904 }
905
906 // pin the indices to legal values
907 pinIndices(start, length);
908
909 if(length <= targetCapacity) {
910 u_UCharsToChars(getArrayStart() + start, target, length);
911 }
912 UErrorCode status = U_ZERO_ERROR;
913 return u_terminateChars(target, targetCapacity, length, &status);
914}
915
916UnicodeString
917UnicodeString::tempSubString(int32_t start, int32_t len) const {
918 pinIndices(start, len);
919 const UChar *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer
920 if(array==NULL) {
Jungshik Shin70f82502016-01-29 00:32:36 -0800921 array=fUnion.fStackFields.fBuffer; // anything not NULL because that would make an empty string
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000922 len=-2; // bogus result string
923 }
924 return UnicodeString(FALSE, array + start, len);
925}
926
927int32_t
928UnicodeString::toUTF8(int32_t start, int32_t len,
929 char *target, int32_t capacity) const {
930 pinIndices(start, len);
931 int32_t length8;
932 UErrorCode errorCode = U_ZERO_ERROR;
933 u_strToUTF8WithSub(target, capacity, &length8,
934 getBuffer() + start, len,
935 0xFFFD, // Standard substitution character.
936 NULL, // Don't care about number of substitutions.
937 &errorCode);
938 return length8;
939}
940
941#if U_CHARSET_IS_UTF8
942
943int32_t
944UnicodeString::extract(int32_t start, int32_t len,
945 char *target, uint32_t dstSize) const {
946 // if the arguments are illegal, then do nothing
947 if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
948 return 0;
949 }
950 return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
951}
952
953// else see unistr_cnv.cpp
954#endif
955
956void
957UnicodeString::extractBetween(int32_t start,
958 int32_t limit,
959 UnicodeString& target) const {
960 pinIndex(start);
961 pinIndex(limit);
962 doExtract(start, limit - start, target);
963}
964
965// When converting from UTF-16 to UTF-8, the result will have at most 3 times
966// as many bytes as the source has UChars.
967// The "worst cases" are writing systems like Indic, Thai and CJK with
968// 3:1 bytes:UChars.
969void
970UnicodeString::toUTF8(ByteSink &sink) const {
971 int32_t length16 = length();
972 if(length16 != 0) {
973 char stackBuffer[1024];
974 int32_t capacity = (int32_t)sizeof(stackBuffer);
975 UBool utf8IsOwned = FALSE;
976 char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
977 3*length16,
978 stackBuffer, capacity,
979 &capacity);
980 int32_t length8 = 0;
981 UErrorCode errorCode = U_ZERO_ERROR;
982 u_strToUTF8WithSub(utf8, capacity, &length8,
983 getBuffer(), length16,
984 0xFFFD, // Standard substitution character.
985 NULL, // Don't care about number of substitutions.
986 &errorCode);
987 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
988 utf8 = (char *)uprv_malloc(length8);
989 if(utf8 != NULL) {
990 utf8IsOwned = TRUE;
991 errorCode = U_ZERO_ERROR;
992 u_strToUTF8WithSub(utf8, length8, &length8,
993 getBuffer(), length16,
994 0xFFFD, // Standard substitution character.
995 NULL, // Don't care about number of substitutions.
996 &errorCode);
997 } else {
998 errorCode = U_MEMORY_ALLOCATION_ERROR;
999 }
1000 }
1001 if(U_SUCCESS(errorCode)) {
1002 sink.Append(utf8, length8);
1003 sink.Flush();
1004 }
1005 if(utf8IsOwned) {
1006 uprv_free(utf8);
1007 }
1008 }
1009}
1010
1011int32_t
1012UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
1013 int32_t length32=0;
1014 if(U_SUCCESS(errorCode)) {
1015 // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
1016 u_strToUTF32WithSub(utf32, capacity, &length32,
1017 getBuffer(), length(),
1018 0xfffd, // Substitution character.
1019 NULL, // Don't care about number of substitutions.
1020 &errorCode);
1021 }
1022 return length32;
1023}
1024
1025int32_t
1026UnicodeString::indexOf(const UChar *srcChars,
1027 int32_t srcStart,
1028 int32_t srcLength,
1029 int32_t start,
1030 int32_t length) const
1031{
1032 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
1033 return -1;
1034 }
1035
1036 // UnicodeString does not find empty substrings
1037 if(srcLength < 0 && srcChars[srcStart] == 0) {
1038 return -1;
1039 }
1040
1041 // get the indices within bounds
1042 pinIndices(start, length);
1043
1044 // find the first occurrence of the substring
1045 const UChar *array = getArrayStart();
1046 const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
1047 if(match == NULL) {
1048 return -1;
1049 } else {
1050 return (int32_t)(match - array);
1051 }
1052}
1053
1054int32_t
1055UnicodeString::doIndexOf(UChar c,
1056 int32_t start,
1057 int32_t length) const
1058{
1059 // pin indices
1060 pinIndices(start, length);
1061
1062 // find the first occurrence of c
1063 const UChar *array = getArrayStart();
1064 const UChar *match = u_memchr(array + start, c, length);
1065 if(match == NULL) {
1066 return -1;
1067 } else {
1068 return (int32_t)(match - array);
1069 }
1070}
1071
1072int32_t
1073UnicodeString::doIndexOf(UChar32 c,
1074 int32_t start,
1075 int32_t length) const {
1076 // pin indices
1077 pinIndices(start, length);
1078
1079 // find the first occurrence of c
1080 const UChar *array = getArrayStart();
1081 const UChar *match = u_memchr32(array + start, c, length);
1082 if(match == NULL) {
1083 return -1;
1084 } else {
1085 return (int32_t)(match - array);
1086 }
1087}
1088
1089int32_t
1090UnicodeString::lastIndexOf(const UChar *srcChars,
1091 int32_t srcStart,
1092 int32_t srcLength,
1093 int32_t start,
1094 int32_t length) const
1095{
1096 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
1097 return -1;
1098 }
1099
1100 // UnicodeString does not find empty substrings
1101 if(srcLength < 0 && srcChars[srcStart] == 0) {
1102 return -1;
1103 }
1104
1105 // get the indices within bounds
1106 pinIndices(start, length);
1107
1108 // find the last occurrence of the substring
1109 const UChar *array = getArrayStart();
1110 const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
1111 if(match == NULL) {
1112 return -1;
1113 } else {
1114 return (int32_t)(match - array);
1115 }
1116}
1117
1118int32_t
1119UnicodeString::doLastIndexOf(UChar c,
1120 int32_t start,
1121 int32_t length) const
1122{
1123 if(isBogus()) {
1124 return -1;
1125 }
1126
1127 // pin indices
1128 pinIndices(start, length);
1129
1130 // find the last occurrence of c
1131 const UChar *array = getArrayStart();
1132 const UChar *match = u_memrchr(array + start, c, length);
1133 if(match == NULL) {
1134 return -1;
1135 } else {
1136 return (int32_t)(match - array);
1137 }
1138}
1139
1140int32_t
1141UnicodeString::doLastIndexOf(UChar32 c,
1142 int32_t start,
1143 int32_t length) const {
1144 // pin indices
1145 pinIndices(start, length);
1146
1147 // find the last occurrence of c
1148 const UChar *array = getArrayStart();
1149 const UChar *match = u_memrchr32(array + start, c, length);
1150 if(match == NULL) {
1151 return -1;
1152 } else {
1153 return (int32_t)(match - array);
1154 }
1155}
1156
1157//========================================
1158// Write implementation
1159//========================================
1160
1161UnicodeString&
1162UnicodeString::findAndReplace(int32_t start,
1163 int32_t length,
1164 const UnicodeString& oldText,
1165 int32_t oldStart,
1166 int32_t oldLength,
1167 const UnicodeString& newText,
1168 int32_t newStart,
1169 int32_t newLength)
1170{
1171 if(isBogus() || oldText.isBogus() || newText.isBogus()) {
1172 return *this;
1173 }
1174
1175 pinIndices(start, length);
1176 oldText.pinIndices(oldStart, oldLength);
1177 newText.pinIndices(newStart, newLength);
1178
1179 if(oldLength == 0) {
1180 return *this;
1181 }
1182
1183 while(length > 0 && length >= oldLength) {
1184 int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
1185 if(pos < 0) {
1186 // no more oldText's here: done
1187 break;
1188 } else {
1189 // we found oldText, replace it by newText and go beyond it
1190 replace(pos, oldLength, newText, newStart, newLength);
1191 length -= pos + oldLength - start;
1192 start = pos + newLength;
1193 }
1194 }
1195
1196 return *this;
1197}
1198
1199
1200void
1201UnicodeString::setToBogus()
1202{
1203 releaseArray();
1204
Jungshik Shin70f82502016-01-29 00:32:36 -08001205 fUnion.fFields.fLengthAndFlags = kIsBogus;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001206 fUnion.fFields.fArray = 0;
1207 fUnion.fFields.fCapacity = 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001208}
1209
1210// turn a bogus string into an empty one
1211void
1212UnicodeString::unBogus() {
Jungshik Shin70f82502016-01-29 00:32:36 -08001213 if(fUnion.fFields.fLengthAndFlags & kIsBogus) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001214 setToEmpty();
1215 }
1216}
1217
Jungshik Shin87232d82017-05-13 21:10:13 -07001218const char16_t *
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001219UnicodeString::getTerminatedBuffer() {
1220 if(!isWritable()) {
Jungshik Shin87232d82017-05-13 21:10:13 -07001221 return nullptr;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001222 }
1223 UChar *array = getArrayStart();
1224 int32_t len = length();
1225 if(len < getCapacity()) {
Jungshik Shin70f82502016-01-29 00:32:36 -08001226 if(fUnion.fFields.fLengthAndFlags & kBufferIsReadonly) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001227 // If len<capacity on a read-only alias, then array[len] is
1228 // either the original NUL (if constructed with (TRUE, s, length))
1229 // or one of the original string contents characters (if later truncated),
1230 // therefore we can assume that array[len] is initialized memory.
1231 if(array[len] == 0) {
1232 return array;
1233 }
Jungshik Shin70f82502016-01-29 00:32:36 -08001234 } else if(((fUnion.fFields.fLengthAndFlags & kRefCounted) == 0 || refCount() == 1)) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001235 // kRefCounted: Do not write the NUL if the buffer is shared.
1236 // That is mostly safe, except when the length of one copy was modified
1237 // without copy-on-write, e.g., via truncate(newLength) or remove(void).
1238 // Then the NUL would be written into the middle of another copy's string.
1239
1240 // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
1241 // Do not test if there is a NUL already because it might be uninitialized memory.
1242 // (That would be safe, but tools like valgrind & Purify would complain.)
1243 array[len] = 0;
1244 return array;
1245 }
1246 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001247 if(len<INT32_MAX && cloneArrayIfNeeded(len+1)) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001248 array = getArrayStart();
1249 array[len] = 0;
1250 return array;
1251 } else {
Jungshik Shin87232d82017-05-13 21:10:13 -07001252 return nullptr;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001253 }
1254}
1255
1256// setTo() analogous to the readonly-aliasing constructor with the same signature
1257UnicodeString &
1258UnicodeString::setTo(UBool isTerminated,
Jungshik Shin87232d82017-05-13 21:10:13 -07001259 ConstChar16Ptr textPtr,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001260 int32_t textLength)
1261{
Jungshik Shin70f82502016-01-29 00:32:36 -08001262 if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001263 // do not modify a string that has an "open" getBuffer(minCapacity)
1264 return *this;
1265 }
1266
Jungshik Shin87232d82017-05-13 21:10:13 -07001267 const UChar *text = textPtr;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001268 if(text == NULL) {
1269 // treat as an empty string, do not alias
1270 releaseArray();
1271 setToEmpty();
1272 return *this;
1273 }
1274
1275 if( textLength < -1 ||
1276 (textLength == -1 && !isTerminated) ||
1277 (textLength >= 0 && isTerminated && text[textLength] != 0)
1278 ) {
1279 setToBogus();
1280 return *this;
1281 }
1282
1283 releaseArray();
1284
1285 if(textLength == -1) {
1286 // text is terminated, or else it would have failed the above test
1287 textLength = u_strlen(text);
1288 }
Jungshik Shin70f82502016-01-29 00:32:36 -08001289 fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001290 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001291 return *this;
1292}
1293
1294// setTo() analogous to the writable-aliasing constructor with the same signature
1295UnicodeString &
1296UnicodeString::setTo(UChar *buffer,
1297 int32_t buffLength,
1298 int32_t buffCapacity) {
Jungshik Shin70f82502016-01-29 00:32:36 -08001299 if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001300 // do not modify a string that has an "open" getBuffer(minCapacity)
1301 return *this;
1302 }
1303
1304 if(buffer == NULL) {
1305 // treat as an empty string, do not alias
1306 releaseArray();
1307 setToEmpty();
1308 return *this;
1309 }
1310
1311 if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
1312 setToBogus();
1313 return *this;
1314 } else if(buffLength == -1) {
1315 // buffLength = u_strlen(buff); but do not look beyond buffCapacity
1316 const UChar *p = buffer, *limit = buffer + buffCapacity;
1317 while(p != limit && *p != 0) {
1318 ++p;
1319 }
1320 buffLength = (int32_t)(p - buffer);
1321 }
1322
1323 releaseArray();
1324
Jungshik Shin70f82502016-01-29 00:32:36 -08001325 fUnion.fFields.fLengthAndFlags = kWritableAlias;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001326 setArray(buffer, buffLength, buffCapacity);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001327 return *this;
1328}
1329
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001330UnicodeString &UnicodeString::setToUTF8(StringPiece utf8) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001331 unBogus();
1332 int32_t length = utf8.length();
1333 int32_t capacity;
1334 // The UTF-16 string will be at most as long as the UTF-8 string.
1335 if(length <= US_STACKBUF_SIZE) {
1336 capacity = US_STACKBUF_SIZE;
1337 } else {
1338 capacity = length + 1; // +1 for the terminating NUL.
1339 }
1340 UChar *utf16 = getBuffer(capacity);
1341 int32_t length16;
1342 UErrorCode errorCode = U_ZERO_ERROR;
1343 u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
1344 utf8.data(), length,
1345 0xfffd, // Substitution character.
1346 NULL, // Don't care about number of substitutions.
1347 &errorCode);
1348 releaseBuffer(length16);
1349 if(U_FAILURE(errorCode)) {
1350 setToBogus();
1351 }
1352 return *this;
1353}
1354
1355UnicodeString&
1356UnicodeString::setCharAt(int32_t offset,
1357 UChar c)
1358{
1359 int32_t len = length();
1360 if(cloneArrayIfNeeded() && len > 0) {
1361 if(offset < 0) {
1362 offset = 0;
1363 } else if(offset >= len) {
1364 offset = len - 1;
1365 }
1366
1367 getArrayStart()[offset] = c;
1368 }
1369 return *this;
1370}
1371
1372UnicodeString&
1373UnicodeString::replace(int32_t start,
1374 int32_t _length,
1375 UChar32 srcChar) {
1376 UChar buffer[U16_MAX_LENGTH];
1377 int32_t count = 0;
1378 UBool isError = FALSE;
1379 U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
1380 // We test isError so that the compiler does not complain that we don't.
1381 // If isError (srcChar is not a valid code point) then count==0 which means
1382 // we remove the source segment rather than replacing it with srcChar.
1383 return doReplace(start, _length, buffer, 0, isError ? 0 : count);
1384}
1385
1386UnicodeString&
1387UnicodeString::append(UChar32 srcChar) {
1388 UChar buffer[U16_MAX_LENGTH];
1389 int32_t _length = 0;
1390 UBool isError = FALSE;
1391 U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
1392 // We test isError so that the compiler does not complain that we don't.
Jungshik Shin70f82502016-01-29 00:32:36 -08001393 // If isError then _length==0 which turns the doAppend() into a no-op anyway.
1394 return isError ? *this : doAppend(buffer, 0, _length);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001395}
1396
1397UnicodeString&
1398UnicodeString::doReplace( int32_t start,
1399 int32_t length,
1400 const UnicodeString& src,
1401 int32_t srcStart,
1402 int32_t srcLength)
1403{
Jungshik Shin70f82502016-01-29 00:32:36 -08001404 // pin the indices to legal values
1405 src.pinIndices(srcStart, srcLength);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001406
Jungshik Shin70f82502016-01-29 00:32:36 -08001407 // get the characters from src
1408 // and replace the range in ourselves with them
1409 return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001410}
1411
1412UnicodeString&
1413UnicodeString::doReplace(int32_t start,
1414 int32_t length,
1415 const UChar *srcChars,
1416 int32_t srcStart,
1417 int32_t srcLength)
1418{
1419 if(!isWritable()) {
1420 return *this;
1421 }
1422
1423 int32_t oldLength = this->length();
1424
1425 // optimize (read-only alias).remove(0, start) and .remove(start, end)
Jungshik Shin70f82502016-01-29 00:32:36 -08001426 if((fUnion.fFields.fLengthAndFlags&kBufferIsReadonly) && srcLength == 0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001427 if(start == 0) {
1428 // remove prefix by adjusting the array pointer
1429 pinIndex(length);
1430 fUnion.fFields.fArray += length;
1431 fUnion.fFields.fCapacity -= length;
1432 setLength(oldLength - length);
1433 return *this;
1434 } else {
1435 pinIndex(start);
1436 if(length >= (oldLength - start)) {
1437 // remove suffix by reducing the length (like truncate())
1438 setLength(start);
1439 fUnion.fFields.fCapacity = start; // not NUL-terminated any more
1440 return *this;
1441 }
1442 }
1443 }
1444
Jungshik Shin70f82502016-01-29 00:32:36 -08001445 if(start == oldLength) {
1446 return doAppend(srcChars, srcStart, srcLength);
1447 }
1448
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001449 if(srcChars == 0) {
Jungshik Shin42d50272018-10-24 01:22:09 -07001450 srcLength = 0;
1451 } else {
1452 // Perform all remaining operations relative to srcChars + srcStart.
1453 // From this point forward, do not use srcStart.
1454 srcChars += srcStart;
1455 if (srcLength < 0) {
1456 // get the srcLength if necessary
1457 srcLength = u_strlen(srcChars);
1458 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001459 }
1460
Jungshik Shin70f82502016-01-29 00:32:36 -08001461 // pin the indices to legal values
1462 pinIndices(start, length);
1463
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001464 // Calculate the size of the string after the replace.
1465 // Avoid int32_t overflow.
1466 int32_t newLength = oldLength - length;
1467 if(srcLength > (INT32_MAX - newLength)) {
1468 setToBogus();
1469 return *this;
1470 }
1471 newLength += srcLength;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001472
Jungshik Shin42d50272018-10-24 01:22:09 -07001473 // Check for insertion into ourself
1474 const UChar *oldArray = getArrayStart();
1475 if (isBufferWritable() &&
1476 oldArray < srcChars + srcLength &&
1477 srcChars < oldArray + oldLength) {
1478 // Copy into a new UnicodeString and start over
1479 UnicodeString copy(srcChars, srcLength);
1480 if (copy.isBogus()) {
1481 setToBogus();
1482 return *this;
1483 }
1484 return doReplace(start, length, copy.getArrayStart(), 0, srcLength);
1485 }
1486
Jungshik Shin70f82502016-01-29 00:32:36 -08001487 // cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy the current contents;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001488 // therefore we need to keep the current fArray
1489 UChar oldStackBuffer[US_STACKBUF_SIZE];
Jungshik Shin70f82502016-01-29 00:32:36 -08001490 if((fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001491 // copy the stack buffer contents because it will be overwritten with
1492 // fUnion.fFields values
Jungshik Shin42d50272018-10-24 01:22:09 -07001493 u_memcpy(oldStackBuffer, oldArray, oldLength);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001494 oldArray = oldStackBuffer;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001495 }
1496
1497 // clone our array and allocate a bigger array if needed
1498 int32_t *bufferToDelete = 0;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001499 if(!cloneArrayIfNeeded(newLength, getGrowCapacity(newLength),
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001500 FALSE, &bufferToDelete)
1501 ) {
1502 return *this;
1503 }
1504
1505 // now do the replace
1506
1507 UChar *newArray = getArrayStart();
1508 if(newArray != oldArray) {
1509 // if fArray changed, then we need to copy everything except what will change
1510 us_arrayCopy(oldArray, 0, newArray, 0, start);
1511 us_arrayCopy(oldArray, start + length,
1512 newArray, start + srcLength,
1513 oldLength - (start + length));
1514 } else if(length != srcLength) {
1515 // fArray did not change; copy only the portion that isn't changing, leaving a hole
1516 us_arrayCopy(oldArray, start + length,
1517 newArray, start + srcLength,
1518 oldLength - (start + length));
1519 }
1520
1521 // now fill in the hole with the new string
Jungshik Shin42d50272018-10-24 01:22:09 -07001522 us_arrayCopy(srcChars, 0, newArray, start, srcLength);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001523
1524 setLength(newLength);
1525
1526 // delayed delete in case srcChars == fArray when we started, and
1527 // to keep oldArray alive for the above operations
1528 if (bufferToDelete) {
1529 uprv_free(bufferToDelete);
1530 }
1531
1532 return *this;
1533}
1534
Jungshik Shin70f82502016-01-29 00:32:36 -08001535// Versions of doReplace() only for append() variants.
1536// doReplace() and doAppend() optimize for different cases.
1537
1538UnicodeString&
1539UnicodeString::doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength) {
1540 if(srcLength == 0) {
1541 return *this;
1542 }
1543
1544 // pin the indices to legal values
1545 src.pinIndices(srcStart, srcLength);
1546 return doAppend(src.getArrayStart(), srcStart, srcLength);
1547}
1548
1549UnicodeString&
1550UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLength) {
1551 if(!isWritable() || srcLength == 0 || srcChars == NULL) {
1552 return *this;
1553 }
1554
Jungshik Shin42d50272018-10-24 01:22:09 -07001555 // Perform all remaining operations relative to srcChars + srcStart.
1556 // From this point forward, do not use srcStart.
1557 srcChars += srcStart;
1558
Jungshik Shin70f82502016-01-29 00:32:36 -08001559 if(srcLength < 0) {
1560 // get the srcLength if necessary
Jungshik Shin42d50272018-10-24 01:22:09 -07001561 if((srcLength = u_strlen(srcChars)) == 0) {
Jungshik Shin70f82502016-01-29 00:32:36 -08001562 return *this;
1563 }
1564 }
1565
1566 int32_t oldLength = length();
1567 int32_t newLength = oldLength + srcLength;
Jungshik Shin42d50272018-10-24 01:22:09 -07001568
1569 // Check for append onto ourself
1570 const UChar* oldArray = getArrayStart();
1571 if (isBufferWritable() &&
1572 oldArray < srcChars + srcLength &&
1573 srcChars < oldArray + oldLength) {
1574 // Copy into a new UnicodeString and start over
1575 UnicodeString copy(srcChars, srcLength);
1576 if (copy.isBogus()) {
1577 setToBogus();
1578 return *this;
1579 }
1580 return doAppend(copy.getArrayStart(), 0, srcLength);
1581 }
1582
Jungshik Shin70f82502016-01-29 00:32:36 -08001583 // optimize append() onto a large-enough, owned string
1584 if((newLength <= getCapacity() && isBufferWritable()) ||
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001585 cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) {
Jungshik Shin70f82502016-01-29 00:32:36 -08001586 UChar *newArray = getArrayStart();
1587 // Do not copy characters when
1588 // UChar *buffer=str.getAppendBuffer(...);
1589 // is followed by
1590 // str.append(buffer, length);
1591 // or
1592 // str.appendString(buffer, length)
1593 // or similar.
Jungshik Shin42d50272018-10-24 01:22:09 -07001594 if(srcChars != newArray + oldLength) {
1595 us_arrayCopy(srcChars, 0, newArray, oldLength, srcLength);
Jungshik Shin70f82502016-01-29 00:32:36 -08001596 }
1597 setLength(newLength);
1598 }
1599 return *this;
1600}
1601
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001602/**
1603 * Replaceable API
1604 */
1605void
1606UnicodeString::handleReplaceBetween(int32_t start,
1607 int32_t limit,
1608 const UnicodeString& text) {
1609 replaceBetween(start, limit, text);
1610}
1611
1612/**
1613 * Replaceable API
1614 */
1615void
1616UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
1617 if (limit <= start) {
1618 return; // Nothing to do; avoid bogus malloc call
1619 }
1620 UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
1621 // Check to make sure text is not null.
1622 if (text != NULL) {
1623 extractBetween(start, limit, text, 0);
1624 insert(dest, text, 0, limit - start);
1625 uprv_free(text);
1626 }
1627}
1628
1629/**
1630 * Replaceable API
1631 *
1632 * NOTE: This is for the Replaceable class. There is no rep.cpp,
1633 * so we implement this function here.
1634 */
1635UBool Replaceable::hasMetaData() const {
1636 return TRUE;
1637}
1638
1639/**
1640 * Replaceable API
1641 */
1642UBool UnicodeString::hasMetaData() const {
1643 return FALSE;
1644}
1645
1646UnicodeString&
1647UnicodeString::doReverse(int32_t start, int32_t length) {
1648 if(length <= 1 || !cloneArrayIfNeeded()) {
1649 return *this;
1650 }
1651
1652 // pin the indices to legal values
1653 pinIndices(start, length);
1654 if(length <= 1) { // pinIndices() might have shrunk the length
1655 return *this;
1656 }
1657
1658 UChar *left = getArrayStart() + start;
1659 UChar *right = left + length - 1; // -1 for inclusive boundary (length>=2)
1660 UChar swap;
1661 UBool hasSupplementary = FALSE;
1662
1663 // Before the loop we know left<right because length>=2.
1664 do {
1665 hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
1666 hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
1667 *right-- = swap;
1668 } while(left < right);
1669 // Make sure to test the middle code unit of an odd-length string.
1670 // Redundant if the length is even.
1671 hasSupplementary |= (UBool)U16_IS_LEAD(*left);
1672
1673 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1674 if(hasSupplementary) {
1675 UChar swap2;
1676
1677 left = getArrayStart() + start;
1678 right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
1679 while(left < right) {
1680 if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
1681 *left++ = swap2;
1682 *left++ = swap;
1683 } else {
1684 ++left;
1685 }
1686 }
1687 }
1688
1689 return *this;
1690}
1691
1692UBool
1693UnicodeString::padLeading(int32_t targetLength,
1694 UChar padChar)
1695{
1696 int32_t oldLength = length();
1697 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1698 return FALSE;
1699 } else {
1700 // move contents up by padding width
1701 UChar *array = getArrayStart();
1702 int32_t start = targetLength - oldLength;
1703 us_arrayCopy(array, 0, array, start, oldLength);
1704
1705 // fill in padding character
1706 while(--start >= 0) {
1707 array[start] = padChar;
1708 }
1709 setLength(targetLength);
1710 return TRUE;
1711 }
1712}
1713
1714UBool
1715UnicodeString::padTrailing(int32_t targetLength,
1716 UChar padChar)
1717{
1718 int32_t oldLength = length();
1719 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1720 return FALSE;
1721 } else {
1722 // fill in padding character
1723 UChar *array = getArrayStart();
1724 int32_t length = targetLength;
1725 while(--length >= oldLength) {
1726 array[length] = padChar;
1727 }
1728 setLength(targetLength);
1729 return TRUE;
1730 }
1731}
1732
1733//========================================
1734// Hashing
1735//========================================
1736int32_t
1737UnicodeString::doHashCode() const
1738{
1739 /* Delegate hash computation to uhash. This makes UnicodeString
1740 * hashing consistent with UChar* hashing. */
1741 int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
1742 if (hashCode == kInvalidHashCode) {
1743 hashCode = kEmptyHashCode;
1744 }
1745 return hashCode;
1746}
1747
1748//========================================
1749// External Buffer
1750//========================================
1751
Jungshik Shin87232d82017-05-13 21:10:13 -07001752char16_t *
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001753UnicodeString::getBuffer(int32_t minCapacity) {
1754 if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
Jungshik Shin70f82502016-01-29 00:32:36 -08001755 fUnion.fFields.fLengthAndFlags|=kOpenGetBuffer;
1756 setZeroLength();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001757 return getArrayStart();
1758 } else {
Jungshik Shin87232d82017-05-13 21:10:13 -07001759 return nullptr;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001760 }
1761}
1762
1763void
1764UnicodeString::releaseBuffer(int32_t newLength) {
Jungshik Shin70f82502016-01-29 00:32:36 -08001765 if(fUnion.fFields.fLengthAndFlags&kOpenGetBuffer && newLength>=-1) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001766 // set the new fLength
1767 int32_t capacity=getCapacity();
1768 if(newLength==-1) {
1769 // the new length is the string length, capped by fCapacity
1770 const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
1771 while(p<limit && *p!=0) {
1772 ++p;
1773 }
1774 newLength=(int32_t)(p-array);
1775 } else if(newLength>capacity) {
1776 newLength=capacity;
1777 }
1778 setLength(newLength);
Jungshik Shin70f82502016-01-29 00:32:36 -08001779 fUnion.fFields.fLengthAndFlags&=~kOpenGetBuffer;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001780 }
1781}
1782
1783//========================================
1784// Miscellaneous
1785//========================================
1786UBool
1787UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
1788 int32_t growCapacity,
1789 UBool doCopyArray,
1790 int32_t **pBufferToDelete,
1791 UBool forceClone) {
1792 // default parameters need to be static, therefore
1793 // the defaults are -1 to have convenience defaults
1794 if(newCapacity == -1) {
1795 newCapacity = getCapacity();
1796 }
1797
1798 // while a getBuffer(minCapacity) is "open",
1799 // prevent any modifications of the string by returning FALSE here
1800 // if the string is bogus, then only an assignment or similar can revive it
1801 if(!isWritable()) {
1802 return FALSE;
1803 }
1804
1805 /*
1806 * We need to make a copy of the array if
1807 * the buffer is read-only, or
1808 * the buffer is refCounted (shared), and refCount>1, or
1809 * the buffer is too small.
1810 * Return FALSE if memory could not be allocated.
1811 */
1812 if(forceClone ||
Jungshik Shin70f82502016-01-29 00:32:36 -08001813 fUnion.fFields.fLengthAndFlags & kBufferIsReadonly ||
1814 (fUnion.fFields.fLengthAndFlags & kRefCounted && refCount() > 1) ||
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001815 newCapacity > getCapacity()
1816 ) {
1817 // check growCapacity for default value and use of the stack buffer
1818 if(growCapacity < 0) {
1819 growCapacity = newCapacity;
1820 } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
1821 growCapacity = US_STACKBUF_SIZE;
1822 }
1823
1824 // save old values
1825 UChar oldStackBuffer[US_STACKBUF_SIZE];
1826 UChar *oldArray;
Jungshik Shin70f82502016-01-29 00:32:36 -08001827 int32_t oldLength = length();
1828 int16_t flags = fUnion.fFields.fLengthAndFlags;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001829
1830 if(flags&kUsingStackBuffer) {
1831 U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */
1832 if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
1833 // copy the stack buffer contents because it will be overwritten with
1834 // fUnion.fFields values
Jungshik Shin70f82502016-01-29 00:32:36 -08001835 us_arrayCopy(fUnion.fStackFields.fBuffer, 0, oldStackBuffer, 0, oldLength);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001836 oldArray = oldStackBuffer;
1837 } else {
Jungshik Shin70f82502016-01-29 00:32:36 -08001838 oldArray = NULL; // no need to copy from the stack buffer to itself
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001839 }
1840 } else {
1841 oldArray = fUnion.fFields.fArray;
1842 U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */
1843 }
1844
1845 // allocate a new array
1846 if(allocate(growCapacity) ||
1847 (newCapacity < growCapacity && allocate(newCapacity))
1848 ) {
Jungshik Shin70f82502016-01-29 00:32:36 -08001849 if(doCopyArray) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001850 // copy the contents
1851 // do not copy more than what fits - it may be smaller than before
Jungshik Shin70f82502016-01-29 00:32:36 -08001852 int32_t minLength = oldLength;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001853 newCapacity = getCapacity();
1854 if(newCapacity < minLength) {
1855 minLength = newCapacity;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001856 }
Jungshik Shin70f82502016-01-29 00:32:36 -08001857 if(oldArray != NULL) {
1858 us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
1859 }
1860 setLength(minLength);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001861 } else {
Jungshik Shin70f82502016-01-29 00:32:36 -08001862 setZeroLength();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001863 }
1864
1865 // release the old array
1866 if(flags & kRefCounted) {
1867 // the array is refCounted; decrement and release if 0
1868 u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1);
1869 if(umtx_atomic_dec(pRefCount) == 0) {
1870 if(pBufferToDelete == 0) {
1871 // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
1872 // is defined as volatile. (Volatile has useful non-standard behavior
1873 // with this compiler.)
1874 uprv_free((void *)pRefCount);
1875 } else {
1876 // the caller requested to delete it himself
1877 *pBufferToDelete = (int32_t *)pRefCount;
1878 }
1879 }
1880 }
1881 } else {
1882 // not enough memory for growCapacity and not even for the smaller newCapacity
1883 // reset the old values for setToBogus() to release the array
1884 if(!(flags&kUsingStackBuffer)) {
1885 fUnion.fFields.fArray = oldArray;
1886 }
Jungshik Shin70f82502016-01-29 00:32:36 -08001887 fUnion.fFields.fLengthAndFlags = flags;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001888 setToBogus();
1889 return FALSE;
1890 }
1891 }
1892 return TRUE;
1893}
1894
1895// UnicodeStringAppendable ------------------------------------------------- ***
1896
1897UnicodeStringAppendable::~UnicodeStringAppendable() {}
1898
1899UBool
1900UnicodeStringAppendable::appendCodeUnit(UChar c) {
Jungshik Shin70f82502016-01-29 00:32:36 -08001901 return str.doAppend(&c, 0, 1).isWritable();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001902}
1903
1904UBool
1905UnicodeStringAppendable::appendCodePoint(UChar32 c) {
1906 UChar buffer[U16_MAX_LENGTH];
1907 int32_t cLength = 0;
1908 UBool isError = FALSE;
1909 U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
Jungshik Shin70f82502016-01-29 00:32:36 -08001910 return !isError && str.doAppend(buffer, 0, cLength).isWritable();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001911}
1912
1913UBool
1914UnicodeStringAppendable::appendString(const UChar *s, int32_t length) {
Jungshik Shin70f82502016-01-29 00:32:36 -08001915 return str.doAppend(s, 0, length).isWritable();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001916}
1917
1918UBool
1919UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
1920 return str.cloneArrayIfNeeded(str.length() + appendCapacity);
1921}
1922
1923UChar *
1924UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
1925 int32_t desiredCapacityHint,
1926 UChar *scratch, int32_t scratchCapacity,
1927 int32_t *resultCapacity) {
1928 if(minCapacity < 1 || scratchCapacity < minCapacity) {
1929 *resultCapacity = 0;
1930 return NULL;
1931 }
1932 int32_t oldLength = str.length();
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001933 if(minCapacity <= (kMaxCapacity - oldLength) &&
1934 desiredCapacityHint <= (kMaxCapacity - oldLength) &&
1935 str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001936 *resultCapacity = str.getCapacity() - oldLength;
1937 return str.getArrayStart() + oldLength;
1938 }
1939 *resultCapacity = scratchCapacity;
1940 return scratch;
1941}
1942
1943U_NAMESPACE_END
1944
1945U_NAMESPACE_USE
1946
1947U_CAPI int32_t U_EXPORT2
1948uhash_hashUnicodeString(const UElement key) {
1949 const UnicodeString *str = (const UnicodeString*) key.pointer;
1950 return (str == NULL) ? 0 : str->hashCode();
1951}
1952
1953// Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
1954// does not depend on hashtable code.
1955U_CAPI UBool U_EXPORT2
1956uhash_compareUnicodeString(const UElement key1, const UElement key2) {
1957 const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
1958 const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
1959 if (str1 == str2) {
1960 return TRUE;
1961 }
1962 if (str1 == NULL || str2 == NULL) {
1963 return FALSE;
1964 }
1965 return *str1 == *str2;
1966}
1967
#ifdef U_STATIC_IMPLEMENTATION
/*
This function is never meant to be called. It exists only so that the
virtual vector deleting destructor is defined within unistr.cpp.
UObject already provides the vector deleting destructor, but defining it
here makes sure that it is included with this object file, which keeps
static library dependencies to a minimum.
*/
static void uprv_UnicodeStringDummy(void) {
    UnicodeString *scratchPair = new UnicodeString[2];
    delete [] scratchPair;
}
#endif