Blame - source/common/unistr_case.cpp - chromium.googlesource.com/chromium/deps/icu

blob: 1715b6ec66e268d4784ee35af1e3d8f094bff637 [file] [log] [blame]

Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1	// Copyright (C) 2016 and later: Unicode, Inc. and others.
				2	// License & terms of use: http://www.unicode.org/copyright.html
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	3	/*
				4	*******************************************************************************
				5	*
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	6	* Copyright (C) 1999-2014, International Business Machines
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	7	* Corporation and others. All Rights Reserved.
				8	*
				9	*******************************************************************************
				10	* file name: unistr_case.cpp
				11	* encoding: US-ASCII
				12	* tab size: 8 (not used)
				13	* indentation:2
				14	*
				15	* created on: 2004aug19
				16	* created by: Markus W. Scherer
				17	*
				18	* Case-mapping functions moved here from unistr.cpp
				19	*/
				20
				21	#include "unicode/utypes.h"
				22	#include "unicode/putil.h"
				23	#include "cstring.h"
				24	#include "cmemory.h"
				25	#include "unicode/ustring.h"
				26	#include "unicode/unistr.h"
				27	#include "unicode/uchar.h"
				28	#include "uelement.h"
				29	#include "ustr_imp.h"
				30
				31	U_NAMESPACE_BEGIN
				32
				33	//========================================
				34	// Read-only implementation
				35	//========================================
				36
				37	int8_t
				38	UnicodeString::doCaseCompare(int32_t start,
				39	int32_t length,
				40	const UChar *srcChars,
				41	int32_t srcStart,
				42	int32_t srcLength,
				43	uint32_t options) const
				44	{
				45	// compare illegal string values
				46	// treat const UChar *srcChars==NULL as an empty string
				47	if(isBogus()) {
				48	return -1;
				49	}
				50
				51	// pin indices to legal values
				52	pinIndices(start, length);
				53
				54	if(srcChars == NULL) {
				55	srcStart = srcLength = 0;
				56	}
				57
				58	// get the correct pointer
				59	const UChar *chars = getArrayStart();
				60
				61	chars += start;
				62	if(srcStart!=0) {
				63	srcChars += srcStart;
				64	}
				65
				66	if(chars != srcChars) {
				67	UErrorCode errorCode=U_ZERO_ERROR;
				68	int32_t result=u_strcmpFold(chars, length, srcChars, srcLength,
				69	options\|U_COMPARE_IGNORE_CASE, &errorCode);
				70	if(result!=0) {
				71	return (int8_t)(result >> 24 \| 1);
				72	}
				73	} else {
				74	// get the srcLength if necessary
				75	if(srcLength < 0) {
				76	srcLength = u_strlen(srcChars + srcStart);
				77	}
				78	if(length != srcLength) {
				79	return (int8_t)((length - srcLength) >> 24 \| 1);
				80	}
				81	}
				82	return 0;
				83	}
				84
				85	//========================================
				86	// Write implementation
				87	//========================================
				88
				89	UnicodeString &
				90	UnicodeString::caseMap(const UCaseMap *csm,
				91	UStringCaseMapper *stringCaseMapper) {
				92	if(isEmpty() \|\| !isWritable()) {
				93	// nothing to do
				94	return *this;
				95	}
				96
				97	// We need to allocate a new buffer for the internal string case mapping function.
				98	// This is very similar to how doReplace() keeps the old array pointer
				99	// and deletes the old array itself after it is done.
				100	// In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
				101	UChar oldStackBuffer[US_STACKBUF_SIZE];
				102	UChar *oldArray;
				103	int32_t oldLength;
				104
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	105	if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	106	// copy the stack buffer contents because it will be overwritten
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	107	oldArray = oldStackBuffer;
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	108	oldLength = getShortLength();
				109	u_memcpy(oldStackBuffer, fUnion.fStackFields.fBuffer, oldLength);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	110	} else {
				111	oldArray = getArrayStart();
				112	oldLength = length();
				113	}
				114
				115	int32_t capacity;
				116	if(oldLength <= US_STACKBUF_SIZE) {
				117	capacity = US_STACKBUF_SIZE;
				118	} else {
				119	capacity = oldLength + 20;
				120	}
				121	int32_t *bufferToDelete = 0;
				122	if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) {
				123	return *this;
				124	}
				125
				126	// Case-map, and if the result is too long, then reallocate and repeat.
				127	UErrorCode errorCode;
				128	int32_t newLength;
				129	do {
				130	errorCode = U_ZERO_ERROR;
				131	newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(),
				132	oldArray, oldLength, &errorCode);
				133	setLength(newLength);
				134	} while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE));
				135
				136	if (bufferToDelete) {
				137	uprv_free(bufferToDelete);
				138	}
				139	if(U_FAILURE(errorCode)) {
				140	setToBogus();
				141	}
				142	return *this;
				143	}
				144
				145	UnicodeString &
				146	UnicodeString::foldCase(uint32_t options) {
				147	UCaseMap csm=UCASEMAP_INITIALIZER;
				148	csm.csp=ucase_getSingleton();
				149	csm.options=options;
				150	return caseMap(&csm, ustrcase_internalFold);
				151	}
				152
				153	U_NAMESPACE_END
				154
				155	// Defined here to reduce dependencies on break iterator
				156	U_CAPI int32_t U_EXPORT2
				157	uhash_hashCaselessUnicodeString(const UElement key) {
				158	U_NAMESPACE_USE
				159	const UnicodeString str = (const UnicodeString) key.pointer;
				160	if (str == NULL) {
				161	return 0;
				162	}
				163	// Inefficient; a better way would be to have a hash function in
				164	// UnicodeString that does case folding on the fly.
				165	UnicodeString copy(*str);
				166	return copy.foldCase().hashCode();
				167	}
				168
				169	// Defined here to reduce dependencies on break iterator
				170	U_CAPI UBool U_EXPORT2
				171	uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) {
				172	U_NAMESPACE_USE
				173	const UnicodeString str1 = (const UnicodeString) key1.pointer;
				174	const UnicodeString str2 = (const UnicodeString) key2.pointer;
				175	if (str1 == str2) {
				176	return TRUE;
				177	}
				178	if (str1 == NULL \|\| str2 == NULL) {
				179	return FALSE;
				180	}
				181	return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
				182	}