Blame - source/common/ucase.cpp - chromium.googlesource.com/chromium/deps/icu

blob: 706d7289691293147407418c2ed5f27a6da93175 [file] [log] [blame]

Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1	// © 2016 and later: Unicode, Inc. and others.
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2	// License & terms of use: http://www.unicode.org/copyright.html
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	3	/*
				4	*******************************************************************************
				5	*
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	6	* Copyright (C) 2004-2014, International Business Machines
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	7	* Corporation and others. All Rights Reserved.
				8	*
				9	*******************************************************************************
				10	* file name: ucase.cpp
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	11	* encoding: UTF-8
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	12	* tab size: 8 (not used)
				13	* indentation:4
				14	*
				15	* created on: 2004aug30
				16	* created by: Markus W. Scherer
				17	*
				18	* Low-level Unicode character/string case mapping code.
				19	* Much code moved here (and modified) from uchar.c.
				20	*/
				21
				22	#include "unicode/utypes.h"
				23	#include "unicode/unistr.h"
				24	#include "unicode/uset.h"
				25	#include "unicode/udata.h" /* UDataInfo */
				26	#include "unicode/utf16.h"
				27	#include "ucmndata.h" /* DataHeader */
				28	#include "udatamem.h"
				29	#include "umutex.h"
				30	#include "uassert.h"
				31	#include "cmemory.h"
				32	#include "utrie2.h"
				33	#include "ucase.h"
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	34
				35	struct UCaseProps {
				36	UDataMemory *mem;
				37	const int32_t *indexes;
				38	const uint16_t *exceptions;
				39	const uint16_t *unfold;
				40
				41	UTrie2 trie;
				42	uint8_t formatVersion[4];
				43	};
				44
				45	/* ucase_props_data.h is machine-generated by gencase --csource */
				46	#define INCLUDED_FROM_UCASE_CPP
				47	#include "ucase_props_data.h"
				48
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	49	/* set of property starts for UnicodeSet ------------------------------------ */
				50
				51	static UBool U_CALLCONV
				52	_enumPropertyStartsRange(const void context, UChar32 start, UChar32 /end/, uint32_t /value*/) {
				53	/* add the start code point to the USet */
				54	const USetAdder sa=(const USetAdder )context;
				55	sa->add(sa->set, start);
				56	return TRUE;
				57	}
				58
				59	U_CFUNC void U_EXPORT2
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	60	ucase_addPropertyStarts(const USetAdder sa, UErrorCode pErrorCode) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	61	if(U_FAILURE(*pErrorCode)) {
				62	return;
				63	}
				64
				65	/* add the start code point of each same-value range of the trie */
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	66	utrie2_enum(&ucase_props_singleton.trie, NULL, _enumPropertyStartsRange, sa);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	67
				68	/* add code points with hardcoded properties, plus the ones following them */
				69
				70	/* (none right now, see comment below) */
				71
				72	/*
				73	* Omit code points with hardcoded specialcasing properties
				74	* because we do not build property UnicodeSets for them right now.
				75	*/
				76	}
				77
				78	/* data access primitives --------------------------------------------------- */
				79
				80	#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT))
				81
				82	#define PROPS_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION)
				83
				84	/* number of bits in an 8-bit integer value */
				85	static const uint8_t flagsOffset[256]={
				86	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
				87	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
				88	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
				89	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
				90	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
				91	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
				92	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
				93	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
				94	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
				95	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
				96	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
				97	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
				98	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
				99	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
				100	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
				101	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
				102	};
				103
				104	#define HAS_SLOT(flags, idx) ((flags)&(1<<(idx)))
				105	#define SLOT_OFFSET(flags, idx) flagsOffset[(flags)&((1<<(idx))-1)]
				106
				107	/*
				108	* Get the value of an optional-value slot where HAS_SLOT(excWord, idx).
				109	*
				110	* @param excWord (in) initial exceptions word
				111	* @param idx (in) desired slot index
				112	* @param pExc16 (in/out) const uint16_t * after excWord=*pExc16++;
				113	* moved to the last uint16_t of the value, use +1 for beginning of next slot
				114	* @param value (out) int32_t or uint32_t output if hasSlot, otherwise not modified
				115	*/
				116	#define GET_SLOT_VALUE(excWord, idx, pExc16, value) \
				117	if(((excWord)&UCASE_EXC_DOUBLE_SLOTS)==0) { \
				118	(pExc16)+=SLOT_OFFSET(excWord, idx); \
				119	(value)=*pExc16; \
				120	} else { \
				121	(pExc16)+=2*SLOT_OFFSET(excWord, idx); \
				122	(value)=*pExc16++; \
				123	(value)=((value)<<16)\|*pExc16; \
				124	}
				125
				126	/* simple case mappings ----------------------------------------------------- */
				127
				128	U_CAPI UChar32 U_EXPORT2
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	129	ucase_tolower(UChar32 c) {
				130	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	131	if(!PROPS_HAS_EXCEPTION(props)) {
				132	if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
				133	c+=UCASE_GET_DELTA(props);
				134	}
				135	} else {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	136	const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	137	uint16_t excWord=*pe++;
				138	if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
				139	GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe, c);
				140	}
				141	}
				142	return c;
				143	}
				144
				145	U_CAPI UChar32 U_EXPORT2
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	146	ucase_toupper(UChar32 c) {
				147	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	148	if(!PROPS_HAS_EXCEPTION(props)) {
				149	if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
				150	c+=UCASE_GET_DELTA(props);
				151	}
				152	} else {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	153	const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	154	uint16_t excWord=*pe++;
				155	if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
				156	GET_SLOT_VALUE(excWord, UCASE_EXC_UPPER, pe, c);
				157	}
				158	}
				159	return c;
				160	}
				161
				162	U_CAPI UChar32 U_EXPORT2
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	163	ucase_totitle(UChar32 c) {
				164	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	165	if(!PROPS_HAS_EXCEPTION(props)) {
				166	if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
				167	c+=UCASE_GET_DELTA(props);
				168	}
				169	} else {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	170	const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	171	uint16_t excWord=*pe++;
				172	int32_t idx;
				173	if(HAS_SLOT(excWord, UCASE_EXC_TITLE)) {
				174	idx=UCASE_EXC_TITLE;
				175	} else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
				176	idx=UCASE_EXC_UPPER;
				177	} else {
				178	return c;
				179	}
				180	GET_SLOT_VALUE(excWord, idx, pe, c);
				181	}
				182	return c;
				183	}
				184
				185	static const UChar iDot[2] = { 0x69, 0x307 };
				186	static const UChar jDot[2] = { 0x6a, 0x307 };
				187	static const UChar iOgonekDot[3] = { 0x12f, 0x307 };
				188	static const UChar iDotGrave[3] = { 0x69, 0x307, 0x300 };
				189	static const UChar iDotAcute[3] = { 0x69, 0x307, 0x301 };
				190	static const UChar iDotTilde[3] = { 0x69, 0x307, 0x303 };
				191
				192
				193	U_CFUNC void U_EXPORT2
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	194	ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	195	uint16_t props;
				196
				197	/*
				198	* Hardcode the case closure of i and its relatives and ignore the
				199	* data file data for these characters.
				200	* The Turkic dotless i and dotted I with their case mapping conditions
				201	* and case folding option make the related characters behave specially.
				202	* This code matches their closure behavior to their case folding behavior.
				203	*/
				204
				205	switch(c) {
				206	case 0x49:
				207	/* regular i and I are in one equivalence class */
				208	sa->add(sa->set, 0x69);
				209	return;
				210	case 0x69:
				211	sa->add(sa->set, 0x49);
				212	return;
				213	case 0x130:
				214	/* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */
				215	sa->addString(sa->set, iDot, 2);
				216	return;
				217	case 0x131:
				218	/* dotless i is in a class by itself */
				219	return;
				220	default:
				221	/* otherwise use the data file data */
				222	break;
				223	}
				224
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	225	props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	226	if(!PROPS_HAS_EXCEPTION(props)) {
				227	if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
				228	/* add the one simple case mapping, no matter what type it is */
				229	int32_t delta=UCASE_GET_DELTA(props);
				230	if(delta!=0) {
				231	sa->add(sa->set, c+delta);
				232	}
				233	}
				234	} else {
				235	/*
				236	* c has exceptions, so there may be multiple simple and/or
				237	* full case mappings. Add them all.
				238	*/
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	239	const uint16_t pe0, pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	240	const UChar *closure;
				241	uint16_t excWord=*pe++;
				242	int32_t idx, closureLength, fullLength, length;
				243
				244	pe0=pe;
				245
				246	/* add all simple case mappings */
				247	for(idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) {
				248	if(HAS_SLOT(excWord, idx)) {
				249	pe=pe0;
				250	GET_SLOT_VALUE(excWord, idx, pe, c);
				251	sa->add(sa->set, c);
				252	}
				253	}
				254
				255	/* get the closure string pointer & length */
				256	if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) {
				257	pe=pe0;
				258	GET_SLOT_VALUE(excWord, UCASE_EXC_CLOSURE, pe, closureLength);
				259	closureLength&=UCASE_CLOSURE_MAX_LENGTH; /* higher bits are reserved */
				260	closure=(const UChar )pe+1; / behind this slot, unless there are full case mappings */
				261	} else {
				262	closureLength=0;
				263	closure=NULL;
				264	}
				265
				266	/* add the full case folding */
				267	if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
				268	pe=pe0;
				269	GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, fullLength);
				270
				271	/* start of full case mapping strings */
				272	++pe;
				273
				274	fullLength&=0xffff; /* bits 16 and higher are reserved */
				275
				276	/* skip the lowercase result string */
				277	pe+=fullLength&UCASE_FULL_LOWER;
				278	fullLength>>=4;
				279
				280	/* add the full case folding string */
				281	length=fullLength&0xf;
				282	if(length!=0) {
				283	sa->addString(sa->set, (const UChar *)pe, length);
				284	pe+=length;
				285	}
				286
				287	/* skip the uppercase and titlecase strings */
				288	fullLength>>=4;
				289	pe+=fullLength&0xf;
				290	fullLength>>=4;
				291	pe+=fullLength;
				292
				293	closure=(const UChar )pe; / behind full case mappings */
				294	}
				295
				296	/* add each code point in the closure string */
				297	for(idx=0; idx<closureLength;) {
				298	U16_NEXT_UNSAFE(closure, idx, c);
				299	sa->add(sa->set, c);
				300	}
				301	}
				302	}
				303
				304	/*
				305	* compare s, which has a length, with t, which has a maximum length or is NUL-terminated
				306	* must be length>0 and max>0 and length<=max
				307	*/
				308	static inline int32_t
				309	strcmpMax(const UChar s, int32_t length, const UChar t, int32_t max) {
				310	int32_t c1, c2;
				311
				312	max-=length; /* we require length<=max, so no need to decrement max in the loop */
				313	do {
				314	c1=*s++;
				315	c2=*t++;
				316	if(c2==0) {
				317	return 1; /* reached the end of t but not of s */
				318	}
				319	c1-=c2;
				320	if(c1!=0) {
				321	return c1; /* return difference result */
				322	}
				323	} while(--length>0);
				324	/* ends with length==0 */
				325
				326	if(max==0 \|\| *t==0) {
				327	return 0; /* equal to length of both strings */
				328	} else {
				329	return -max; /* return lengh difference */
				330	}
				331	}
				332
				333	U_CFUNC UBool U_EXPORT2
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	334	ucase_addStringCaseClosure(const UChar s, int32_t length, const USetAdder sa) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	335	int32_t i, start, limit, result, unfoldRows, unfoldRowWidth, unfoldStringWidth;
				336
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	337	if(ucase_props_singleton.unfold==NULL \|\| s==NULL) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	338	return FALSE; /* no reverse case folding data, or no string */
				339	}
				340	if(length<=1) {
				341	/* the string is too short to find any match */
				342	/*
				343	* more precise would be:
				344	* if(!u_strHasMoreChar32Than(s, length, 1))
				345	* but this does not make much practical difference because
				346	* a single supplementary code point would just not be found
				347	*/
				348	return FALSE;
				349	}
				350
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	351	const uint16_t *unfold=ucase_props_singleton.unfold;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	352	unfoldRows=unfold[UCASE_UNFOLD_ROWS];
				353	unfoldRowWidth=unfold[UCASE_UNFOLD_ROW_WIDTH];
				354	unfoldStringWidth=unfold[UCASE_UNFOLD_STRING_WIDTH];
				355	unfold+=unfoldRowWidth;
				356
				357	if(length>unfoldStringWidth) {
				358	/* the string is too long to find any match */
				359	return FALSE;
				360	}
				361
				362	/* do a binary search for the string */
				363	start=0;
				364	limit=unfoldRows;
				365	while(start<limit) {
				366	i=(start+limit)/2;
				367	const UChar p=reinterpret_cast<const UChar >(unfold+(i*unfoldRowWidth));
				368	result=strcmpMax(s, length, p, unfoldStringWidth);
				369
				370	if(result==0) {
				371	/* found the string: add each code point, and its case closure */
				372	UChar32 c;
				373
				374	for(i=unfoldStringWidth; i<unfoldRowWidth && p[i]!=0;) {
				375	U16_NEXT_UNSAFE(p, i, c);
				376	sa->add(sa->set, c);
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	377	ucase_addCaseClosure(c, sa);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	378	}
				379	return TRUE;
				380	} else if(result<0) {
				381	limit=i;
				382	} else /* result>0 */ {
				383	start=i+1;
				384	}
				385	}
				386
				387	return FALSE; /* string not found */
				388	}
				389
				390	U_NAMESPACE_BEGIN
				391
				392	FullCaseFoldingIterator::FullCaseFoldingIterator()
				393	: unfold(reinterpret_cast<const UChar *>(ucase_props_singleton.unfold)),
				394	unfoldRows(unfold[UCASE_UNFOLD_ROWS]),
				395	unfoldRowWidth(unfold[UCASE_UNFOLD_ROW_WIDTH]),
				396	unfoldStringWidth(unfold[UCASE_UNFOLD_STRING_WIDTH]),
				397	currentRow(0),
				398	rowCpIndex(unfoldStringWidth) {
				399	unfold+=unfoldRowWidth;
				400	}
				401
				402	UChar32
				403	FullCaseFoldingIterator::next(UnicodeString &full) {
				404	// Advance past the last-delivered code point.
				405	const UChar p=unfold+(currentRowunfoldRowWidth);
				406	if(rowCpIndex>=unfoldRowWidth \|\| p[rowCpIndex]==0) {
				407	++currentRow;
				408	p+=unfoldRowWidth;
				409	rowCpIndex=unfoldStringWidth;
				410	}
				411	if(currentRow>=unfoldRows) { return U_SENTINEL; }
				412	// Set "full" to the NUL-terminated string in the first unfold column.
				413	int32_t length=unfoldStringWidth;
				414	while(length>0 && p[length-1]==0) { --length; }
				415	full.setTo(FALSE, p, length);
				416	// Return the code point.
				417	UChar32 c;
				418	U16_NEXT_UNSAFE(p, rowCpIndex, c);
				419	return c;
				420	}
				421
				422	U_NAMESPACE_END
				423
				424	/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
				425	U_CAPI int32_t U_EXPORT2
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	426	ucase_getType(UChar32 c) {
				427	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	428	return UCASE_GET_TYPE(props);
				429	}
				430
				431	/** @return same as ucase_getType() and set bit 2 if c is case-ignorable */
				432	U_CAPI int32_t U_EXPORT2
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	433	ucase_getTypeOrIgnorable(UChar32 c) {
				434	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	435	return UCASE_GET_TYPE_AND_IGNORABLE(props);
				436	}
				437
				438	/** @return UCASE_NO_DOT, UCASE_SOFT_DOTTED, UCASE_ABOVE, UCASE_OTHER_ACCENT */
				439	static inline int32_t
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	440	getDotType(UChar32 c) {
				441	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	442	if(!PROPS_HAS_EXCEPTION(props)) {
				443	return props&UCASE_DOT_MASK;
				444	} else {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	445	const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	446	return (*pe>>UCASE_EXC_DOT_SHIFT)&UCASE_DOT_MASK;
				447	}
				448	}
				449
				450	U_CAPI UBool U_EXPORT2
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	451	ucase_isSoftDotted(UChar32 c) {
				452	return (UBool)(getDotType(c)==UCASE_SOFT_DOTTED);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	453	}
				454
				455	U_CAPI UBool U_EXPORT2
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	456	ucase_isCaseSensitive(UChar32 c) {
				457	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	458	return (UBool)((props&UCASE_SENSITIVE)!=0);
				459	}
				460
				461	/* string casing ------------------------------------------------------------ */
				462
				463	/*
				464	* These internal functions form the core of string case mappings.
				465	* They map single code points to result code points or strings and take
				466	* all necessary conditions (context, locale ID, options) into account.
				467	*
				468	* They do not iterate over the source or write to the destination
				469	* so that the same functions are useful for non-standard string storage,
				470	* such as in a Replaceable (for Transliterator) or UTF-8/32 strings etc.
				471	* For the same reason, the "surrounding text" context is passed in as a
				472	* UCaseContextIterator which does not make any assumptions about
				473	* the underlying storage.
				474	*
				475	* This section contains helper functions that check for conditions
				476	* in the input text surrounding the current code point
				477	* according to SpecialCasing.txt.
				478	*
				479	* Each helper function gets the index
				480	* - after the current code point if it looks at following text
				481	* - before the current code point if it looks at preceding text
				482	*
				483	* Unicode 3.2 UAX 21 "Case Mappings" defines the conditions as follows:
				484	*
				485	* Final_Sigma
				486	* C is preceded by a sequence consisting of
				487	* a cased letter and a case-ignorable sequence,
				488	* and C is not followed by a sequence consisting of
				489	* an ignorable sequence and then a cased letter.
				490	*
				491	* More_Above
				492	* C is followed by one or more characters of combining class 230 (ABOVE)
				493	* in the combining character sequence.
				494	*
				495	* After_Soft_Dotted
				496	* The last preceding character with combining class of zero before C
				497	* was Soft_Dotted,
				498	* and there is no intervening combining character class 230 (ABOVE).
				499	*
				500	* Before_Dot
				501	* C is followed by combining dot above (U+0307).
				502	* Any sequence of characters with a combining class that is neither 0 nor 230
				503	* may intervene between the current character and the combining dot above.
				504	*
				505	* The erratum from 2002-10-31 adds the condition
				506	*
				507	* After_I
				508	* The last preceding base character was an uppercase I, and there is no
				509	* intervening combining character class 230 (ABOVE).
				510	*
				511	* (See Jitterbug 2344 and the comments on After_I below.)
				512	*
				513	* Helper definitions in Unicode 3.2 UAX 21:
				514	*
				515	* D1. A character C is defined to be cased
				516	* if it meets any of the following criteria:
				517	*
				518	* - The general category of C is Titlecase Letter (Lt)
				519	* - In [CoreProps], C has one of the properties Uppercase, or Lowercase
				520	* - Given D = NFD(C), then it is not the case that:
				521	* D = UCD_lower(D) = UCD_upper(D) = UCD_title(D)
				522	* (This third criterium does not add any characters to the list
				523	* for Unicode 3.2. Ignored.)
				524	*
				525	* D2. A character C is defined to be case-ignorable
				526	* if it meets either of the following criteria:
				527	*
				528	* - The general category of C is
				529	* Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or
				530	* Letter Modifier (Lm), or Symbol Modifier (Sk)
				531	* - C is one of the following characters
				532	* U+0027 APOSTROPHE
				533	* U+00AD SOFT HYPHEN (SHY)
				534	* U+2019 RIGHT SINGLE QUOTATION MARK
				535	* (the preferred character for apostrophe)
				536	*
				537	* D3. A case-ignorable sequence is a sequence of
				538	* zero or more case-ignorable characters.
				539	*/
				540
/* Case-insensitive tests of a char against one ASCII letter; each macro evaluates c twice. */
#define is_d(c) ((c)=='d' || (c)=='D')
#define is_e(c) ((c)=='e' || (c)=='E')
#define is_i(c) ((c)=='i' || (c)=='I')
#define is_l(c) ((c)=='l' || (c)=='L')
#define is_r(c) ((c)=='r' || (c)=='R')
#define is_t(c) ((c)=='t' || (c)=='T')
#define is_u(c) ((c)=='u' || (c)=='U')
#define is_z(c) ((c)=='z' || (c)=='Z')

/* separator? (underscore, hyphen, or the terminating NUL) */
#define is_sep(c) ((c)=='_' || (c)=='-' || (c)==0)
				552
				553	/**
				554	* Requires non-NULL locale ID but otherwise does the equivalent of
				555	* checking for language codes as if uloc_getLanguage() were called:
				556	* Accepts both 2- and 3-letter codes and accepts case variants.
				557	*/
				558	U_CFUNC int32_t
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	559	ucase_getCaseLocale(const char *locale) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	560	/*
				561	* This function used to use uloc_getLanguage(), but the current code
				562	* removes the dependency of this low-level code on uloc implementation code
				563	* and is faster because not the whole locale ID has to be
				564	* examined and copied/transformed.
				565	*
				566	* Because this code does not want to depend on uloc, the caller must
				567	* pass in a non-NULL locale, i.e., may need to call uloc_getDefault().
				568	*/
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	569	char c=*locale++;
				570	// Fastpath for English "en" which is often used for default (=root locale) case mappings,
				571	// and for Chinese "zh": Very common but no special case mapping behavior.
				572	// Then check lowercase vs. uppercase to reduce the number of comparisons
				573	// for other locales without special behavior.
				574	if(c=='e') {
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	575	/* el or ell? */
				576	c=*locale++;
				577	if(is_l(c)) {
				578	c=*locale++;
				579	if(is_l(c)) {
				580	c=*locale;
				581	}
				582	if(is_sep(c)) {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	583	return UCASE_LOC_GREEK;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	584	}
				585	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	586	// en, es, ... -> root
				587	} else if(c=='z') {
				588	return UCASE_LOC_ROOT;
				589	#if U_CHARSET_FAMILY==U_ASCII_FAMILY
				590	} else if(c>='a') { // ASCII a-z = 0x61..0x7a, after A-Z
				591	#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
				592	} else if(c<='z') { // EBCDIC a-z = 0x81..0xa9 with two gaps, before A-Z
				593	#else
				594	# error Unknown charset family!
				595	#endif
				596	// lowercase c
				597	if(c=='t') {
				598	/* tr or tur? */
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	599	c=*locale++;
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	600	if(is_u(c)) {
				601	c=*locale++;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	602	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	603	if(is_r(c)) {
				604	c=*locale;
				605	if(is_sep(c)) {
				606	return UCASE_LOC_TURKISH;
				607	}
				608	}
				609	} else if(c=='a') {
				610	/* az or aze? */
				611	c=*locale++;
				612	if(is_z(c)) {
				613	c=*locale++;
				614	if(is_e(c)) {
				615	c=*locale;
				616	}
				617	if(is_sep(c)) {
				618	return UCASE_LOC_TURKISH;
				619	}
				620	}
				621	} else if(c=='l') {
				622	/* lt or lit? */
				623	c=*locale++;
				624	if(is_i(c)) {
				625	c=*locale++;
				626	}
				627	if(is_t(c)) {
				628	c=*locale;
				629	if(is_sep(c)) {
				630	return UCASE_LOC_LITHUANIAN;
				631	}
				632	}
				633	} else if(c=='n') {
				634	/* nl or nld? */
				635	c=*locale++;
				636	if(is_l(c)) {
				637	c=*locale++;
				638	if(is_d(c)) {
				639	c=*locale;
				640	}
				641	if(is_sep(c)) {
				642	return UCASE_LOC_DUTCH;
				643	}
				644	}
				645	}
				646	} else {
				647	// uppercase c
				648	// Same code as for lowercase c but also check for 'E'.
				649	if(c=='T') {
				650	/* tr or tur? */
				651	c=*locale++;
				652	if(is_u(c)) {
				653	c=*locale++;
				654	}
				655	if(is_r(c)) {
				656	c=*locale;
				657	if(is_sep(c)) {
				658	return UCASE_LOC_TURKISH;
				659	}
				660	}
				661	} else if(c=='A') {
				662	/* az or aze? */
				663	c=*locale++;
				664	if(is_z(c)) {
				665	c=*locale++;
				666	if(is_e(c)) {
				667	c=*locale;
				668	}
				669	if(is_sep(c)) {
				670	return UCASE_LOC_TURKISH;
				671	}
				672	}
				673	} else if(c=='L') {
				674	/* lt or lit? */
				675	c=*locale++;
				676	if(is_i(c)) {
				677	c=*locale++;
				678	}
				679	if(is_t(c)) {
				680	c=*locale;
				681	if(is_sep(c)) {
				682	return UCASE_LOC_LITHUANIAN;
				683	}
				684	}
				685	} else if(c=='E') {
				686	/* el or ell? */
				687	c=*locale++;
				688	if(is_l(c)) {
				689	c=*locale++;
				690	if(is_l(c)) {
				691	c=*locale;
				692	}
				693	if(is_sep(c)) {
				694	return UCASE_LOC_GREEK;
				695	}
				696	}
				697	} else if(c=='N') {
				698	/* nl or nld? */
				699	c=*locale++;
				700	if(is_l(c)) {
				701	c=*locale++;
				702	if(is_d(c)) {
				703	c=*locale;
				704	}
				705	if(is_sep(c)) {
				706	return UCASE_LOC_DUTCH;
				707	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	708	}
				709	}
				710	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	711	return UCASE_LOC_ROOT;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	712	}
				713
				714	/*
				715	* Is followed by
				716	* {case-ignorable}* cased
				717	* ?
				718	* (dir determines looking forward/backward)
				719	* If a character is case-ignorable, it is skipped regardless of whether
				720	* it is also cased or not.
				721	*/
				722	static UBool
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	723	isFollowedByCasedLetter(UCaseContextIterator iter, void context, int8_t dir) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	724	UChar32 c;
				725
				726	if(iter==NULL) {
				727	return FALSE;
				728	}
				729
				730	for(/* dir!=0 sets direction */; (c=iter(context, dir))>=0; dir=0) {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	731	int32_t type=ucase_getTypeOrIgnorable(c);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	732	if(type&4) {
				733	/* case-ignorable, continue with the loop */
				734	} else if(type!=UCASE_NONE) {
				735	return TRUE; /* followed by cased letter */
				736	} else {
				737	return FALSE; /* uncased and not case-ignorable */
				738	}
				739	}
				740
				741	return FALSE; /* not followed by cased letter */
				742	}
				743
				744	/* Is preceded by Soft_Dotted character with no intervening cc=230 ? */
				745	static UBool
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	746	isPrecededBySoftDotted(UCaseContextIterator iter, void context) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	747	UChar32 c;
				748	int32_t dotType;
				749	int8_t dir;
				750
				751	if(iter==NULL) {
				752	return FALSE;
				753	}
				754
				755	for(dir=-1; (c=iter(context, dir))>=0; dir=0) {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	756	dotType=getDotType(c);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	757	if(dotType==UCASE_SOFT_DOTTED) {
				758	return TRUE; /* preceded by TYPE_i */
				759	} else if(dotType!=UCASE_OTHER_ACCENT) {
				760	return FALSE; /* preceded by different base character (not TYPE_i), or intervening cc==230 */
				761	}
				762	}
				763
				764	return FALSE; /* not preceded by TYPE_i */
				765	}
				766
				767	/*
				768	* See Jitterbug 2344:
				769	* The condition After_I for Turkic-lowercasing of U+0307 combining dot above
				770	* is checked in ICU 2.0, 2.1, 2.6 but was not in 2.2 & 2.4 because
				771	* we made those releases compatible with Unicode 3.2 which had not fixed
				772	* a related bug in SpecialCasing.txt.
				773	*
				774	* From the Jitterbug 2344 text:
				775	* ... this bug is listed as a Unicode erratum
				776	* from 2002-10-31 at http://www.unicode.org/uni2errata/UnicodeErrata.html
				777	* <quote>
				778	* There are two errors in SpecialCasing.txt.
				779	* 1. Missing semicolons on two lines. ... [irrelevant for ICU]
				780	* 2. An incorrect context definition. Correct as follows:
				781	* < 0307; ; 0307; 0307; tr After_Soft_Dotted; # COMBINING DOT ABOVE
				782	* < 0307; ; 0307; 0307; az After_Soft_Dotted; # COMBINING DOT ABOVE
				783	* ---
				784	* > 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
				785	* > 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
				786	* where the context After_I is defined as:
				787	* The last preceding base character was an uppercase I, and there is no
				788	* intervening combining character class 230 (ABOVE).
				789	* </quote>
				790	*
				791	* Note that SpecialCasing.txt even in Unicode 3.2 described the condition as:
				792	*
				793	* # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
				794	* # This matches the behavior of the canonically equivalent I-dot_above
				795	*
				796	* See also the description in this place in older versions of uchar.c (revision 1.100).
				797	*
				798	* Markus W. Scherer 2003-feb-15
				799	*/
				800
				801	/* Is preceded by base character 'I' with no intervening cc=230 ? */
				802	static UBool
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	803	isPrecededBy_I(UCaseContextIterator iter, void context) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	804	UChar32 c;
				805	int32_t dotType;
				806	int8_t dir;
				807
				808	if(iter==NULL) {
				809	return FALSE;
				810	}
				811
				812	for(dir=-1; (c=iter(context, dir))>=0; dir=0) {
				813	if(c==0x49) {
				814	return TRUE; /* preceded by I */
				815	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	816	dotType=getDotType(c);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	817	if(dotType!=UCASE_OTHER_ACCENT) {
				818	return FALSE; /* preceded by different base character (not I), or intervening cc==230 */
				819	}
				820	}
				821
				822	return FALSE; /* not preceded by I */
				823	}
				824
				825	/* Is followed by one or more cc==230 ? */
				826	static UBool
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	827	isFollowedByMoreAbove(UCaseContextIterator iter, void context) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	828	UChar32 c;
				829	int32_t dotType;
				830	int8_t dir;
				831
				832	if(iter==NULL) {
				833	return FALSE;
				834	}
				835
				836	for(dir=1; (c=iter(context, dir))>=0; dir=0) {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	837	dotType=getDotType(c);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	838	if(dotType==UCASE_ABOVE) {
				839	return TRUE; /* at least one cc==230 following */
				840	} else if(dotType!=UCASE_OTHER_ACCENT) {
				841	return FALSE; /* next base character, no more cc==230 following */
				842	}
				843	}
				844
				845	return FALSE; /* no more cc==230 following */
				846	}
				847
				848	/* Is followed by a dot above (without cc==230 in between) ? */
				849	static UBool
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	850	isFollowedByDotAbove(UCaseContextIterator iter, void context) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	851	UChar32 c;
				852	int32_t dotType;
				853	int8_t dir;
				854
				855	if(iter==NULL) {
				856	return FALSE;
				857	}
				858
				859	for(dir=1; (c=iter(context, dir))>=0; dir=0) {
				860	if(c==0x307) {
				861	return TRUE;
				862	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	863	dotType=getDotType(c);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	864	if(dotType!=UCASE_OTHER_ACCENT) {
				865	return FALSE; /* next base character or cc==230 in between */
				866	}
				867	}
				868
				869	return FALSE; /* no dot above following */
				870	}
				871
				872	U_CAPI int32_t U_EXPORT2
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	873	ucase_toFullLower(UChar32 c,
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	874	UCaseContextIterator iter, void context,
				875	const UChar **pString,
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	876	int32_t loc) {
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	877	// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
				878	U_ASSERT(c >= 0);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	879	UChar32 result=c;
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	880	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	881	if(!PROPS_HAS_EXCEPTION(props)) {
				882	if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
				883	result=c+UCASE_GET_DELTA(props);
				884	}
				885	} else {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	886	const uint16_t pe=GET_EXCEPTIONS(&ucase_props_singleton, props), pe2;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	887	uint16_t excWord=*pe++;
				888	int32_t full;
				889
				890	pe2=pe;
				891
				892	if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) {
				893	/* use hardcoded conditions and mappings */
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	894
				895	/*
				896	* Test for conditional mappings first
				897	* (otherwise the unconditional default mappings are always taken),
				898	* then test for characters that have unconditional mappings in SpecialCasing.txt,
				899	* then get the UnicodeData.txt mappings.
				900	*/
				901	if( loc==UCASE_LOC_LITHUANIAN &&
				902	/* base characters, find accents above */
				903	(((c==0x49 \|\| c==0x4a \|\| c==0x12e) &&
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	904	isFollowedByMoreAbove(iter, context)) \|\|
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	905	/* precomposed with accent above, no need to find one */
				906	(c==0xcc \|\| c==0xcd \|\| c==0x128))
				907	) {
				908	/*
				909	# Lithuanian
				910
				911	# Lithuanian retains the dot in a lowercase i when followed by accents.
				912
				913	# Introduce an explicit dot above when lowercasing capital I's and J's
				914	# whenever there are more accents above.
				915	# (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
				916
				917	0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
				918	004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
				919	012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
				920	00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
				921	00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
				922	0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
				923	*/
				924	switch(c) {
				925	case 0x49: /* LATIN CAPITAL LETTER I */
				926	*pString=iDot;
				927	return 2;
				928	case 0x4a: /* LATIN CAPITAL LETTER J */
				929	*pString=jDot;
				930	return 2;
				931	case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
				932	*pString=iOgonekDot;
				933	return 2;
				934	case 0xcc: /* LATIN CAPITAL LETTER I WITH GRAVE */
				935	*pString=iDotGrave;
				936	return 3;
				937	case 0xcd: /* LATIN CAPITAL LETTER I WITH ACUTE */
				938	*pString=iDotAcute;
				939	return 3;
				940	case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
				941	*pString=iDotTilde;
				942	return 3;
				943	default:
				944	return 0; /* will not occur */
				945	}
				946	/* # Turkish and Azeri */
				947	} else if(loc==UCASE_LOC_TURKISH && c==0x130) {
				948	/*
				949	# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
				950	# The following rules handle those cases.
				951
				952	0130; 0069; 0130; 0130; tr # LATIN CAPITAL LETTER I WITH DOT ABOVE
				953	0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
				954	*/
				955	return 0x69;
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	956	} else if(loc==UCASE_LOC_TURKISH && c==0x307 && isPrecededBy_I(iter, context)) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	957	/*
				958	# When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
				959	# This matches the behavior of the canonically equivalent I-dot_above
				960
				961	0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
				962	0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
				963	*/
				964	return 0; /* remove the dot (continue without output) */
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	965	} else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter, context)) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	966	/*
				967	# When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
				968
				969	0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
				970	0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
				971	*/
				972	return 0x131;
				973	} else if(c==0x130) {
				974	/*
				975	# Preserve canonical equivalence for I with dot. Turkic is handled below.
				976
				977	0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
				978	*/
				979	*pString=iDot;
				980	return 2;
				981	} else if( c==0x3a3 &&
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	982	!isFollowedByCasedLetter(iter, context, 1) &&
				983	isFollowedByCasedLetter(iter, context, -1) /* -1=preceded */
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	984	) {
				985	/* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */
				986	/*
				987	# Special case for final form of sigma
				988
				989	03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
				990	*/
				991	return 0x3c2; /* greek small final sigma */
				992	} else {
				993	/* no known conditional special case mapping, use a normal mapping */
				994	}
				995	} else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
				996	GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
				997	full&=UCASE_FULL_LOWER;
				998	if(full!=0) {
				999	/* set the output pointer to the lowercase mapping */
				1000	pString=reinterpret_cast<const UChar >(pe+1);
				1001
				1002	/* return the string length */
				1003	return full;
				1004	}
				1005	}
				1006
				1007	if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
				1008	GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe2, result);
				1009	}
				1010	}
				1011
				1012	return (result==c) ? ~result : result;
				1013	}
				1014
				1015	/* internal */
				1016	static int32_t
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1017	toUpperOrTitle(UChar32 c,
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1018	UCaseContextIterator iter, void context,
				1019	const UChar **pString,
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1020	int32_t loc,
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1021	UBool upperNotTitle) {
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1022	// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
				1023	U_ASSERT(c >= 0);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1024	UChar32 result=c;
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1025	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1026	if(!PROPS_HAS_EXCEPTION(props)) {
				1027	if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
				1028	result=c+UCASE_GET_DELTA(props);
				1029	}
				1030	} else {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1031	const uint16_t pe=GET_EXCEPTIONS(&ucase_props_singleton, props), pe2;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1032	uint16_t excWord=*pe++;
				1033	int32_t full, idx;
				1034
				1035	pe2=pe;
				1036
				1037	if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) {
				1038	/* use hardcoded conditions and mappings */
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1039	if(loc==UCASE_LOC_TURKISH && c==0x69) {
				1040	/*
				1041	# Turkish and Azeri
				1042
				1043	# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
				1044	# The following rules handle those cases.
				1045
				1046	# When uppercasing, i turns into a dotted capital I
				1047
				1048	0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
				1049	0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
				1050	*/
				1051	return 0x130;
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1052	} else if(loc==UCASE_LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(iter, context)) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1053	/*
				1054	# Lithuanian
				1055
				1056	# Lithuanian retains the dot in a lowercase i when followed by accents.
				1057
				1058	# Remove DOT ABOVE after "i" with upper or titlecase
				1059
				1060	0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
				1061	*/
				1062	return 0; /* remove the dot (continue without output) */
				1063	} else {
				1064	/* no known conditional special case mapping, use a normal mapping */
				1065	}
				1066	} else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
				1067	GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
				1068
				1069	/* start of full case mapping strings */
				1070	++pe;
				1071
				1072	/* skip the lowercase and case-folding result strings */
				1073	pe+=full&UCASE_FULL_LOWER;
				1074	full>>=4;
				1075	pe+=full&0xf;
				1076	full>>=4;
				1077
				1078	if(upperNotTitle) {
				1079	full&=0xf;
				1080	} else {
				1081	/* skip the uppercase result string */
				1082	pe+=full&0xf;
				1083	full=(full>>4)&0xf;
				1084	}
				1085
				1086	if(full!=0) {
				1087	/* set the output pointer to the result string */
				1088	pString=reinterpret_cast<const UChar >(pe);
				1089
				1090	/* return the string length */
				1091	return full;
				1092	}
				1093	}
				1094
				1095	if(!upperNotTitle && HAS_SLOT(excWord, UCASE_EXC_TITLE)) {
				1096	idx=UCASE_EXC_TITLE;
				1097	} else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
				1098	/* here, titlecase is same as uppercase */
				1099	idx=UCASE_EXC_UPPER;
				1100	} else {
				1101	return ~c;
				1102	}
				1103	GET_SLOT_VALUE(excWord, idx, pe2, result);
				1104	}
				1105
				1106	return (result==c) ? ~result : result;
				1107	}
				1108
				1109	U_CAPI int32_t U_EXPORT2
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1110	ucase_toFullUpper(UChar32 c,
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1111	UCaseContextIterator iter, void context,
				1112	const UChar **pString,
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1113	int32_t caseLocale) {
				1114	return toUpperOrTitle(c, iter, context, pString, caseLocale, TRUE);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1115	}
				1116
				1117	U_CAPI int32_t U_EXPORT2
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1118	ucase_toFullTitle(UChar32 c,
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1119	UCaseContextIterator iter, void context,
				1120	const UChar **pString,
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1121	int32_t caseLocale) {
				1122	return toUpperOrTitle(c, iter, context, pString, caseLocale, FALSE);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1123	}
				1124
				1125	/* case folding ------------------------------------------------------------- */
				1126
				1127	/*
				1128	* Case folding is similar to lowercasing.
				1129	* The result may be a simple mapping, i.e., a single code point, or
				1130	* a full mapping, i.e., a string.
				1131	* If the case folding for a code point is the same as its simple (1:1) lowercase mapping,
				1132	* then only the lowercase mapping is stored.
				1133	*
				1134	* Some special cases are hardcoded because their conditions cannot be
				1135	* parsed and processed from CaseFolding.txt.
				1136	*
				1137	* Unicode 3.2 CaseFolding.txt specifies for its status field:
				1138
				1139	# C: common case folding, common mappings shared by both simple and full mappings.
				1140	# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
				1141	# S: simple case folding, mappings to single characters where different from F.
				1142	# T: special case for uppercase I and dotted uppercase I
				1143	# - For non-Turkic languages, this mapping is normally not used.
				1144	# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
				1145	#
				1146	# Usage:
				1147	# A. To do a simple case folding, use the mappings with status C + S.
				1148	# B. To do a full case folding, use the mappings with status C + F.
				1149	#
				1150	# The mappings with status T can be used or omitted depending on the desired case-folding
				1151	# behavior. (The default option is to exclude them.)
				1152
				1153	* Unicode 3.2 has 'T' mappings as follows:
				1154
				1155	0049; T; 0131; # LATIN CAPITAL LETTER I
				1156	0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
				1157
				1158	* while the default mappings for these code points are:
				1159
				1160	0049; C; 0069; # LATIN CAPITAL LETTER I
				1161	0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
				1162
				1163	* U+0130 has no simple case folding (simple-case-folds to itself).
				1164	*/
				1165
				1166	/* return the simple case folding mapping for c */
				1167	U_CAPI UChar32 U_EXPORT2
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1168	ucase_fold(UChar32 c, uint32_t options) {
				1169	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1170	if(!PROPS_HAS_EXCEPTION(props)) {
				1171	if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
				1172	c+=UCASE_GET_DELTA(props);
				1173	}
				1174	} else {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1175	const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1176	uint16_t excWord=*pe++;
				1177	int32_t idx;
				1178	if(excWord&UCASE_EXC_CONDITIONAL_FOLD) {
				1179	/* special case folding mappings, hardcoded */
				1180	if((options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT) {
				1181	/* default mappings */
				1182	if(c==0x49) {
				1183	/* 0049; C; 0069; # LATIN CAPITAL LETTER I */
				1184	return 0x69;
				1185	} else if(c==0x130) {
				1186	/* no simple case folding for U+0130 */
				1187	return c;
				1188	}
				1189	} else {
				1190	/* Turkic mappings */
				1191	if(c==0x49) {
				1192	/* 0049; T; 0131; # LATIN CAPITAL LETTER I */
				1193	return 0x131;
				1194	} else if(c==0x130) {
				1195	/* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
				1196	return 0x69;
				1197	}
				1198	}
				1199	}
				1200	if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) {
				1201	idx=UCASE_EXC_FOLD;
				1202	} else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
				1203	idx=UCASE_EXC_LOWER;
				1204	} else {
				1205	return c;
				1206	}
				1207	GET_SLOT_VALUE(excWord, idx, pe, c);
				1208	}
				1209	return c;
				1210	}
				1211
				1212	/*
				1213	* Issue for canonical caseless match (UAX #21):
				1214	* Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve
				1215	* canonical equivalence, unlike default-option casefolding.
				1216	* For example, I-grave and I + grave fold to strings that are not canonically
				1217	* equivalent.
				1218	* For more details, see the comment in unorm_compare() in unorm.cpp
				1219	* and the intermediate prototype changes for Jitterbug 2021.
				1220	* (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.)
				1221	*
				1222	* This did not get fixed because it appears that it is not possible to fix
				1223	* it for uppercase and lowercase characters (I-grave vs. i-grave)
				1224	* together in a way that they still fold to common result strings.
				1225	*/
				1226
				1227	U_CAPI int32_t U_EXPORT2
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1228	ucase_toFullFolding(UChar32 c,
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1229	const UChar **pString,
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1230	uint32_t options) {
				1231	// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
				1232	U_ASSERT(c >= 0);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1233	UChar32 result=c;
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1234	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1235	if(!PROPS_HAS_EXCEPTION(props)) {
				1236	if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
				1237	result=c+UCASE_GET_DELTA(props);
				1238	}
				1239	} else {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1240	const uint16_t pe=GET_EXCEPTIONS(&ucase_props_singleton, props), pe2;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1241	uint16_t excWord=*pe++;
				1242	int32_t full, idx;
				1243
				1244	pe2=pe;
				1245
				1246	if(excWord&UCASE_EXC_CONDITIONAL_FOLD) {
				1247	/* use hardcoded conditions and mappings */
				1248	if((options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT) {
				1249	/* default mappings */
				1250	if(c==0x49) {
				1251	/* 0049; C; 0069; # LATIN CAPITAL LETTER I */
				1252	return 0x69;
				1253	} else if(c==0x130) {
				1254	/* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
				1255	*pString=iDot;
				1256	return 2;
				1257	}
				1258	} else {
				1259	/* Turkic mappings */
				1260	if(c==0x49) {
				1261	/* 0049; T; 0131; # LATIN CAPITAL LETTER I */
				1262	return 0x131;
				1263	} else if(c==0x130) {
				1264	/* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
				1265	return 0x69;
				1266	}
				1267	}
				1268	} else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
				1269	GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
				1270
				1271	/* start of full case mapping strings */
				1272	++pe;
				1273
				1274	/* skip the lowercase result string */
				1275	pe+=full&UCASE_FULL_LOWER;
				1276	full=(full>>4)&0xf;
				1277
				1278	if(full!=0) {
				1279	/* set the output pointer to the result string */
				1280	pString=reinterpret_cast<const UChar >(pe);
				1281
				1282	/* return the string length */
				1283	return full;
				1284	}
				1285	}
				1286
				1287	if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) {
				1288	idx=UCASE_EXC_FOLD;
				1289	} else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
				1290	idx=UCASE_EXC_LOWER;
				1291	} else {
				1292	return ~c;
				1293	}
				1294	GET_SLOT_VALUE(excWord, idx, pe2, result);
				1295	}
				1296
				1297	return (result==c) ? ~result : result;
				1298	}
				1299
				1300	/* case mapping properties API ---------------------------------------------- */
				1301
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1302	/* public API (see uchar.h) */
				1303
				1304	U_CAPI UBool U_EXPORT2
				1305	u_isULowercase(UChar32 c) {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1306	return (UBool)(UCASE_LOWER==ucase_getType(c));
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1307	}
				1308
				1309	U_CAPI UBool U_EXPORT2
				1310	u_isUUppercase(UChar32 c) {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1311	return (UBool)(UCASE_UPPER==ucase_getType(c));
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1312	}
				1313
				1314	/* Transforms the Unicode character to its lower case equivalent.*/
				1315	U_CAPI UChar32 U_EXPORT2
				1316	u_tolower(UChar32 c) {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1317	return ucase_tolower(c);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1318	}
				1319
				1320	/* Transforms the Unicode character to its upper case equivalent.*/
				1321	U_CAPI UChar32 U_EXPORT2
				1322	u_toupper(UChar32 c) {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1323	return ucase_toupper(c);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1324	}
				1325
				1326	/* Transforms the Unicode character to its title case equivalent.*/
				1327	U_CAPI UChar32 U_EXPORT2
				1328	u_totitle(UChar32 c) {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1329	return ucase_totitle(c);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1330	}
				1331
				1332	/* return the simple case folding mapping for c */
				1333	U_CAPI UChar32 U_EXPORT2
				1334	u_foldCase(UChar32 c, uint32_t options) {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1335	return ucase_fold(c, options);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1336	}
				1337
				1338	U_CFUNC int32_t U_EXPORT2
				1339	ucase_hasBinaryProperty(UChar32 c, UProperty which) {
				1340	/* case mapping properties */
				1341	const UChar *resultString;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1342	switch(which) {
				1343	case UCHAR_LOWERCASE:
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1344	return (UBool)(UCASE_LOWER==ucase_getType(c));
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1345	case UCHAR_UPPERCASE:
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1346	return (UBool)(UCASE_UPPER==ucase_getType(c));
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1347	case UCHAR_SOFT_DOTTED:
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1348	return ucase_isSoftDotted(c);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1349	case UCHAR_CASE_SENSITIVE:
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1350	return ucase_isCaseSensitive(c);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1351	case UCHAR_CASED:
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1352	return (UBool)(UCASE_NONE!=ucase_getType(c));
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1353	case UCHAR_CASE_IGNORABLE:
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1354	return (UBool)(ucase_getTypeOrIgnorable(c)>>2);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1355	/*
				1356	* Note: The following Changes_When_Xyz are defined as testing whether
				1357	* the NFD form of the input changes when Xyz-case-mapped.
				1358	* However, this simpler implementation of these properties,
				1359	* ignoring NFD, passes the tests.
				1360	* The implementation needs to be changed if the tests start failing.
				1361	* When that happens, optimizations should be used to work with the
				1362	* per-single-code point ucase_toFullXyz() functions unless
				1363	* the NFD form has more than one code point,
				1364	* and the property starts set needs to be the union of the
				1365	* start sets for normalization and case mappings.
				1366	*/
				1367	case UCHAR_CHANGES_WHEN_LOWERCASED:
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1368	return (UBool)(ucase_toFullLower(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1369	case UCHAR_CHANGES_WHEN_UPPERCASED:
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1370	return (UBool)(ucase_toFullUpper(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1371	case UCHAR_CHANGES_WHEN_TITLECASED:
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1372	return (UBool)(ucase_toFullTitle(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1373	/* case UCHAR_CHANGES_WHEN_CASEFOLDED: -- in uprops.c */
				1374	case UCHAR_CHANGES_WHEN_CASEMAPPED:
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1375	return (UBool)(
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame^]	1376	ucase_toFullLower(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 \|\|
				1377	ucase_toFullUpper(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 \|\|
				1378	ucase_toFullTitle(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1379	default:
				1380	return FALSE;
				1381	}
				1382	}