Blame - source/common/normalizer2impl.cpp - chromium.googlesource.com/chromium/deps/icu

blob: ec4809c4630995c13a4bedbba3ca49b60be755aa [file] [log] [blame]

jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1	/*
				2	*******************************************************************************
				3	*
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame^]	4	* Copyright (C) 2009-2014, International Business Machines
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	5	* Corporation and others. All Rights Reserved.
				6	*
				7	*******************************************************************************
				8	* file name: normalizer2impl.cpp
				9	* encoding: US-ASCII
				10	* tab size: 8 (not used)
				11	* indentation:4
				12	*
				13	* created on: 2009nov22
				14	* created by: Markus W. Scherer
				15	*/
				16
				17	#include "unicode/utypes.h"
				18
				19	#if !UCONFIG_NO_NORMALIZATION
				20
				21	#include "unicode/normalizer2.h"
				22	#include "unicode/udata.h"
				23	#include "unicode/ustring.h"
				24	#include "unicode/utf16.h"
				25	#include "cmemory.h"
				26	#include "mutex.h"
				27	#include "normalizer2impl.h"
				28	#include "putilimp.h"
				29	#include "uassert.h"
				30	#include "uset_imp.h"
				31	#include "utrie2.h"
				32	#include "uvector.h"
				33
				34	U_NAMESPACE_BEGIN
				35
				36	// ReorderingBuffer -------------------------------------------------------- ***
				37
				38	UBool ReorderingBuffer::init(int32_t destCapacity, UErrorCode &errorCode) {
				39	int32_t length=str.length();
				40	start=str.getBuffer(destCapacity);
				41	if(start==NULL) {
				42	// getBuffer() already did str.setToBogus()
				43	errorCode=U_MEMORY_ALLOCATION_ERROR;
				44	return FALSE;
				45	}
				46	limit=start+length;
				47	remainingCapacity=str.getCapacity()-length;
				48	reorderStart=start;
				49	if(start==limit) {
				50	lastCC=0;
				51	} else {
				52	setIterator();
				53	lastCC=previousCC();
				54	// Set reorderStart after the last code point with cc<=1 if there is one.
				55	if(lastCC>1) {
				56	while(previousCC()>1) {}
				57	}
				58	reorderStart=codePointLimit;
				59	}
				60	return TRUE;
				61	}
				62
				63	UBool ReorderingBuffer::equals(const UChar otherStart, const UChar otherLimit) const {
				64	int32_t length=(int32_t)(limit-start);
				65	return
				66	length==(int32_t)(otherLimit-otherStart) &&
				67	0==u_memcmp(start, otherStart, length);
				68	}
				69
				70	UBool ReorderingBuffer::appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &errorCode) {
				71	if(remainingCapacity<2 && !resize(2, errorCode)) {
				72	return FALSE;
				73	}
				74	if(lastCC<=cc \|\| cc==0) {
				75	limit[0]=U16_LEAD(c);
				76	limit[1]=U16_TRAIL(c);
				77	limit+=2;
				78	lastCC=cc;
				79	if(cc<=1) {
				80	reorderStart=limit;
				81	}
				82	} else {
				83	insert(c, cc);
				84	}
				85	remainingCapacity-=2;
				86	return TRUE;
				87	}
				88
				89	UBool ReorderingBuffer::append(const UChar *s, int32_t length,
				90	uint8_t leadCC, uint8_t trailCC,
				91	UErrorCode &errorCode) {
				92	if(length==0) {
				93	return TRUE;
				94	}
				95	if(remainingCapacity<length && !resize(length, errorCode)) {
				96	return FALSE;
				97	}
				98	remainingCapacity-=length;
				99	if(lastCC<=leadCC \|\| leadCC==0) {
				100	if(trailCC<=1) {
				101	reorderStart=limit+length;
				102	} else if(leadCC<=1) {
				103	reorderStart=limit+1; // Ok if not a code point boundary.
				104	}
				105	const UChar *sLimit=s+length;
				106	do { limit++=s++; } while(s!=sLimit);
				107	lastCC=trailCC;
				108	} else {
				109	int32_t i=0;
				110	UChar32 c;
				111	U16_NEXT(s, i, length, c);
				112	insert(c, leadCC); // insert first code point
				113	while(i<length) {
				114	U16_NEXT(s, i, length, c);
				115	if(i<length) {
				116	// s must be in NFD, otherwise we need to use getCC().
				117	leadCC=Normalizer2Impl::getCCFromYesOrMaybe(impl.getNorm16(c));
				118	} else {
				119	leadCC=trailCC;
				120	}
				121	append(c, leadCC, errorCode);
				122	}
				123	}
				124	return TRUE;
				125	}
				126
				127	UBool ReorderingBuffer::appendZeroCC(UChar32 c, UErrorCode &errorCode) {
				128	int32_t cpLength=U16_LENGTH(c);
				129	if(remainingCapacity<cpLength && !resize(cpLength, errorCode)) {
				130	return FALSE;
				131	}
				132	remainingCapacity-=cpLength;
				133	if(cpLength==1) {
				134	*limit++=(UChar)c;
				135	} else {
				136	limit[0]=U16_LEAD(c);
				137	limit[1]=U16_TRAIL(c);
				138	limit+=2;
				139	}
				140	lastCC=0;
				141	reorderStart=limit;
				142	return TRUE;
				143	}
				144
				145	UBool ReorderingBuffer::appendZeroCC(const UChar s, const UChar sLimit, UErrorCode &errorCode) {
				146	if(s==sLimit) {
				147	return TRUE;
				148	}
				149	int32_t length=(int32_t)(sLimit-s);
				150	if(remainingCapacity<length && !resize(length, errorCode)) {
				151	return FALSE;
				152	}
				153	u_memcpy(limit, s, length);
				154	limit+=length;
				155	remainingCapacity-=length;
				156	lastCC=0;
				157	reorderStart=limit;
				158	return TRUE;
				159	}
				160
				161	void ReorderingBuffer::remove() {
				162	reorderStart=limit=start;
				163	remainingCapacity=str.getCapacity();
				164	lastCC=0;
				165	}
				166
				167	void ReorderingBuffer::removeSuffix(int32_t suffixLength) {
				168	if(suffixLength<(limit-start)) {
				169	limit-=suffixLength;
				170	remainingCapacity+=suffixLength;
				171	} else {
				172	limit=start;
				173	remainingCapacity=str.getCapacity();
				174	}
				175	lastCC=0;
				176	reorderStart=limit;
				177	}
				178
				179	UBool ReorderingBuffer::resize(int32_t appendLength, UErrorCode &errorCode) {
				180	int32_t reorderStartIndex=(int32_t)(reorderStart-start);
				181	int32_t length=(int32_t)(limit-start);
				182	str.releaseBuffer(length);
				183	int32_t newCapacity=length+appendLength;
				184	int32_t doubleCapacity=2*str.getCapacity();
				185	if(newCapacity<doubleCapacity) {
				186	newCapacity=doubleCapacity;
				187	}
				188	if(newCapacity<256) {
				189	newCapacity=256;
				190	}
				191	start=str.getBuffer(newCapacity);
				192	if(start==NULL) {
				193	// getBuffer() already did str.setToBogus()
				194	errorCode=U_MEMORY_ALLOCATION_ERROR;
				195	return FALSE;
				196	}
				197	reorderStart=start+reorderStartIndex;
				198	limit=start+length;
				199	remainingCapacity=str.getCapacity()-length;
				200	return TRUE;
				201	}
				202
				203	void ReorderingBuffer::skipPrevious() {
				204	codePointLimit=codePointStart;
				205	UChar c=*--codePointStart;
				206	if(U16_IS_TRAIL(c) && start<codePointStart && U16_IS_LEAD(*(codePointStart-1))) {
				207	--codePointStart;
				208	}
				209	}
				210
				211	uint8_t ReorderingBuffer::previousCC() {
				212	codePointLimit=codePointStart;
				213	if(reorderStart>=codePointStart) {
				214	return 0;
				215	}
				216	UChar32 c=*--codePointStart;
				217	if(c<Normalizer2Impl::MIN_CCC_LCCC_CP) {
				218	return 0;
				219	}
				220
				221	UChar c2;
				222	if(U16_IS_TRAIL(c) && start<codePointStart && U16_IS_LEAD(c2=*(codePointStart-1))) {
				223	--codePointStart;
				224	c=U16_GET_SUPPLEMENTARY(c2, c);
				225	}
				226	return Normalizer2Impl::getCCFromYesOrMaybe(impl.getNorm16(c));
				227	}
				228
				229	// Inserts c somewhere before the last character.
				230	// Requires 0<cc<lastCC which implies reorderStart<limit.
				231	void ReorderingBuffer::insert(UChar32 c, uint8_t cc) {
				232	for(setIterator(), skipPrevious(); previousCC()>cc;) {}
				233	// insert c at codePointLimit, after the character with prevCC<=cc
				234	UChar *q=limit;
				235	UChar *r=limit+=U16_LENGTH(c);
				236	do {
				237	--r=--q;
				238	} while(codePointLimit!=q);
				239	writeCodePoint(q, c);
				240	if(cc<=1) {
				241	reorderStart=r;
				242	}
				243	}
				244
				245	// Normalizer2Impl --------------------------------------------------------- ***
				246
				247	struct CanonIterData : public UMemory {
				248	CanonIterData(UErrorCode &errorCode);
				249	~CanonIterData();
				250	void addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode);
				251	UTrie2 *trie;
				252	UVector canonStartSets; // contains UnicodeSet *
				253	};
				254
				255	Normalizer2Impl::~Normalizer2Impl() {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	256	delete fCanonIterData;
				257	}
				258
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	259	void
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame^]	260	Normalizer2Impl::init(const int32_t inIndexes, const UTrie2 inTrie,
				261	const uint16_t inExtraData, const uint8_t inSmallFCD) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	262	minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP];
				263	minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP];
				264
				265	minYesNo=inIndexes[IX_MIN_YES_NO];
				266	minYesNoMappingsOnly=inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
				267	minNoNo=inIndexes[IX_MIN_NO_NO];
				268	limitNoNo=inIndexes[IX_LIMIT_NO_NO];
				269	minMaybeYes=inIndexes[IX_MIN_MAYBE_YES];
				270
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame^]	271	normTrie=inTrie;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	272
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame^]	273	maybeYesCompositions=inExtraData;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	274	extraData=maybeYesCompositions+(MIN_NORMAL_MAYBE_YES-minMaybeYes);
				275
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame^]	276	smallFCD=inSmallFCD;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	277
				278	// Build tccc180[].
				279	// gennorm2 enforces lccc=0 for c<MIN_CCC_LCCC_CP=U+0300.
				280	uint8_t bits=0;
				281	for(UChar c=0; c<0x180; bits>>=1) {
				282	if((c&0xff)==0) {
				283	bits=smallFCD[c>>8]; // one byte per 0x100 code points
				284	}
				285	if(bits&1) {
				286	for(int i=0; i<0x20; ++i, ++c) {
				287	tccc180[c]=(uint8_t)getFCD16FromNormData(c);
				288	}
				289	} else {
				290	uprv_memset(tccc180+c, 0, 0x20);
				291	c+=0x20;
				292	}
				293	}
				294	}
				295
				296	uint8_t Normalizer2Impl::getTrailCCFromCompYesAndZeroCC(const UChar cpStart, const UChar cpLimit) const {
				297	UChar32 c;
				298	if(cpStart==(cpLimit-1)) {
				299	c=*cpStart;
				300	} else {
				301	c=U16_GET_SUPPLEMENTARY(cpStart[0], cpStart[1]);
				302	}
				303	uint16_t prevNorm16=getNorm16(c);
				304	if(prevNorm16<=minYesNo) {
				305	return 0; // yesYes and Hangul LV/LVT have ccc=tccc=0
				306	} else {
				307	return (uint8_t)(*getMapping(prevNorm16)>>8); // tccc from yesNo
				308	}
				309	}
				310
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame^]	311	namespace {
				312
				313	class LcccContext {
				314	public:
				315	LcccContext(const Normalizer2Impl &ni, UnicodeSet &s) : impl(ni), set(s) {}
				316
				317	void handleRange(UChar32 start, UChar32 end, uint16_t norm16) {
				318	if(impl.isAlgorithmicNoNo(norm16)) {
				319	// Range of code points with same-norm16-value algorithmic decompositions.
				320	// They might have different non-zero FCD16 values.
				321	do {
				322	uint16_t fcd16=impl.getFCD16(start);
				323	if(fcd16>0xff) { set.add(start); }
				324	} while(++start<=end);
				325	} else {
				326	uint16_t fcd16=impl.getFCD16(start);
				327	if(fcd16>0xff) { set.add(start, end); }
				328	}
				329	}
				330
				331	private:
				332	const Normalizer2Impl &impl;
				333	UnicodeSet &set;
				334	};
				335
				336	struct PropertyStartsContext {
				337	PropertyStartsContext(const Normalizer2Impl &ni, const USetAdder *adder)
				338	: impl(ni), sa(adder) {}
				339
				340	const Normalizer2Impl &impl;
				341	const USetAdder *sa;
				342	};
				343
				344	} // namespace
				345
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	346	U_CDECL_BEGIN
				347
				348	static UBool U_CALLCONV
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame^]	349	enumLcccRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
				350	((LcccContext *)context)->handleRange(start, end, (uint16_t)value);
				351	return TRUE;
				352	}
				353
				354	static UBool U_CALLCONV
				355	enumNorm16PropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
				356	/* add the start code point to the USet */
				357	const PropertyStartsContext ctx=(const PropertyStartsContext )context;
				358	const USetAdder *sa=ctx->sa;
				359	sa->add(sa->set, start);
				360	if(start!=end && ctx->impl.isAlgorithmicNoNo((uint16_t)value)) {
				361	// Range of code points with same-norm16-value algorithmic decompositions.
				362	// They might have different non-zero FCD16 values.
				363	uint16_t prevFCD16=ctx->impl.getFCD16(start);
				364	while(++start<=end) {
				365	uint16_t fcd16=ctx->impl.getFCD16(start);
				366	if(fcd16!=prevFCD16) {
				367	sa->add(sa->set, start);
				368	prevFCD16=fcd16;
				369	}
				370	}
				371	}
				372	return TRUE;
				373	}
				374
				375	static UBool U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	376	enumPropertyStartsRange(const void context, UChar32 start, UChar32 /end/, uint32_t /value*/) {
				377	/* add the start code point to the USet */
				378	const USetAdder sa=(const USetAdder )context;
				379	sa->add(sa->set, start);
				380	return TRUE;
				381	}
				382
				383	static uint32_t U_CALLCONV
				384	segmentStarterMapper(const void * /context/, uint32_t value) {
				385	return value&CANON_NOT_SEGMENT_STARTER;
				386	}
				387
				388	U_CDECL_END
				389
				390	void
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame^]	391	Normalizer2Impl::addLcccChars(UnicodeSet &set) const {
				392	/* add the start code point of each same-value range of each trie */
				393	LcccContext context(*this, set);
				394	utrie2_enum(normTrie, NULL, enumLcccRange, &context);
				395	}
				396
				397	void
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	398	Normalizer2Impl::addPropertyStarts(const USetAdder sa, UErrorCode & /errorCode*/) const {
				399	/* add the start code point of each same-value range of each trie */
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame^]	400	PropertyStartsContext context(*this, sa);
				401	utrie2_enum(normTrie, NULL, enumNorm16PropertyStartsRange, &context);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	402
				403	/* add Hangul LV syllables and LV+1 because of skippables */
				404	for(UChar c=Hangul::HANGUL_BASE; c<Hangul::HANGUL_LIMIT; c+=Hangul::JAMO_T_COUNT) {
				405	sa->add(sa->set, c);
				406	sa->add(sa->set, c+1);
				407	}
				408	sa->add(sa->set, Hangul::HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */
				409	}
				410
				411	void
				412	Normalizer2Impl::addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const {
				413	/* add the start code point of each same-value range of the canonical iterator data trie */
				414	if(ensureCanonIterData(errorCode)) {
				415	// currently only used for the SEGMENT_STARTER property
				416	utrie2_enum(fCanonIterData->trie, segmentStarterMapper, enumPropertyStartsRange, sa);
				417	}
				418	}
				419
				420	const UChar *
				421	Normalizer2Impl::copyLowPrefixFromNulTerminated(const UChar *src,
				422	UChar32 minNeedDataCP,
				423	ReorderingBuffer *buffer,
				424	UErrorCode &errorCode) const {
				425	// Make some effort to support NUL-terminated strings reasonably.
				426	// Take the part of the fast quick check loop that does not look up
				427	// data and check the first part of the string.
				428	// After this prefix, determine the string length to simplify the rest
				429	// of the code.
				430	const UChar *prevSrc=src;
				431	UChar c;
				432	while((c=*src++)<minNeedDataCP && c!=0) {}
				433	// Back out the last character for full processing.
				434	// Copy this prefix.
				435	if(--src!=prevSrc) {
				436	if(buffer!=NULL) {
				437	buffer->appendZeroCC(prevSrc, src, errorCode);
				438	}
				439	}
				440	return src;
				441	}
				442
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame^]	443	UnicodeString &
				444	Normalizer2Impl::decompose(const UnicodeString &src, UnicodeString &dest,
				445	UErrorCode &errorCode) const {
				446	if(U_FAILURE(errorCode)) {
				447	dest.setToBogus();
				448	return dest;
				449	}
				450	const UChar *sArray=src.getBuffer();
				451	if(&dest==&src \|\| sArray==NULL) {
				452	errorCode=U_ILLEGAL_ARGUMENT_ERROR;
				453	dest.setToBogus();
				454	return dest;
				455	}
				456	decompose(sArray, sArray+src.length(), dest, src.length(), errorCode);
				457	return dest;
				458	}
				459
				460	void
				461	Normalizer2Impl::decompose(const UChar src, const UChar limit,
				462	UnicodeString &dest,
				463	int32_t destLengthEstimate,
				464	UErrorCode &errorCode) const {
				465	if(destLengthEstimate<0 && limit!=NULL) {
				466	destLengthEstimate=(int32_t)(limit-src);
				467	}
				468	dest.remove();
				469	ReorderingBuffer buffer(*this, dest);
				470	if(buffer.init(destLengthEstimate, errorCode)) {
				471	decompose(src, limit, &buffer, errorCode);
				472	}
				473	}
				474
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	475	// Dual functionality:
				476	// buffer!=NULL: normalize
				477	// buffer==NULL: isNormalized/spanQuickCheckYes
				478	const UChar *
				479	Normalizer2Impl::decompose(const UChar src, const UChar limit,
				480	ReorderingBuffer *buffer,
				481	UErrorCode &errorCode) const {
				482	UChar32 minNoCP=minDecompNoCP;
				483	if(limit==NULL) {
				484	src=copyLowPrefixFromNulTerminated(src, minNoCP, buffer, errorCode);
				485	if(U_FAILURE(errorCode)) {
				486	return src;
				487	}
				488	limit=u_strchr(src, 0);
				489	}
				490
				491	const UChar *prevSrc;
				492	UChar32 c=0;
				493	uint16_t norm16=0;
				494
				495	// only for quick check
				496	const UChar *prevBoundary=src;
				497	uint8_t prevCC=0;
				498
				499	for(;;) {
				500	// count code units below the minimum or with irrelevant data for the quick check
				501	for(prevSrc=src; src!=limit;) {
				502	if( (c=*src)<minNoCP \|\|
				503	isMostDecompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c))
				504	) {
				505	++src;
				506	} else if(!U16_IS_SURROGATE(c)) {
				507	break;
				508	} else {
				509	UChar c2;
				510	if(U16_IS_SURROGATE_LEAD(c)) {
				511	if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
				512	c=U16_GET_SUPPLEMENTARY(c, c2);
				513	}
				514	} else /* trail surrogate */ {
				515	if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
				516	--src;
				517	c=U16_GET_SUPPLEMENTARY(c2, c);
				518	}
				519	}
				520	if(isMostDecompYesAndZeroCC(norm16=getNorm16(c))) {
				521	src+=U16_LENGTH(c);
				522	} else {
				523	break;
				524	}
				525	}
				526	}
				527	// copy these code units all at once
				528	if(src!=prevSrc) {
				529	if(buffer!=NULL) {
				530	if(!buffer->appendZeroCC(prevSrc, src, errorCode)) {
				531	break;
				532	}
				533	} else {
				534	prevCC=0;
				535	prevBoundary=src;
				536	}
				537	}
				538	if(src==limit) {
				539	break;
				540	}
				541
				542	// Check one above-minimum, relevant code point.
				543	src+=U16_LENGTH(c);
				544	if(buffer!=NULL) {
				545	if(!decompose(c, norm16, *buffer, errorCode)) {
				546	break;
				547	}
				548	} else {
				549	if(isDecompYes(norm16)) {
				550	uint8_t cc=getCCFromYesOrMaybe(norm16);
				551	if(prevCC<=cc \|\| cc==0) {
				552	prevCC=cc;
				553	if(cc<=1) {
				554	prevBoundary=src;
				555	}
				556	continue;
				557	}
				558	}
				559	return prevBoundary; // "no" or cc out of order
				560	}
				561	}
				562	return src;
				563	}
				564
				565	// Decompose a short piece of text which is likely to contain characters that
				566	// fail the quick check loop and/or where the quick check loop's overhead
				567	// is unlikely to be amortized.
				568	// Called by the compose() and makeFCD() implementations.
				569	UBool Normalizer2Impl::decomposeShort(const UChar src, const UChar limit,
				570	ReorderingBuffer &buffer,
				571	UErrorCode &errorCode) const {
				572	while(src<limit) {
				573	UChar32 c;
				574	uint16_t norm16;
				575	UTRIE2_U16_NEXT16(normTrie, src, limit, c, norm16);
				576	if(!decompose(c, norm16, buffer, errorCode)) {
				577	return FALSE;
				578	}
				579	}
				580	return TRUE;
				581	}
				582
				583	UBool Normalizer2Impl::decompose(UChar32 c, uint16_t norm16,
				584	ReorderingBuffer &buffer,
				585	UErrorCode &errorCode) const {
				586	// Only loops for 1:1 algorithmic mappings.
				587	for(;;) {
				588	// get the decomposition and the lead and trail cc's
				589	if(isDecompYes(norm16)) {
				590	// c does not decompose
				591	return buffer.append(c, getCCFromYesOrMaybe(norm16), errorCode);
				592	} else if(isHangul(norm16)) {
				593	// Hangul syllable: decompose algorithmically
				594	UChar jamos[3];
				595	return buffer.appendZeroCC(jamos, jamos+Hangul::decompose(c, jamos), errorCode);
				596	} else if(isDecompNoAlgorithmic(norm16)) {
				597	c=mapAlgorithmic(c, norm16);
				598	norm16=getNorm16(c);
				599	} else {
				600	// c decomposes, get everything from the variable-length extra data
				601	const uint16_t *mapping=getMapping(norm16);
				602	uint16_t firstUnit=*mapping;
				603	int32_t length=firstUnit&MAPPING_LENGTH_MASK;
				604	uint8_t leadCC, trailCC;
				605	trailCC=(uint8_t)(firstUnit>>8);
				606	if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) {
				607	leadCC=(uint8_t)(*(mapping-1)>>8);
				608	} else {
				609	leadCC=0;
				610	}
				611	return buffer.append((const UChar *)mapping+1, length, leadCC, trailCC, errorCode);
				612	}
				613	}
				614	}
				615
				616	const UChar *
				617	Normalizer2Impl::getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) const {
				618	const UChar *decomp=NULL;
				619	uint16_t norm16;
				620	for(;;) {
				621	if(c<minDecompNoCP \|\| isDecompYes(norm16=getNorm16(c))) {
				622	// c does not decompose
				623	return decomp;
				624	} else if(isHangul(norm16)) {
				625	// Hangul syllable: decompose algorithmically
				626	length=Hangul::decompose(c, buffer);
				627	return buffer;
				628	} else if(isDecompNoAlgorithmic(norm16)) {
				629	c=mapAlgorithmic(c, norm16);
				630	decomp=buffer;
				631	length=0;
				632	U16_APPEND_UNSAFE(buffer, length, c);
				633	} else {
				634	// c decomposes, get everything from the variable-length extra data
				635	const uint16_t *mapping=getMapping(norm16);
				636	length=*mapping&MAPPING_LENGTH_MASK;
				637	return (const UChar *)mapping+1;
				638	}
				639	}
				640	}
				641
				642	// The capacity of the buffer must be 30=MAPPING_LENGTH_MASK-1
				643	// so that a raw mapping fits that consists of one unit ("rm0")
				644	// plus all but the first two code units of the normal mapping.
				645	// The maximum length of a normal mapping is 31=MAPPING_LENGTH_MASK.
				646	const UChar *
				647	Normalizer2Impl::getRawDecomposition(UChar32 c, UChar buffer[30], int32_t &length) const {
				648	// We do not loop in this method because an algorithmic mapping itself
				649	// becomes a final result rather than having to be decomposed recursively.
				650	uint16_t norm16;
				651	if(c<minDecompNoCP \|\| isDecompYes(norm16=getNorm16(c))) {
				652	// c does not decompose
				653	return NULL;
				654	} else if(isHangul(norm16)) {
				655	// Hangul syllable: decompose algorithmically
				656	Hangul::getRawDecomposition(c, buffer);
				657	length=2;
				658	return buffer;
				659	} else if(isDecompNoAlgorithmic(norm16)) {
				660	c=mapAlgorithmic(c, norm16);
				661	length=0;
				662	U16_APPEND_UNSAFE(buffer, length, c);
				663	return buffer;
				664	} else {
				665	// c decomposes, get everything from the variable-length extra data
				666	const uint16_t *mapping=getMapping(norm16);
				667	uint16_t firstUnit=*mapping;
				668	int32_t mLength=firstUnit&MAPPING_LENGTH_MASK; // length of normal mapping
				669	if(firstUnit&MAPPING_HAS_RAW_MAPPING) {
				670	// Read the raw mapping from before the firstUnit and before the optional ccc/lccc word.
				671	// Bit 7=MAPPING_HAS_CCC_LCCC_WORD
				672	const uint16_t *rawMapping=mapping-((firstUnit>>7)&1)-1;
				673	uint16_t rm0=*rawMapping;
				674	if(rm0<=MAPPING_LENGTH_MASK) {
				675	length=rm0;
				676	return (const UChar *)rawMapping-rm0;
				677	} else {
				678	// Copy the normal mapping and replace its first two code units with rm0.
				679	buffer[0]=(UChar)rm0;
				680	u_memcpy(buffer+1, (const UChar *)mapping+1+2, mLength-2);
				681	length=mLength-1;
				682	return buffer;
				683	}
				684	} else {
				685	length=mLength;
				686	return (const UChar *)mapping+1;
				687	}
				688	}
				689	}
				690
				691	void Normalizer2Impl::decomposeAndAppend(const UChar src, const UChar limit,
				692	UBool doDecompose,
				693	UnicodeString &safeMiddle,
				694	ReorderingBuffer &buffer,
				695	UErrorCode &errorCode) const {
				696	buffer.copyReorderableSuffixTo(safeMiddle);
				697	if(doDecompose) {
				698	decompose(src, limit, &buffer, errorCode);
				699	return;
				700	}
				701	// Just merge the strings at the boundary.
				702	ForwardUTrie2StringIterator iter(normTrie, src, limit);
				703	uint8_t firstCC, prevCC, cc;
				704	firstCC=prevCC=cc=getCC(iter.next16());
				705	while(cc!=0) {
				706	prevCC=cc;
				707	cc=getCC(iter.next16());
				708	};
				709	if(limit==NULL) { // appendZeroCC() needs limit!=NULL
				710	limit=u_strchr(iter.codePointStart, 0);
				711	}
				712
				713	if (buffer.append(src, (int32_t)(iter.codePointStart-src), firstCC, prevCC, errorCode)) {
				714	buffer.appendZeroCC(iter.codePointStart, limit, errorCode);
				715	}
				716	}
				717
				718	// Note: hasDecompBoundary() could be implemented as aliases to
				719	// hasFCDBoundaryBefore() and hasFCDBoundaryAfter()
				720	// at the cost of building the FCD trie for a decomposition normalizer.
				721	UBool Normalizer2Impl::hasDecompBoundary(UChar32 c, UBool before) const {
				722	for(;;) {
				723	if(c<minDecompNoCP) {
				724	return TRUE;
				725	}
				726	uint16_t norm16=getNorm16(c);
				727	if(isHangul(norm16) \|\| isDecompYesAndZeroCC(norm16)) {
				728	return TRUE;
				729	} else if(norm16>MIN_NORMAL_MAYBE_YES) {
				730	return FALSE; // ccc!=0
				731	} else if(isDecompNoAlgorithmic(norm16)) {
				732	c=mapAlgorithmic(c, norm16);
				733	} else {
				734	// c decomposes, get everything from the variable-length extra data
				735	const uint16_t *mapping=getMapping(norm16);
				736	uint16_t firstUnit=*mapping;
				737	if((firstUnit&MAPPING_LENGTH_MASK)==0) {
				738	return FALSE;
				739	}
				740	if(!before) {
				741	// decomp after-boundary: same as hasFCDBoundaryAfter(),
				742	// fcd16<=1 \|\| trailCC==0
				743	if(firstUnit>0x1ff) {
				744	return FALSE; // trailCC>1
				745	}
				746	if(firstUnit<=0xff) {
				747	return TRUE; // trailCC==0
				748	}
				749	// if(trailCC==1) test leadCC==0, same as checking for before-boundary
				750	}
				751	// TRUE if leadCC==0 (hasFCDBoundaryBefore())
				752	return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 \|\| (*(mapping-1)&0xff00)==0;
				753	}
				754	}
				755	}
				756
				757	/*
				758	* Finds the recomposition result for
				759	* a forward-combining "lead" character,
				760	* specified with a pointer to its compositions list,
				761	* and a backward-combining "trail" character.
				762	*
				763	* If the lead and trail characters combine, then this function returns
				764	* the following "compositeAndFwd" value:
				765	* Bits 21..1 composite character
				766	* Bit 0 set if the composite is a forward-combining starter
				767	* otherwise it returns -1.
				768	*
				769	* The compositions list has (trail, compositeAndFwd) pair entries,
				770	* encoded as either pairs or triples of 16-bit units.
				771	* The last entry has the high bit of its first unit set.
				772	*
				773	* The list is sorted by ascending trail characters (there are no duplicates).
				774	* A linear search is used.
				775	*
				776	* See normalizer2impl.h for a more detailed description
				777	* of the compositions list format.
				778	*/
				779	int32_t Normalizer2Impl::combine(const uint16_t *list, UChar32 trail) {
				780	uint16_t key1, firstUnit;
				781	if(trail<COMP_1_TRAIL_LIMIT) {
				782	// trail character is 0..33FF
				783	// result entry may have 2 or 3 units
				784	key1=(uint16_t)(trail<<1);
				785	while(key1>(firstUnit=*list)) {
				786	list+=2+(firstUnit&COMP_1_TRIPLE);
				787	}
				788	if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
				789	if(firstUnit&COMP_1_TRIPLE) {
				790	return ((int32_t)list[1]<<16)\|list[2];
				791	} else {
				792	return list[1];
				793	}
				794	}
				795	} else {
				796	// trail character is 3400..10FFFF
				797	// result entry has 3 units
				798	key1=(uint16_t)(COMP_1_TRAIL_LIMIT+
				799	(((trail>>COMP_1_TRAIL_SHIFT))&
				800	~COMP_1_TRIPLE));
				801	uint16_t key2=(uint16_t)(trail<<COMP_2_TRAIL_SHIFT);
				802	uint16_t secondUnit;
				803	for(;;) {
				804	if(key1>(firstUnit=*list)) {
				805	list+=2+(firstUnit&COMP_1_TRIPLE);
				806	} else if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
				807	if(key2>(secondUnit=list[1])) {
				808	if(firstUnit&COMP_1_LAST_TUPLE) {
				809	break;
				810	} else {
				811	list+=3;
				812	}
				813	} else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) {
				814	return ((int32_t)(secondUnit&~COMP_2_TRAIL_MASK)<<16)\|list[2];
				815	} else {
				816	break;
				817	}
				818	} else {
				819	break;
				820	}
				821	}
				822	}
				823	return -1;
				824	}
				825
				826	/**
				827	* @param list some character's compositions list
				828	* @param set recursively receives the composites from these compositions
				829	*/
				830	void Normalizer2Impl::addComposites(const uint16_t *list, UnicodeSet &set) const {
				831	uint16_t firstUnit;
				832	int32_t compositeAndFwd;
				833	do {
				834	firstUnit=*list;
				835	if((firstUnit&COMP_1_TRIPLE)==0) {
				836	compositeAndFwd=list[1];
				837	list+=2;
				838	} else {
				839	compositeAndFwd=(((int32_t)list[1]&~COMP_2_TRAIL_MASK)<<16)\|list[2];
				840	list+=3;
				841	}
				842	UChar32 composite=compositeAndFwd>>1;
				843	if((compositeAndFwd&1)!=0) {
				844	addComposites(getCompositionsListForComposite(getNorm16(composite)), set);
				845	}
				846	set.add(composite);
				847	} while((firstUnit&COMP_1_LAST_TUPLE)==0);
				848	}
				849
				850	/*
				851	* Recomposes the buffer text starting at recomposeStartIndex
				852	* (which is in NFD - decomposed and canonically ordered),
				853	* and truncates the buffer contents.
				854	*
				855	* Note that recomposition never lengthens the text:
				856	* Any character consists of either one or two code units;
				857	* a composition may contain at most one more code unit than the original starter,
				858	* while the combining mark that is removed has at least one code unit.
				859	*/
				860	void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex,
				861	UBool onlyContiguous) const {
				862	UChar *p=buffer.getStart()+recomposeStartIndex;
				863	UChar *limit=buffer.getLimit();
				864	if(p==limit) {
				865	return;
				866	}
				867
				868	UChar starter, pRemove, q, r;
				869	const uint16_t *compositionsList;
				870	UChar32 c, compositeAndFwd;
				871	uint16_t norm16;
				872	uint8_t cc, prevCC;
				873	UBool starterIsSupplementary;
				874
				875	// Some of the following variables are not used until we have a forward-combining starter
				876	// and are only initialized now to avoid compiler warnings.
				877	compositionsList=NULL; // used as indicator for whether we have a forward-combining starter
				878	starter=NULL;
				879	starterIsSupplementary=FALSE;
				880	prevCC=0;
				881
				882	for(;;) {
				883	UTRIE2_U16_NEXT16(normTrie, p, limit, c, norm16);
				884	cc=getCCFromYesOrMaybe(norm16);
				885	if( // this character combines backward and
				886	isMaybe(norm16) &&
				887	// we have seen a starter that combines forward and
				888	compositionsList!=NULL &&
				889	// the backward-combining character is not blocked
				890	(prevCC<cc \|\| prevCC==0)
				891	) {
				892	if(isJamoVT(norm16)) {
				893	// c is a Jamo V/T, see if we can compose it with the previous character.
				894	if(c<Hangul::JAMO_T_BASE) {
				895	// c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
				896	UChar prev=(UChar)(*starter-Hangul::JAMO_L_BASE);
				897	if(prev<Hangul::JAMO_L_COUNT) {
				898	pRemove=p-1;
				899	UChar syllable=(UChar)
				900	(Hangul::HANGUL_BASE+
				901	(prevHangul::JAMO_V_COUNT+(c-Hangul::JAMO_V_BASE))
				902	Hangul::JAMO_T_COUNT);
				903	UChar t;
				904	if(p!=limit && (t=(UChar)(*p-Hangul::JAMO_T_BASE))<Hangul::JAMO_T_COUNT) {
				905	++p;
				906	syllable+=t; // The next character was a Jamo T.
				907	}
				908	*starter=syllable;
				909	// remove the Jamo V/T
				910	q=pRemove;
				911	r=p;
				912	while(r<limit) {
				913	q++=r++;
				914	}
				915	limit=q;
				916	p=pRemove;
				917	}
				918	}
				919	/*
				920	* No "else" for Jamo T:
				921	* Since the input is in NFD, there are no Hangul LV syllables that
				922	* a Jamo T could combine with.
				923	* All Jamo Ts are combined above when handling Jamo Vs.
				924	*/
				925	if(p==limit) {
				926	break;
				927	}
				928	compositionsList=NULL;
				929	continue;
				930	} else if((compositeAndFwd=combine(compositionsList, c))>=0) {
				931	// The starter and the combining mark (c) do combine.
				932	UChar32 composite=compositeAndFwd>>1;
				933
				934	// Replace the starter with the composite, remove the combining mark.
				935	pRemove=p-U16_LENGTH(c); // pRemove & p: start & limit of the combining mark
				936	if(starterIsSupplementary) {
				937	if(U_IS_SUPPLEMENTARY(composite)) {
				938	// both are supplementary
				939	starter[0]=U16_LEAD(composite);
				940	starter[1]=U16_TRAIL(composite);
				941	} else {
				942	*starter=(UChar)composite;
				943	// The composite is shorter than the starter,
				944	// move the intermediate characters forward one.
				945	starterIsSupplementary=FALSE;
				946	q=starter+1;
				947	r=q+1;
				948	while(r<pRemove) {
				949	q++=r++;
				950	}
				951	--pRemove;
				952	}
				953	} else if(U_IS_SUPPLEMENTARY(composite)) {
				954	// The composite is longer than the starter,
				955	// move the intermediate characters back one.
				956	starterIsSupplementary=TRUE;
				957	++starter; // temporarily increment for the loop boundary
				958	q=pRemove;
				959	r=++pRemove;
				960	while(starter<q) {
				961	--r=--q;
				962	}
				963	*starter=U16_TRAIL(composite);
				964	*--starter=U16_LEAD(composite); // undo the temporary increment
				965	} else {
				966	// both are on the BMP
				967	*starter=(UChar)composite;
				968	}
				969
				970	/* remove the combining mark by moving the following text over it */
				971	if(pRemove<p) {
				972	q=pRemove;
				973	r=p;
				974	while(r<limit) {
				975	q++=r++;
				976	}
				977	limit=q;
				978	p=pRemove;
				979	}
				980	// Keep prevCC because we removed the combining mark.
				981
				982	if(p==limit) {
				983	break;
				984	}
				985	// Is the composite a starter that combines forward?
				986	if(compositeAndFwd&1) {
				987	compositionsList=
				988	getCompositionsListForComposite(getNorm16(composite));
				989	} else {
				990	compositionsList=NULL;
				991	}
				992
				993	// We combined; continue with looking for compositions.
				994	continue;
				995	}
				996	}
				997
				998	// no combination this time
				999	prevCC=cc;
				1000	if(p==limit) {
				1001	break;
				1002	}
				1003
				1004	// If c did not combine, then check if it is a starter.
				1005	if(cc==0) {
				1006	// Found a new starter.
				1007	if((compositionsList=getCompositionsListForDecompYes(norm16))!=NULL) {
				1008	// It may combine with something, prepare for it.
				1009	if(U_IS_BMP(c)) {
				1010	starterIsSupplementary=FALSE;
				1011	starter=p-1;
				1012	} else {
				1013	starterIsSupplementary=TRUE;
				1014	starter=p-2;
				1015	}
				1016	}
				1017	} else if(onlyContiguous) {
				1018	// FCC: no discontiguous compositions; any intervening character blocks.
				1019	compositionsList=NULL;
				1020	}
				1021	}
				1022	buffer.setReorderingLimit(limit);
				1023	}
				1024
				1025	UChar32
				1026	Normalizer2Impl::composePair(UChar32 a, UChar32 b) const {
				1027	uint16_t norm16=getNorm16(a); // maps an out-of-range 'a' to inert norm16=0
				1028	const uint16_t *list;
				1029	if(isInert(norm16)) {
				1030	return U_SENTINEL;
				1031	} else if(norm16<minYesNoMappingsOnly) {
				1032	if(isJamoL(norm16)) {
				1033	b-=Hangul::JAMO_V_BASE;
				1034	if(0<=b && b<Hangul::JAMO_V_COUNT) {
				1035	return
				1036	(Hangul::HANGUL_BASE+
				1037	((a-Hangul::JAMO_L_BASE)Hangul::JAMO_V_COUNT+b)
				1038	Hangul::JAMO_T_COUNT);
				1039	} else {
				1040	return U_SENTINEL;
				1041	}
				1042	} else if(isHangul(norm16)) {
				1043	b-=Hangul::JAMO_T_BASE;
				1044	if(Hangul::isHangulWithoutJamoT(a) && 0<b && b<Hangul::JAMO_T_COUNT) { // not b==0!
				1045	return a+b;
				1046	} else {
				1047	return U_SENTINEL;
				1048	}
				1049	} else {
				1050	// 'a' has a compositions list in extraData
				1051	list=extraData+norm16;
				1052	if(norm16>minYesNo) { // composite 'a' has both mapping & compositions list
				1053	list+= // mapping pointer
				1054	1+ // +1 to skip the first unit with the mapping lenth
				1055	(*list&MAPPING_LENGTH_MASK); // + mapping length
				1056	}
				1057	}
				1058	} else if(norm16<minMaybeYes \|\| MIN_NORMAL_MAYBE_YES<=norm16) {
				1059	return U_SENTINEL;
				1060	} else {
				1061	list=maybeYesCompositions+norm16-minMaybeYes;
				1062	}
				1063	if(b<0 \|\| 0x10ffff<b) { // combine(list, b) requires a valid code point b
				1064	return U_SENTINEL;
				1065	}
				1066	#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
				1067	return combine(list, b)>>1;
				1068	#else
				1069	int32_t compositeAndFwd=combine(list, b);
				1070	return compositeAndFwd>=0 ? compositeAndFwd>>1 : U_SENTINEL;
				1071	#endif
				1072	}
				1073
				1074	// Very similar to composeQuickCheck(): Make the same changes in both places if relevant.
				1075	// doCompose: normalize
				1076	// !doCompose: isNormalized (buffer must be empty and initialized)
				1077	UBool
				1078	Normalizer2Impl::compose(const UChar src, const UChar limit,
				1079	UBool onlyContiguous,
				1080	UBool doCompose,
				1081	ReorderingBuffer &buffer,
				1082	UErrorCode &errorCode) const {
				1083	/*
				1084	* prevBoundary points to the last character before the current one
				1085	* that has a composition boundary before it with ccc==0 and quick check "yes".
				1086	* Keeping track of prevBoundary saves us looking for a composition boundary
				1087	* when we find a "no" or "maybe".
				1088	*
				1089	* When we back out from prevSrc back to prevBoundary,
				1090	* then we also remove those same characters (which had been simply copied
				1091	* or canonically-order-inserted) from the ReorderingBuffer.
				1092	* Therefore, at all times, the [prevBoundary..prevSrc[ source units
				1093	* must correspond 1:1 to destination units at the end of the destination buffer.
				1094	*/
				1095	const UChar *prevBoundary=src;
				1096	UChar32 minNoMaybeCP=minCompNoMaybeCP;
				1097	if(limit==NULL) {
				1098	src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP,
				1099	doCompose ? &buffer : NULL,
				1100	errorCode);
				1101	if(U_FAILURE(errorCode)) {
				1102	return FALSE;
				1103	}
				1104	if(prevBoundary<src) {
				1105	// Set prevBoundary to the last character in the prefix.
				1106	prevBoundary=src-1;
				1107	}
				1108	limit=u_strchr(src, 0);
				1109	}
				1110
				1111	const UChar *prevSrc;
				1112	UChar32 c=0;
				1113	uint16_t norm16=0;
				1114
				1115	// only for isNormalized
				1116	uint8_t prevCC=0;
				1117
				1118	for(;;) {
				1119	// count code units below the minimum or with irrelevant data for the quick check
				1120	for(prevSrc=src; src!=limit;) {
				1121	if( (c=*src)<minNoMaybeCP \|\|
				1122	isCompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c))
				1123	) {
				1124	++src;
				1125	} else if(!U16_IS_SURROGATE(c)) {
				1126	break;
				1127	} else {
				1128	UChar c2;
				1129	if(U16_IS_SURROGATE_LEAD(c)) {
				1130	if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
				1131	c=U16_GET_SUPPLEMENTARY(c, c2);
				1132	}
				1133	} else /* trail surrogate */ {
				1134	if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
				1135	--src;
				1136	c=U16_GET_SUPPLEMENTARY(c2, c);
				1137	}
				1138	}
				1139	if(isCompYesAndZeroCC(norm16=getNorm16(c))) {
				1140	src+=U16_LENGTH(c);
				1141	} else {
				1142	break;
				1143	}
				1144	}
				1145	}
				1146	// copy these code units all at once
				1147	if(src!=prevSrc) {
				1148	if(doCompose) {
				1149	if(!buffer.appendZeroCC(prevSrc, src, errorCode)) {
				1150	break;
				1151	}
				1152	} else {
				1153	prevCC=0;
				1154	}
				1155	if(src==limit) {
				1156	break;
				1157	}
				1158	// Set prevBoundary to the last character in the quick check loop.
				1159	prevBoundary=src-1;
				1160	if( U16_IS_TRAIL(*prevBoundary) && prevSrc<prevBoundary &&
				1161	U16_IS_LEAD(*(prevBoundary-1))
				1162	) {
				1163	--prevBoundary;
				1164	}
				1165	// The start of the current character (c).
				1166	prevSrc=src;
				1167	} else if(src==limit) {
				1168	break;
				1169	}
				1170
				1171	src+=U16_LENGTH(c);
				1172	/*
				1173	* isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
				1174	* c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
				1175	* or has ccc!=0.
				1176	* Check for Jamo V/T, then for regular characters.
				1177	* c is not a Hangul syllable or Jamo L because those have "yes" properties.
				1178	*/
				1179	if(isJamoVT(norm16) && prevBoundary!=prevSrc) {
				1180	UChar prev=*(prevSrc-1);
				1181	UBool needToDecompose=FALSE;
				1182	if(c<Hangul::JAMO_T_BASE) {
				1183	// c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
				1184	prev=(UChar)(prev-Hangul::JAMO_L_BASE);
				1185	if(prev<Hangul::JAMO_L_COUNT) {
				1186	if(!doCompose) {
				1187	return FALSE;
				1188	}
				1189	UChar syllable=(UChar)
				1190	(Hangul::HANGUL_BASE+
				1191	(prevHangul::JAMO_V_COUNT+(c-Hangul::JAMO_V_BASE))
				1192	Hangul::JAMO_T_COUNT);
				1193	UChar t;
				1194	if(src!=limit && (t=(UChar)(*src-Hangul::JAMO_T_BASE))<Hangul::JAMO_T_COUNT) {
				1195	++src;
				1196	syllable+=t; // The next character was a Jamo T.
				1197	prevBoundary=src;
				1198	buffer.setLastChar(syllable);
				1199	continue;
				1200	}
				1201	// If we see L+V+x where x!=T then we drop to the slow path,
				1202	// decompose and recompose.
				1203	// This is to deal with NFKC finding normal L and V but a
				1204	// compatibility variant of a T. We need to either fully compose that
				1205	// combination here (which would complicate the code and may not work
				1206	// with strange custom data) or use the slow path -- or else our replacing
				1207	// two input characters (L+V) with one output character (LV syllable)
				1208	// would violate the invariant that [prevBoundary..prevSrc[ has the same
				1209	// length as what we appended to the buffer since prevBoundary.
				1210	needToDecompose=TRUE;
				1211	}
				1212	} else if(Hangul::isHangulWithoutJamoT(prev)) {
				1213	// c is a Jamo Trailing consonant,
				1214	// compose with previous Hangul LV that does not contain a Jamo T.
				1215	if(!doCompose) {
				1216	return FALSE;
				1217	}
				1218	buffer.setLastChar((UChar)(prev+c-Hangul::JAMO_T_BASE));
				1219	prevBoundary=src;
				1220	continue;
				1221	}
				1222	if(!needToDecompose) {
				1223	// The Jamo V/T did not compose into a Hangul syllable.
				1224	if(doCompose) {
				1225	if(!buffer.appendBMP((UChar)c, 0, errorCode)) {
				1226	break;
				1227	}
				1228	} else {
				1229	prevCC=0;
				1230	}
				1231	continue;
				1232	}
				1233	}
				1234	/*
				1235	* Source buffer pointers:
				1236	*
				1237	* all done quick check current char not yet
				1238	* "yes" but (c) processed
				1239	* may combine
				1240	* forward
				1241	* [-------------[-------------[-------------[-------------[
				1242	* \| \| \| \| \|
				1243	* orig. src prevBoundary prevSrc src limit
				1244	*
				1245	*
				1246	* Destination buffer pointers inside the ReorderingBuffer:
				1247	*
				1248	* all done might take not filled yet
				1249	* characters for
				1250	* reordering
				1251	* [-------------[-------------[-------------[
				1252	* \| \| \| \|
				1253	* start reorderStart limit \|
				1254	* +remainingCap.+
				1255	*/
				1256	if(norm16>=MIN_YES_YES_WITH_CC) {
				1257	uint8_t cc=(uint8_t)norm16; // cc!=0
				1258	if( onlyContiguous && // FCC
				1259	(doCompose ? buffer.getLastCC() : prevCC)==0 &&
				1260	prevBoundary<prevSrc &&
				1261	// buffer.getLastCC()==0 && prevBoundary<prevSrc tell us that
				1262	// [prevBoundary..prevSrc[ (which is exactly one character under these conditions)
				1263	// passed the quick check "yes && ccc==0" test.
				1264	// Check whether the last character was a "yesYes" or a "yesNo".
				1265	// If a "yesNo", then we get its trailing ccc from its
				1266	// mapping and check for canonical order.
				1267	// All other cases are ok.
				1268	getTrailCCFromCompYesAndZeroCC(prevBoundary, prevSrc)>cc
				1269	) {
				1270	// Fails FCD test, need to decompose and contiguously recompose.
				1271	if(!doCompose) {
				1272	return FALSE;
				1273	}
				1274	} else if(doCompose) {
				1275	if(!buffer.append(c, cc, errorCode)) {
				1276	break;
				1277	}
				1278	continue;
				1279	} else if(prevCC<=cc) {
				1280	prevCC=cc;
				1281	continue;
				1282	} else {
				1283	return FALSE;
				1284	}
				1285	} else if(!doCompose && !isMaybeOrNonZeroCC(norm16)) {
				1286	return FALSE;
				1287	}
				1288
				1289	/*
				1290	* Find appropriate boundaries around this character,
				1291	* decompose the source text from between the boundaries,
				1292	* and recompose it.
				1293	*
				1294	* We may need to remove the last few characters from the ReorderingBuffer
				1295	* to account for source text that was copied or appended
				1296	* but needs to take part in the recomposition.
				1297	*/
				1298
				1299	/*
				1300	* Find the last composition boundary in [prevBoundary..src[.
				1301	* It is either the decomposition of the current character (at prevSrc),
				1302	* or prevBoundary.
				1303	*/
				1304	if(hasCompBoundaryBefore(c, norm16)) {
				1305	prevBoundary=prevSrc;
				1306	} else if(doCompose) {
				1307	buffer.removeSuffix((int32_t)(prevSrc-prevBoundary));
				1308	}
				1309
				1310	// Find the next composition boundary in [src..limit[ -
				1311	// modifies src to point to the next starter.
				1312	src=(UChar *)findNextCompBoundary(src, limit);
				1313
				1314	// Decompose [prevBoundary..src[ into the buffer and then recompose that part of it.
				1315	int32_t recomposeStartIndex=buffer.length();
				1316	if(!decomposeShort(prevBoundary, src, buffer, errorCode)) {
				1317	break;
				1318	}
				1319	recompose(buffer, recomposeStartIndex, onlyContiguous);
				1320	if(!doCompose) {
				1321	if(!buffer.equals(prevBoundary, src)) {
				1322	return FALSE;
				1323	}
				1324	buffer.remove();
				1325	prevCC=0;
				1326	}
				1327
				1328	// Move to the next starter. We never need to look back before this point again.
				1329	prevBoundary=src;
				1330	}
				1331	return TRUE;
				1332	}
				1333
				1334	// Very similar to compose(): Make the same changes in both places if relevant.
				1335	// pQCResult==NULL: spanQuickCheckYes
				1336	// pQCResult!=NULL: quickCheck (*pQCResult must be UNORM_YES)
				1337	const UChar *
				1338	Normalizer2Impl::composeQuickCheck(const UChar src, const UChar limit,
				1339	UBool onlyContiguous,
				1340	UNormalizationCheckResult *pQCResult) const {
				1341	/*
				1342	* prevBoundary points to the last character before the current one
				1343	* that has a composition boundary before it with ccc==0 and quick check "yes".
				1344	*/
				1345	const UChar *prevBoundary=src;
				1346	UChar32 minNoMaybeCP=minCompNoMaybeCP;
				1347	if(limit==NULL) {
				1348	UErrorCode errorCode=U_ZERO_ERROR;
				1349	src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP, NULL, errorCode);
				1350	if(prevBoundary<src) {
				1351	// Set prevBoundary to the last character in the prefix.
				1352	prevBoundary=src-1;
				1353	}
				1354	limit=u_strchr(src, 0);
				1355	}
				1356
				1357	const UChar *prevSrc;
				1358	UChar32 c=0;
				1359	uint16_t norm16=0;
				1360	uint8_t prevCC=0;
				1361
				1362	for(;;) {
				1363	// count code units below the minimum or with irrelevant data for the quick check
				1364	for(prevSrc=src;;) {
				1365	if(src==limit) {
				1366	return src;
				1367	}
				1368	if( (c=*src)<minNoMaybeCP \|\|
				1369	isCompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c))
				1370	) {
				1371	++src;
				1372	} else if(!U16_IS_SURROGATE(c)) {
				1373	break;
				1374	} else {
				1375	UChar c2;
				1376	if(U16_IS_SURROGATE_LEAD(c)) {
				1377	if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
				1378	c=U16_GET_SUPPLEMENTARY(c, c2);
				1379	}
				1380	} else /* trail surrogate */ {
				1381	if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
				1382	--src;
				1383	c=U16_GET_SUPPLEMENTARY(c2, c);
				1384	}
				1385	}
				1386	if(isCompYesAndZeroCC(norm16=getNorm16(c))) {
				1387	src+=U16_LENGTH(c);
				1388	} else {
				1389	break;
				1390	}
				1391	}
				1392	}
				1393	if(src!=prevSrc) {
				1394	// Set prevBoundary to the last character in the quick check loop.
				1395	prevBoundary=src-1;
				1396	if( U16_IS_TRAIL(*prevBoundary) && prevSrc<prevBoundary &&
				1397	U16_IS_LEAD(*(prevBoundary-1))
				1398	) {
				1399	--prevBoundary;
				1400	}
				1401	prevCC=0;
				1402	// The start of the current character (c).
				1403	prevSrc=src;
				1404	}
				1405
				1406	src+=U16_LENGTH(c);
				1407	/*
				1408	* isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
				1409	* c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
				1410	* or has ccc!=0.
				1411	*/
				1412	if(isMaybeOrNonZeroCC(norm16)) {
				1413	uint8_t cc=getCCFromYesOrMaybe(norm16);
				1414	if( onlyContiguous && // FCC
				1415	cc!=0 &&
				1416	prevCC==0 &&
				1417	prevBoundary<prevSrc &&
				1418	// prevCC==0 && prevBoundary<prevSrc tell us that
				1419	// [prevBoundary..prevSrc[ (which is exactly one character under these conditions)
				1420	// passed the quick check "yes && ccc==0" test.
				1421	// Check whether the last character was a "yesYes" or a "yesNo".
				1422	// If a "yesNo", then we get its trailing ccc from its
				1423	// mapping and check for canonical order.
				1424	// All other cases are ok.
				1425	getTrailCCFromCompYesAndZeroCC(prevBoundary, prevSrc)>cc
				1426	) {
				1427	// Fails FCD test.
				1428	} else if(prevCC<=cc \|\| cc==0) {
				1429	prevCC=cc;
				1430	if(norm16<MIN_YES_YES_WITH_CC) {
				1431	if(pQCResult!=NULL) {
				1432	*pQCResult=UNORM_MAYBE;
				1433	} else {
				1434	return prevBoundary;
				1435	}
				1436	}
				1437	continue;
				1438	}
				1439	}
				1440	if(pQCResult!=NULL) {
				1441	*pQCResult=UNORM_NO;
				1442	}
				1443	return prevBoundary;
				1444	}
				1445	}
				1446
				1447	void Normalizer2Impl::composeAndAppend(const UChar src, const UChar limit,
				1448	UBool doCompose,
				1449	UBool onlyContiguous,
				1450	UnicodeString &safeMiddle,
				1451	ReorderingBuffer &buffer,
				1452	UErrorCode &errorCode) const {
				1453	if(!buffer.isEmpty()) {
				1454	const UChar *firstStarterInSrc=findNextCompBoundary(src, limit);
				1455	if(src!=firstStarterInSrc) {
				1456	const UChar *lastStarterInDest=findPreviousCompBoundary(buffer.getStart(),
				1457	buffer.getLimit());
				1458	int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastStarterInDest);
				1459	UnicodeString middle(lastStarterInDest, destSuffixLength);
				1460	buffer.removeSuffix(destSuffixLength);
				1461	safeMiddle=middle;
				1462	middle.append(src, (int32_t)(firstStarterInSrc-src));
				1463	const UChar *middleStart=middle.getBuffer();
				1464	compose(middleStart, middleStart+middle.length(), onlyContiguous,
				1465	TRUE, buffer, errorCode);
				1466	if(U_FAILURE(errorCode)) {
				1467	return;
				1468	}
				1469	src=firstStarterInSrc;
				1470	}
				1471	}
				1472	if(doCompose) {
				1473	compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
				1474	} else {
				1475	if(limit==NULL) { // appendZeroCC() needs limit!=NULL
				1476	limit=u_strchr(src, 0);
				1477	}
				1478	buffer.appendZeroCC(src, limit, errorCode);
				1479	}
				1480	}
				1481
				1482	/**
				1483	* Does c have a composition boundary before it?
				1484	* True if its decomposition begins with a character that has
				1485	* ccc=0 && NFC_QC=Yes (isCompYesAndZeroCC()).
				1486	* As a shortcut, this is true if c itself has ccc=0 && NFC_QC=Yes
				1487	* (isCompYesAndZeroCC()) so we need not decompose.
				1488	*/
				1489	UBool Normalizer2Impl::hasCompBoundaryBefore(UChar32 c, uint16_t norm16) const {
				1490	for(;;) {
				1491	if(isCompYesAndZeroCC(norm16)) {
				1492	return TRUE;
				1493	} else if(isMaybeOrNonZeroCC(norm16)) {
				1494	return FALSE;
				1495	} else if(isDecompNoAlgorithmic(norm16)) {
				1496	c=mapAlgorithmic(c, norm16);
				1497	norm16=getNorm16(c);
				1498	} else {
				1499	// c decomposes, get everything from the variable-length extra data
				1500	const uint16_t *mapping=getMapping(norm16);
				1501	uint16_t firstUnit=*mapping;
				1502	if((firstUnit&MAPPING_LENGTH_MASK)==0) {
				1503	return FALSE;
				1504	}
				1505	if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD) && (*(mapping-1)&0xff00)) {
				1506	return FALSE; // non-zero leadCC
				1507	}
				1508	int32_t i=1; // skip over the firstUnit
				1509	UChar32 c;
				1510	U16_NEXT_UNSAFE(mapping, i, c);
				1511	return isCompYesAndZeroCC(getNorm16(c));
				1512	}
				1513	}
				1514	}
				1515
				1516	UBool Normalizer2Impl::hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous, UBool testInert) const {
				1517	for(;;) {
				1518	uint16_t norm16=getNorm16(c);
				1519	if(isInert(norm16)) {
				1520	return TRUE;
				1521	} else if(norm16<=minYesNo) {
				1522	// Hangul: norm16==minYesNo
				1523	// Hangul LVT has a boundary after it.
				1524	// Hangul LV and non-inert yesYes characters combine forward.
				1525	return isHangul(norm16) && !Hangul::isHangulWithoutJamoT((UChar)c);
				1526	} else if(norm16>= (testInert ? minNoNo : minMaybeYes)) {
				1527	return FALSE;
				1528	} else if(isDecompNoAlgorithmic(norm16)) {
				1529	c=mapAlgorithmic(c, norm16);
				1530	} else {
				1531	// c decomposes, get everything from the variable-length extra data.
				1532	// If testInert, then c must be a yesNo character which has lccc=0,
				1533	// otherwise it could be a noNo.
				1534	const uint16_t *mapping=getMapping(norm16);
				1535	uint16_t firstUnit=*mapping;
				1536	// TRUE if
				1537	// not MAPPING_NO_COMP_BOUNDARY_AFTER
				1538	// (which is set if
				1539	// c is not deleted, and
				1540	// it and its decomposition do not combine forward, and it has a starter)
				1541	// and if FCC then trailCC<=1
				1542	return
				1543	(firstUnit&MAPPING_NO_COMP_BOUNDARY_AFTER)==0 &&
				1544	(!onlyContiguous \|\| firstUnit<=0x1ff);
				1545	}
				1546	}
				1547	}
				1548
				1549	const UChar Normalizer2Impl::findPreviousCompBoundary(const UChar start, const UChar *p) const {
				1550	BackwardUTrie2StringIterator iter(normTrie, start, p);
				1551	uint16_t norm16;
				1552	do {
				1553	norm16=iter.previous16();
				1554	} while(!hasCompBoundaryBefore(iter.codePoint, norm16));
				1555	// We could also test hasCompBoundaryAfter() and return iter.codePointLimit,
				1556	// but that's probably not worth the extra cost.
				1557	return iter.codePointStart;
				1558	}
				1559
				1560	const UChar Normalizer2Impl::findNextCompBoundary(const UChar p, const UChar *limit) const {
				1561	ForwardUTrie2StringIterator iter(normTrie, p, limit);
				1562	uint16_t norm16;
				1563	do {
				1564	norm16=iter.next16();
				1565	} while(!hasCompBoundaryBefore(iter.codePoint, norm16));
				1566	return iter.codePointStart;
				1567	}
				1568
				1569	// Note: normalizer2impl.cpp r30982 (2011-nov-27)
				1570	// still had getFCDTrie() which built and cached an FCD trie.
				1571	// That provided faster access to FCD data than getFCD16FromNormData()
				1572	// but required synchronization and consumed some 10kB of heap memory
				1573	// in any process that uses FCD (e.g., via collation).
				1574	// tccc180[] and smallFCD[] are intended to help with any loss of performance,
				1575	// at least for Latin & CJK.
				1576
				1577	// Gets the FCD value from the regular normalization data.
				1578	uint16_t Normalizer2Impl::getFCD16FromNormData(UChar32 c) const {
				1579	// Only loops for 1:1 algorithmic mappings.
				1580	for(;;) {
				1581	uint16_t norm16=getNorm16(c);
				1582	if(norm16<=minYesNo) {
				1583	// no decomposition or Hangul syllable, all zeros
				1584	return 0;
				1585	} else if(norm16>=MIN_NORMAL_MAYBE_YES) {
				1586	// combining mark
				1587	norm16&=0xff;
				1588	return norm16\|(norm16<<8);
				1589	} else if(norm16>=minMaybeYes) {
				1590	return 0;
				1591	} else if(isDecompNoAlgorithmic(norm16)) {
				1592	c=mapAlgorithmic(c, norm16);
				1593	} else {
				1594	// c decomposes, get everything from the variable-length extra data
				1595	const uint16_t *mapping=getMapping(norm16);
				1596	uint16_t firstUnit=*mapping;
				1597	if((firstUnit&MAPPING_LENGTH_MASK)==0) {
				1598	// A character that is deleted (maps to an empty string) must
				1599	// get the worst-case lccc and tccc values because arbitrary
				1600	// characters on both sides will become adjacent.
				1601	return 0x1ff;
				1602	} else {
				1603	norm16=firstUnit>>8; // tccc
				1604	if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) {
				1605	norm16\|=*(mapping-1)&0xff00; // lccc
				1606	}
				1607	return norm16;
				1608	}
				1609	}
				1610	}
				1611	}
				1612
				1613	// Dual functionality:
				1614	// buffer!=NULL: normalize
				1615	// buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes
				1616	const UChar *
				1617	Normalizer2Impl::makeFCD(const UChar src, const UChar limit,
				1618	ReorderingBuffer *buffer,
				1619	UErrorCode &errorCode) const {
				1620	// Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1.
				1621	// Similar to the prevBoundary in the compose() implementation.
				1622	const UChar *prevBoundary=src;
				1623	int32_t prevFCD16=0;
				1624	if(limit==NULL) {
				1625	src=copyLowPrefixFromNulTerminated(src, MIN_CCC_LCCC_CP, buffer, errorCode);
				1626	if(U_FAILURE(errorCode)) {
				1627	return src;
				1628	}
				1629	if(prevBoundary<src) {
				1630	prevBoundary=src;
				1631	// We know that the previous character's lccc==0.
				1632	// Fetching the fcd16 value was deferred for this below-U+0300 code point.
				1633	prevFCD16=getFCD16(*(src-1));
				1634	if(prevFCD16>1) {
				1635	--prevBoundary;
				1636	}
				1637	}
				1638	limit=u_strchr(src, 0);
				1639	}
				1640
				1641	// Note: In this function we use buffer->appendZeroCC() because we track
				1642	// the lead and trail combining classes here, rather than leaving it to
				1643	// the ReorderingBuffer.
				1644	// The exception is the call to decomposeShort() which uses the buffer
				1645	// in the normal way.
				1646
				1647	const UChar *prevSrc;
				1648	UChar32 c=0;
				1649	uint16_t fcd16=0;
				1650
				1651	for(;;) {
				1652	// count code units with lccc==0
				1653	for(prevSrc=src; src!=limit;) {
				1654	if((c=*src)<MIN_CCC_LCCC_CP) {
				1655	prevFCD16=~c;
				1656	++src;
				1657	} else if(!singleLeadMightHaveNonZeroFCD16(c)) {
				1658	prevFCD16=0;
				1659	++src;
				1660	} else {
				1661	if(U16_IS_SURROGATE(c)) {
				1662	UChar c2;
				1663	if(U16_IS_SURROGATE_LEAD(c)) {
				1664	if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
				1665	c=U16_GET_SUPPLEMENTARY(c, c2);
				1666	}
				1667	} else /* trail surrogate */ {
				1668	if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
				1669	--src;
				1670	c=U16_GET_SUPPLEMENTARY(c2, c);
				1671	}
				1672	}
				1673	}
				1674	if((fcd16=getFCD16FromNormData(c))<=0xff) {
				1675	prevFCD16=fcd16;
				1676	src+=U16_LENGTH(c);
				1677	} else {
				1678	break;
				1679	}
				1680	}
				1681	}
				1682	// copy these code units all at once
				1683	if(src!=prevSrc) {
				1684	if(buffer!=NULL && !buffer->appendZeroCC(prevSrc, src, errorCode)) {
				1685	break;
				1686	}
				1687	if(src==limit) {
				1688	break;
				1689	}
				1690	prevBoundary=src;
				1691	// We know that the previous character's lccc==0.
				1692	if(prevFCD16<0) {
				1693	// Fetching the fcd16 value was deferred for this below-U+0300 code point.
				1694	UChar32 prev=~prevFCD16;
				1695	prevFCD16= prev<0x180 ? tccc180[prev] : getFCD16FromNormData(prev);
				1696	if(prevFCD16>1) {
				1697	--prevBoundary;
				1698	}
				1699	} else {
				1700	const UChar *p=src-1;
				1701	if(U16_IS_TRAIL(p) && prevSrc<p && U16_IS_LEAD((p-1))) {
				1702	--p;
				1703	// Need to fetch the previous character's FCD value because
				1704	// prevFCD16 was just for the trail surrogate code point.
				1705	prevFCD16=getFCD16FromNormData(U16_GET_SUPPLEMENTARY(p[0], p[1]));
				1706	// Still known to have lccc==0 because its lead surrogate unit had lccc==0.
				1707	}
				1708	if(prevFCD16>1) {
				1709	prevBoundary=p;
				1710	}
				1711	}
				1712	// The start of the current character (c).
				1713	prevSrc=src;
				1714	} else if(src==limit) {
				1715	break;
				1716	}
				1717
				1718	src+=U16_LENGTH(c);
				1719	// The current character (c) at [prevSrc..src[ has a non-zero lead combining class.
				1720	// Check for proper order, and decompose locally if necessary.
				1721	if((prevFCD16&0xff)<=(fcd16>>8)) {
				1722	// proper order: prev tccc <= current lccc
				1723	if((fcd16&0xff)<=1) {
				1724	prevBoundary=src;
				1725	}
				1726	if(buffer!=NULL && !buffer->appendZeroCC(c, errorCode)) {
				1727	break;
				1728	}
				1729	prevFCD16=fcd16;
				1730	continue;
				1731	} else if(buffer==NULL) {
				1732	return prevBoundary; // quick check "no"
				1733	} else {
				1734	/*
				1735	* Back out the part of the source that we copied or appended
				1736	* already but is now going to be decomposed.
				1737	* prevSrc is set to after what was copied/appended.
				1738	*/
				1739	buffer->removeSuffix((int32_t)(prevSrc-prevBoundary));
				1740	/*
				1741	* Find the part of the source that needs to be decomposed,
				1742	* up to the next safe boundary.
				1743	*/
				1744	src=findNextFCDBoundary(src, limit);
				1745	/*
				1746	* The source text does not fulfill the conditions for FCD.
				1747	* Decompose and reorder a limited piece of the text.
				1748	*/
				1749	if(!decomposeShort(prevBoundary, src, *buffer, errorCode)) {
				1750	break;
				1751	}
				1752	prevBoundary=src;
				1753	prevFCD16=0;
				1754	}
				1755	}
				1756	return src;
				1757	}
				1758
				1759	void Normalizer2Impl::makeFCDAndAppend(const UChar src, const UChar limit,
				1760	UBool doMakeFCD,
				1761	UnicodeString &safeMiddle,
				1762	ReorderingBuffer &buffer,
				1763	UErrorCode &errorCode) const {
				1764	if(!buffer.isEmpty()) {
				1765	const UChar *firstBoundaryInSrc=findNextFCDBoundary(src, limit);
				1766	if(src!=firstBoundaryInSrc) {
				1767	const UChar *lastBoundaryInDest=findPreviousFCDBoundary(buffer.getStart(),
				1768	buffer.getLimit());
				1769	int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastBoundaryInDest);
				1770	UnicodeString middle(lastBoundaryInDest, destSuffixLength);
				1771	buffer.removeSuffix(destSuffixLength);
				1772	safeMiddle=middle;
				1773	middle.append(src, (int32_t)(firstBoundaryInSrc-src));
				1774	const UChar *middleStart=middle.getBuffer();
				1775	makeFCD(middleStart, middleStart+middle.length(), &buffer, errorCode);
				1776	if(U_FAILURE(errorCode)) {
				1777	return;
				1778	}
				1779	src=firstBoundaryInSrc;
				1780	}
				1781	}
				1782	if(doMakeFCD) {
				1783	makeFCD(src, limit, &buffer, errorCode);
				1784	} else {
				1785	if(limit==NULL) { // appendZeroCC() needs limit!=NULL
				1786	limit=u_strchr(src, 0);
				1787	}
				1788	buffer.appendZeroCC(src, limit, errorCode);
				1789	}
				1790	}
				1791
				1792	const UChar Normalizer2Impl::findPreviousFCDBoundary(const UChar start, const UChar *p) const {
				1793	while(start<p && previousFCD16(start, p)>0xff) {}
				1794	return p;
				1795	}
				1796
				1797	const UChar Normalizer2Impl::findNextFCDBoundary(const UChar p, const UChar *limit) const {
				1798	while(p<limit) {
				1799	const UChar *codePointStart=p;
				1800	if(nextFCD16(p, limit)<=0xff) {
				1801	return codePointStart;
				1802	}
				1803	}
				1804	return p;
				1805	}
				1806
				1807	// CanonicalIterator data -------------------------------------------------- ***
				1808
				1809	CanonIterData::CanonIterData(UErrorCode &errorCode) :
				1810	trie(utrie2_open(0, 0, &errorCode)),
				1811	canonStartSets(uprv_deleteUObject, NULL, errorCode) {}
				1812
				1813	CanonIterData::~CanonIterData() {
				1814	utrie2_close(trie);
				1815	}
				1816
				1817	void CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode) {
				1818	uint32_t canonValue=utrie2_get32(trie, decompLead);
				1819	if((canonValue&(CANON_HAS_SET\|CANON_VALUE_MASK))==0 && origin!=0) {
				1820	// origin is the first character whose decomposition starts with
				1821	// the character for which we are setting the value.
				1822	utrie2_set32(trie, decompLead, canonValue\|origin, &errorCode);
				1823	} else {
				1824	// origin is not the first character, or it is U+0000.
				1825	UnicodeSet *set;
				1826	if((canonValue&CANON_HAS_SET)==0) {
				1827	set=new UnicodeSet;
				1828	if(set==NULL) {
				1829	errorCode=U_MEMORY_ALLOCATION_ERROR;
				1830	return;
				1831	}
				1832	UChar32 firstOrigin=(UChar32)(canonValue&CANON_VALUE_MASK);
				1833	canonValue=(canonValue&~CANON_VALUE_MASK)\|CANON_HAS_SET\|(uint32_t)canonStartSets.size();
				1834	utrie2_set32(trie, decompLead, canonValue, &errorCode);
				1835	canonStartSets.addElement(set, errorCode);
				1836	if(firstOrigin!=0) {
				1837	set->add(firstOrigin);
				1838	}
				1839	} else {
				1840	set=(UnicodeSet *)canonStartSets[(int32_t)(canonValue&CANON_VALUE_MASK)];
				1841	}
				1842	set->add(origin);
				1843	}
				1844	}
				1845
				1846	U_CDECL_BEGIN
				1847
				1848	// Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters.
				1849	// context: the Normalizer2Impl
				1850	static UBool U_CALLCONV
				1851	enumCIDRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value) {
				1852	UErrorCode errorCode = U_ZERO_ERROR;
				1853	if (value != 0) {
				1854	Normalizer2Impl impl = (Normalizer2Impl )context;
				1855	impl->makeCanonIterDataFromNorm16(
				1856	start, end, (uint16_t)value, *impl->fCanonIterData, errorCode);
				1857	}
				1858	return U_SUCCESS(errorCode);
				1859	}
				1860
				1861
				1862
				1863	// UInitOnce instantiation function for CanonIterData
				1864
				1865	static void U_CALLCONV
				1866	initCanonIterData(Normalizer2Impl *impl, UErrorCode &errorCode) {
				1867	U_ASSERT(impl->fCanonIterData == NULL);
				1868	impl->fCanonIterData = new CanonIterData(errorCode);
				1869	if (impl->fCanonIterData == NULL) {
				1870	errorCode=U_MEMORY_ALLOCATION_ERROR;
				1871	}
				1872	if (U_SUCCESS(errorCode)) {
				1873	utrie2_enum(impl->getNormTrie(), NULL, enumCIDRangeHandler, impl);
				1874	utrie2_freeze(impl->fCanonIterData->trie, UTRIE2_32_VALUE_BITS, &errorCode);
				1875	}
				1876	if (U_FAILURE(errorCode)) {
				1877	delete impl->fCanonIterData;
				1878	impl->fCanonIterData = NULL;
				1879	}
				1880	}
				1881
				1882	U_CDECL_END
				1883
				1884	void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, uint16_t norm16,
				1885	CanonIterData &newData,
				1886	UErrorCode &errorCode) const {
				1887	if(norm16==0 \|\| (minYesNo<=norm16 && norm16<minNoNo)) {
				1888	// Inert, or 2-way mapping (including Hangul syllable).
				1889	// We do not write a canonStartSet for any yesNo character.
				1890	// Composites from 2-way mappings are added at runtime from the
				1891	// starter's compositions list, and the other characters in
				1892	// 2-way mappings get CANON_NOT_SEGMENT_STARTER set because they are
				1893	// "maybe" characters.
				1894	return;
				1895	}
				1896	for(UChar32 c=start; c<=end; ++c) {
				1897	uint32_t oldValue=utrie2_get32(newData.trie, c);
				1898	uint32_t newValue=oldValue;
				1899	if(norm16>=minMaybeYes) {
				1900	// not a segment starter if it occurs in a decomposition or has cc!=0
				1901	newValue\|=CANON_NOT_SEGMENT_STARTER;
				1902	if(norm16<MIN_NORMAL_MAYBE_YES) {
				1903	newValue\|=CANON_HAS_COMPOSITIONS;
				1904	}
				1905	} else if(norm16<minYesNo) {
				1906	newValue\|=CANON_HAS_COMPOSITIONS;
				1907	} else {
				1908	// c has a one-way decomposition
				1909	UChar32 c2=c;
				1910	uint16_t norm16_2=norm16;
				1911	while(limitNoNo<=norm16_2 && norm16_2<minMaybeYes) {
				1912	c2=mapAlgorithmic(c2, norm16_2);
				1913	norm16_2=getNorm16(c2);
				1914	}
				1915	if(minYesNo<=norm16_2 && norm16_2<limitNoNo) {
				1916	// c decomposes, get everything from the variable-length extra data
				1917	const uint16_t *mapping=getMapping(norm16_2);
				1918	uint16_t firstUnit=*mapping;
				1919	int32_t length=firstUnit&MAPPING_LENGTH_MASK;
				1920	if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
				1921	if(c==c2 && (*(mapping-1)&0xff)!=0) {
				1922	newValue\|=CANON_NOT_SEGMENT_STARTER; // original c has cc!=0
				1923	}
				1924	}
				1925	// Skip empty mappings (no characters in the decomposition).
				1926	if(length!=0) {
				1927	++mapping; // skip over the firstUnit
				1928	// add c to first code point's start set
				1929	int32_t i=0;
				1930	U16_NEXT_UNSAFE(mapping, i, c2);
				1931	newData.addToStartSet(c, c2, errorCode);
				1932	// Set CANON_NOT_SEGMENT_STARTER for each remaining code point of a
				1933	// one-way mapping. A 2-way mapping is possible here after
				1934	// intermediate algorithmic mapping.
				1935	if(norm16_2>=minNoNo) {
				1936	while(i<length) {
				1937	U16_NEXT_UNSAFE(mapping, i, c2);
				1938	uint32_t c2Value=utrie2_get32(newData.trie, c2);
				1939	if((c2Value&CANON_NOT_SEGMENT_STARTER)==0) {
				1940	utrie2_set32(newData.trie, c2, c2Value\|CANON_NOT_SEGMENT_STARTER,
				1941	&errorCode);
				1942	}
				1943	}
				1944	}
				1945	}
				1946	} else {
				1947	// c decomposed to c2 algorithmically; c has cc==0
				1948	newData.addToStartSet(c, c2, errorCode);
				1949	}
				1950	}
				1951	if(newValue!=oldValue) {
				1952	utrie2_set32(newData.trie, c, newValue, &errorCode);
				1953	}
				1954	}
				1955	}
				1956
				1957	UBool Normalizer2Impl::ensureCanonIterData(UErrorCode &errorCode) const {
				1958	// Logically const: Synchronized instantiation.
				1959	Normalizer2Impl me=const_cast<Normalizer2Impl >(this);
				1960	umtx_initOnce(me->fCanonIterDataInitOnce, &initCanonIterData, me, errorCode);
				1961	return U_SUCCESS(errorCode);
				1962	}
				1963
				1964	int32_t Normalizer2Impl::getCanonValue(UChar32 c) const {
				1965	return (int32_t)utrie2_get32(fCanonIterData->trie, c);
				1966	}
				1967
				1968	const UnicodeSet &Normalizer2Impl::getCanonStartSet(int32_t n) const {
				1969	return (const UnicodeSet )fCanonIterData->canonStartSets[n];
				1970	}
				1971
				1972	UBool Normalizer2Impl::isCanonSegmentStarter(UChar32 c) const {
				1973	return getCanonValue(c)>=0;
				1974	}
				1975
				1976	UBool Normalizer2Impl::getCanonStartSet(UChar32 c, UnicodeSet &set) const {
				1977	int32_t canonValue=getCanonValue(c)&~CANON_NOT_SEGMENT_STARTER;
				1978	if(canonValue==0) {
				1979	return FALSE;
				1980	}
				1981	set.clear();
				1982	int32_t value=canonValue&CANON_VALUE_MASK;
				1983	if((canonValue&CANON_HAS_SET)!=0) {
				1984	set.addAll(getCanonStartSet(value));
				1985	} else if(value!=0) {
				1986	set.add(value);
				1987	}
				1988	if((canonValue&CANON_HAS_COMPOSITIONS)!=0) {
				1989	uint16_t norm16=getNorm16(c);
				1990	if(norm16==JAMO_L) {
				1991	UChar32 syllable=
				1992	(UChar32)(Hangul::HANGUL_BASE+(c-Hangul::JAMO_L_BASE)*Hangul::JAMO_VT_COUNT);
				1993	set.add(syllable, syllable+Hangul::JAMO_VT_COUNT-1);
				1994	} else {
				1995	addComposites(getCompositionsList(norm16), set);
				1996	}
				1997	}
				1998	return TRUE;
				1999	}
				2000
				2001	U_NAMESPACE_END
				2002
				2003	// Normalizer2 data swapping ----------------------------------------------- ***
				2004
				2005	U_NAMESPACE_USE
				2006
				2007	U_CAPI int32_t U_EXPORT2
				2008	unorm2_swap(const UDataSwapper *ds,
				2009	const void inData, int32_t length, void outData,
				2010	UErrorCode *pErrorCode) {
				2011	const UDataInfo *pInfo;
				2012	int32_t headerSize;
				2013
				2014	const uint8_t *inBytes;
				2015	uint8_t *outBytes;
				2016
				2017	const int32_t *inIndexes;
				2018	int32_t indexes[Normalizer2Impl::IX_MIN_MAYBE_YES+1];
				2019
				2020	int32_t i, offset, nextOffset, size;
				2021
				2022	/* udata_swapDataHeader checks the arguments */
				2023	headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
				2024	if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {
				2025	return 0;
				2026	}
				2027
				2028	/* check data format and format version */
				2029	pInfo=(const UDataInfo )((const char )inData+4);
				2030	if(!(
				2031	pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */
				2032	pInfo->dataFormat[1]==0x72 &&
				2033	pInfo->dataFormat[2]==0x6d &&
				2034	pInfo->dataFormat[3]==0x32 &&
				2035	(pInfo->formatVersion[0]==1 \|\| pInfo->formatVersion[0]==2)
				2036	)) {
				2037	udata_printError(ds, "unorm2_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as Normalizer2 data\n",
				2038	pInfo->dataFormat[0], pInfo->dataFormat[1],
				2039	pInfo->dataFormat[2], pInfo->dataFormat[3],
				2040	pInfo->formatVersion[0]);
				2041	*pErrorCode=U_UNSUPPORTED_ERROR;
				2042	return 0;
				2043	}
				2044
				2045	inBytes=(const uint8_t *)inData+headerSize;
				2046	outBytes=(uint8_t *)outData+headerSize;
				2047
				2048	inIndexes=(const int32_t *)inBytes;
				2049
				2050	if(length>=0) {
				2051	length-=headerSize;
				2052	if(length<(int32_t)sizeof(indexes)) {
				2053	udata_printError(ds, "unorm2_swap(): too few bytes (%d after header) for Normalizer2 data\n",
				2054	length);
				2055	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
				2056	return 0;
				2057	}
				2058	}
				2059
				2060	/* read the first few indexes */
				2061	for(i=0; i<=Normalizer2Impl::IX_MIN_MAYBE_YES; ++i) {
				2062	indexes[i]=udata_readInt32(ds, inIndexes[i]);
				2063	}
				2064
				2065	/* get the total length of the data */
				2066	size=indexes[Normalizer2Impl::IX_TOTAL_SIZE];
				2067
				2068	if(length>=0) {
				2069	if(length<size) {
				2070	udata_printError(ds, "unorm2_swap(): too few bytes (%d after header) for all of Normalizer2 data\n",
				2071	length);
				2072	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
				2073	return 0;
				2074	}
				2075
				2076	/* copy the data for inaccessible bytes */
				2077	if(inBytes!=outBytes) {
				2078	uprv_memcpy(outBytes, inBytes, size);
				2079	}
				2080
				2081	offset=0;
				2082
				2083	/* swap the int32_t indexes[] */
				2084	nextOffset=indexes[Normalizer2Impl::IX_NORM_TRIE_OFFSET];
				2085	ds->swapArray32(ds, inBytes, nextOffset-offset, outBytes, pErrorCode);
				2086	offset=nextOffset;
				2087
				2088	/* swap the UTrie2 */
				2089	nextOffset=indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET];
				2090	utrie2_swap(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode);
				2091	offset=nextOffset;
				2092
				2093	/* swap the uint16_t extraData[] */
				2094	nextOffset=indexes[Normalizer2Impl::IX_SMALL_FCD_OFFSET];
				2095	ds->swapArray16(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode);
				2096	offset=nextOffset;
				2097
				2098	/* no need to swap the uint8_t smallFCD[] (new in formatVersion 2) */
				2099	nextOffset=indexes[Normalizer2Impl::IX_SMALL_FCD_OFFSET+1];
				2100	offset=nextOffset;
				2101
				2102	U_ASSERT(offset==size);
				2103	}
				2104
				2105	return headerSize+size;
				2106	}
				2107
				2108	#endif // !UCONFIG_NO_NORMALIZATION