Blame - source/common/unormcmp.cpp - chromium.googlesource.com/chromium/deps/icu

blob: e22419097255c50e96f2f2929dd39369fb9ef7ef [file] [log] [blame]

Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1	// © 2016 and later: Unicode, Inc. and others.
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2	// License & terms of use: http://www.unicode.org/copyright.html
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	3	/*
				4	*******************************************************************************
				5	*
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	6	* Copyright (C) 2001-2014, International Business Machines
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	7	* Corporation and others. All Rights Reserved.
				8	*
				9	*******************************************************************************
				10	* file name: unormcmp.cpp
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	11	* encoding: UTF-8
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	12	* tab size: 8 (not used)
				13	* indentation:4
				14	*
				15	* created on: 2004sep13
				16	* created by: Markus W. Scherer
				17	*
				18	* unorm_compare() function moved here from unorm.cpp for better modularization.
				19	* Depends on both normalization and case folding.
				20	* Allows unorm.cpp to not depend on any character properties code.
				21	*/
				22
				23	#include "unicode/utypes.h"
				24
				25	#if !UCONFIG_NO_NORMALIZATION
				26
				27	#include "unicode/unorm.h"
				28	#include "unicode/ustring.h"
				29	#include "cmemory.h"
				30	#include "normalizer2impl.h"
				31	#include "ucase.h"
				32	#include "uprops.h"
				33	#include "ustr_imp.h"
				34
				35	U_NAMESPACE_USE
				36
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	37	/* compare canonically equivalent ------------------------------------------- */
				38
				39	/*
				40	* Compare two strings for canonical equivalence.
				41	* Further options include case-insensitive comparison and
				42	* code point order (as opposed to code unit order).
				43	*
				44	* In this function, canonical equivalence is optional as well.
				45	* If canonical equivalence is tested, then both strings must fulfill
				46	* the FCD check.
				47	*
				48	* Semantically, this is equivalent to
				49	* strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2)))
				50	* where code point order, NFD and foldCase are all optional.
				51	*
				52	* String comparisons almost always yield results before processing both strings
				53	* completely.
				54	* They are generally more efficient working incrementally instead of
				55	* performing the sub-processing (strlen, normalization, case-folding)
				56	* on the entire strings first.
				57	*
				58	* It is also unnecessary to normalize identical characters.
				59	*
				60	* This function works in principle as follows:
				61	*
				62	* loop {
				63	* get one code unit c1 from s1 (-1 if end of source)
				64	* get one code unit c2 from s2 (-1 if end of source)
				65	*
				66	* if(either string finished) {
				67	* return result;
				68	* }
				69	* if(c1==c2) {
				70	* continue;
				71	* }
				72	*
				73	* // c1!=c2
				74	* try to decompose/case-fold c1/c2, and continue if one does;
				75	*
				76	* // still c1!=c2 and neither decomposes/case-folds, return result
				77	* return c1-c2;
				78	* }
				79	*
				80	* When a character decomposes, then the pointer for that source changes to
				81	* the decomposition, pushing the previous pointer onto a stack.
				82	* When the end of the decomposition is reached, then the code unit reader
				83	* pops the previous source from the stack.
				84	* (Same for case-folding.)
				85	*
				86	* This is complicated further by operating on variable-width UTF-16.
				87	* The top part of the loop works on code units, while lookups for decomposition
				88	* and case-folding need code points.
				89	* Code points are assembled after the equality/end-of-source part.
				90	* The source pointer is only advanced beyond all code units when the code point
				91	* actually decomposes/case-folds.
				92	*
				93	* If we were on a trail surrogate unit when assembling a code point,
				94	* and the code point decomposes/case-folds, then the decomposition/folding
				95	* result must be compared with the part of the other string that corresponds to
				96	* this string's lead surrogate.
				97	* Since we only assemble a code point when hitting a trail unit when the
				98	* preceding lead units were identical, we back up the other string by one unit
				99	* in such a case.
				100	*
				101	* The optional code point order comparison at the end works with
				102	* the same fix-up as the other code point order comparison functions.
				103	* See ustring.c and the comment near the end of this function.
				104	*
				105	* Assumption: A decomposition or case-folding result string never contains
				106	* a single surrogate. This is a safe assumption in the Unicode Standard.
				107	* Therefore, we do not need to check for surrogate pairs across
				108	* decomposition/case-folding boundaries.
				109	*
				110	* Further assumptions (see the verifications in tstnorm.cpp):
				111	* The API function checks for FCD first, while the core function
				112	* first case-folds and then decomposes. This requires that case-folding does not
				113	* un-FCD any strings.
				114	*
				115	* The API function may also NFD the input and turn off decomposition.
				116	* This requires that case-folding does not un-NFD strings either.
				117	*
				118	* TODO If any of the above two assumptions is violated,
				119	* then this entire code must be re-thought.
				120	* If this happens, then a simple solution is to case-fold both strings up front
				121	* and to turn off UNORM_INPUT_IS_FCD.
				122	* We already do this when not both strings are in FCD because makeFCD
				123	* would be a partial NFD before the case folding, which does not work.
				124	* Note that all of this is only a problem when case-folding _and_
				125	* canonical equivalence come together.
				126	* (Comments in unorm_compare() are more up to date than this TODO.)
				127	*/
				128
				129	/* stack element for previous-level source/decomposition pointers */
				130	struct CmpEquivLevel {
				131	const UChar start, s, *limit;
				132	};
				133	typedef struct CmpEquivLevel CmpEquivLevel;
				134
				135	/**
				136	* Internal option for unorm_cmpEquivFold() for decomposing.
				137	* If not set, just do strcasecmp().
				138	*/
				139	#define _COMPARE_EQUIV 0x80000
				140
				141	/* internal function */
				142	static int32_t
				143	unorm_cmpEquivFold(const UChar *s1, int32_t length1,
				144	const UChar *s2, int32_t length2,
				145	uint32_t options,
				146	UErrorCode *pErrorCode) {
				147	const Normalizer2Impl *nfcImpl;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	148
				149	/* current-level start/limit - s1/s2 as current */
				150	const UChar start1, start2, limit1, limit2;
				151
				152	/* decomposition and case folding variables */
				153	const UChar *p;
				154	int32_t length;
				155
				156	/* stacks of previous-level start/current/limit */
				157	CmpEquivLevel stack1[2], stack2[2];
				158
				159	/* buffers for algorithmic decompositions */
				160	UChar decomp1[4], decomp2[4];
				161
				162	/* case folding buffers, only use current-level start/limit */
				163	UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];
				164
				165	/* track which is the current level per string */
				166	int32_t level1, level2;
				167
				168	/* current code units, and code points for lookups */
				169	UChar32 c1, c2, cp1, cp2;
				170
				171	/* no argument error checking because this itself is not an API */
				172
				173	/*
				174	* assume that at least one of the options _COMPARE_EQUIV and U_COMPARE_IGNORE_CASE is set
				175	* otherwise this function must behave exactly as uprv_strCompare()
				176	* not checking for that here makes testing this function easier
				177	*/
				178
				179	/* normalization/properties data loaded? */
				180	if((options&_COMPARE_EQUIV)!=0) {
				181	nfcImpl=Normalizer2Factory::getNFCImpl(*pErrorCode);
				182	} else {
				183	nfcImpl=NULL;
				184	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	185	if(U_FAILURE(*pErrorCode)) {
				186	return 0;
				187	}
				188
				189	/* initialize */
				190	start1=s1;
				191	if(length1==-1) {
				192	limit1=NULL;
				193	} else {
				194	limit1=s1+length1;
				195	}
				196
				197	start2=s2;
				198	if(length2==-1) {
				199	limit2=NULL;
				200	} else {
				201	limit2=s2+length2;
				202	}
				203
				204	level1=level2=0;
				205	c1=c2=-1;
				206
				207	/* comparison loop */
				208	for(;;) {
				209	/*
				210	* here a code unit value of -1 means "get another code unit"
				211	* below it will mean "this source is finished"
				212	*/
				213
				214	if(c1<0) {
				215	/* get next code unit from string 1, post-increment */
				216	for(;;) {
				217	if(s1==limit1 \|\| ((c1=*s1)==0 && (limit1==NULL \|\| (options&_STRNCMP_STYLE)))) {
				218	if(level1==0) {
				219	c1=-1;
				220	break;
				221	}
				222	} else {
				223	++s1;
				224	break;
				225	}
				226
				227	/* reached end of level buffer, pop one level */
				228	do {
				229	--level1;
				230	start1=stack1[level1].start; /Not uninitialized/
				231	} while(start1==NULL);
				232	s1=stack1[level1].s; /Not uninitialized/
				233	limit1=stack1[level1].limit; /Not uninitialized/
				234	}
				235	}
				236
				237	if(c2<0) {
				238	/* get next code unit from string 2, post-increment */
				239	for(;;) {
				240	if(s2==limit2 \|\| ((c2=*s2)==0 && (limit2==NULL \|\| (options&_STRNCMP_STYLE)))) {
				241	if(level2==0) {
				242	c2=-1;
				243	break;
				244	}
				245	} else {
				246	++s2;
				247	break;
				248	}
				249
				250	/* reached end of level buffer, pop one level */
				251	do {
				252	--level2;
				253	start2=stack2[level2].start; /Not uninitialized/
				254	} while(start2==NULL);
				255	s2=stack2[level2].s; /Not uninitialized/
				256	limit2=stack2[level2].limit; /Not uninitialized/
				257	}
				258	}
				259
				260	/*
				261	* compare c1 and c2
				262	* either variable c1, c2 is -1 only if the corresponding string is finished
				263	*/
				264	if(c1==c2) {
				265	if(c1<0) {
				266	return 0; /* c1==c2==-1 indicating end of strings */
				267	}
				268	c1=c2=-1; /* make us fetch new code units */
				269	continue;
				270	} else if(c1<0) {
				271	return -1; /* string 1 ends before string 2 */
				272	} else if(c2<0) {
				273	return 1; /* string 2 ends before string 1 */
				274	}
				275	/* c1!=c2 && c1>=0 && c2>=0 */
				276
				277	/* get complete code points for c1, c2 for lookups if either is a surrogate */
				278	cp1=c1;
				279	if(U_IS_SURROGATE(c1)) {
				280	UChar c;
				281
				282	if(U_IS_SURROGATE_LEAD(c1)) {
				283	if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
				284	/* advance ++s1; only below if cp1 decomposes/case-folds */
				285	cp1=U16_GET_SUPPLEMENTARY(c1, c);
				286	}
				287	} else /* isTrail(c1) */ {
				288	if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) {
				289	cp1=U16_GET_SUPPLEMENTARY(c, c1);
				290	}
				291	}
				292	}
				293
				294	cp2=c2;
				295	if(U_IS_SURROGATE(c2)) {
				296	UChar c;
				297
				298	if(U_IS_SURROGATE_LEAD(c2)) {
				299	if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
				300	/* advance ++s2; only below if cp2 decomposes/case-folds */
				301	cp2=U16_GET_SUPPLEMENTARY(c2, c);
				302	}
				303	} else /* isTrail(c2) */ {
				304	if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) {
				305	cp2=U16_GET_SUPPLEMENTARY(c, c2);
				306	}
				307	}
				308	}
				309
				310	/*
				311	* go down one level for each string
				312	* continue with the main loop as soon as there is a real change
				313	*/
				314
				315	if( level1==0 && (options&U_COMPARE_IGNORE_CASE) &&
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	316	(length=ucase_toFullFolding((UChar32)cp1, &p, options))>=0
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	317	) {
				318	/* cp1 case-folds to the code point "length" or to p[length] */
				319	if(U_IS_SURROGATE(c1)) {
				320	if(U_IS_SURROGATE_LEAD(c1)) {
				321	/* advance beyond source surrogate pair if it case-folds */
				322	++s1;
				323	} else /* isTrail(c1) */ {
				324	/*
				325	* we got a supplementary code point when hitting its trail surrogate,
				326	* therefore the lead surrogate must have been the same as in the other string;
				327	* compare this decomposition with the lead surrogate in the other string
				328	* remember that this simulates bulk text replacement:
				329	* the decomposition would replace the entire code point
				330	*/
				331	--s2;
				332	c2=*(s2-1);
				333	}
				334	}
				335
				336	/* push current level pointers */
				337	stack1[0].start=start1;
				338	stack1[0].s=s1;
				339	stack1[0].limit=limit1;
				340	++level1;
				341
				342	/* copy the folding result to fold1[] */
				343	if(length<=UCASE_MAX_STRING_LENGTH) {
				344	u_memcpy(fold1, p, length);
				345	} else {
				346	int32_t i=0;
				347	U16_APPEND_UNSAFE(fold1, i, length);
				348	length=i;
				349	}
				350
				351	/* set next level pointers to case folding */
				352	start1=s1=fold1;
				353	limit1=fold1+length;
				354
				355	/* get ready to read from decomposition, continue with loop */
				356	c1=-1;
				357	continue;
				358	}
				359
				360	if( level2==0 && (options&U_COMPARE_IGNORE_CASE) &&
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	361	(length=ucase_toFullFolding((UChar32)cp2, &p, options))>=0
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	362	) {
				363	/* cp2 case-folds to the code point "length" or to p[length] */
				364	if(U_IS_SURROGATE(c2)) {
				365	if(U_IS_SURROGATE_LEAD(c2)) {
				366	/* advance beyond source surrogate pair if it case-folds */
				367	++s2;
				368	} else /* isTrail(c2) */ {
				369	/*
				370	* we got a supplementary code point when hitting its trail surrogate,
				371	* therefore the lead surrogate must have been the same as in the other string;
				372	* compare this decomposition with the lead surrogate in the other string
				373	* remember that this simulates bulk text replacement:
				374	* the decomposition would replace the entire code point
				375	*/
				376	--s1;
				377	c1=*(s1-1);
				378	}
				379	}
				380
				381	/* push current level pointers */
				382	stack2[0].start=start2;
				383	stack2[0].s=s2;
				384	stack2[0].limit=limit2;
				385	++level2;
				386
				387	/* copy the folding result to fold2[] */
				388	if(length<=UCASE_MAX_STRING_LENGTH) {
				389	u_memcpy(fold2, p, length);
				390	} else {
				391	int32_t i=0;
				392	U16_APPEND_UNSAFE(fold2, i, length);
				393	length=i;
				394	}
				395
				396	/* set next level pointers to case folding */
				397	start2=s2=fold2;
				398	limit2=fold2+length;
				399
				400	/* get ready to read from decomposition, continue with loop */
				401	c2=-1;
				402	continue;
				403	}
				404
				405	if( level1<2 && (options&_COMPARE_EQUIV) &&
				406	0!=(p=nfcImpl->getDecomposition((UChar32)cp1, decomp1, length))
				407	) {
				408	/* cp1 decomposes into p[length] */
				409	if(U_IS_SURROGATE(c1)) {
				410	if(U_IS_SURROGATE_LEAD(c1)) {
				411	/* advance beyond source surrogate pair if it decomposes */
				412	++s1;
				413	} else /* isTrail(c1) */ {
				414	/*
				415	* we got a supplementary code point when hitting its trail surrogate,
				416	* therefore the lead surrogate must have been the same as in the other string;
				417	* compare this decomposition with the lead surrogate in the other string
				418	* remember that this simulates bulk text replacement:
				419	* the decomposition would replace the entire code point
				420	*/
				421	--s2;
				422	c2=*(s2-1);
				423	}
				424	}
				425
				426	/* push current level pointers */
				427	stack1[level1].start=start1;
				428	stack1[level1].s=s1;
				429	stack1[level1].limit=limit1;
				430	++level1;
				431
				432	/* set empty intermediate level if skipped */
				433	if(level1<2) {
				434	stack1[level1++].start=NULL;
				435	}
				436
				437	/* set next level pointers to decomposition */
				438	start1=s1=p;
				439	limit1=p+length;
				440
				441	/* get ready to read from decomposition, continue with loop */
				442	c1=-1;
				443	continue;
				444	}
				445
				446	if( level2<2 && (options&_COMPARE_EQUIV) &&
				447	0!=(p=nfcImpl->getDecomposition((UChar32)cp2, decomp2, length))
				448	) {
				449	/* cp2 decomposes into p[length] */
				450	if(U_IS_SURROGATE(c2)) {
				451	if(U_IS_SURROGATE_LEAD(c2)) {
				452	/* advance beyond source surrogate pair if it decomposes */
				453	++s2;
				454	} else /* isTrail(c2) */ {
				455	/*
				456	* we got a supplementary code point when hitting its trail surrogate,
				457	* therefore the lead surrogate must have been the same as in the other string;
				458	* compare this decomposition with the lead surrogate in the other string
				459	* remember that this simulates bulk text replacement:
				460	* the decomposition would replace the entire code point
				461	*/
				462	--s1;
				463	c1=*(s1-1);
				464	}
				465	}
				466
				467	/* push current level pointers */
				468	stack2[level2].start=start2;
				469	stack2[level2].s=s2;
				470	stack2[level2].limit=limit2;
				471	++level2;
				472
				473	/* set empty intermediate level if skipped */
				474	if(level2<2) {
				475	stack2[level2++].start=NULL;
				476	}
				477
				478	/* set next level pointers to decomposition */
				479	start2=s2=p;
				480	limit2=p+length;
				481
				482	/* get ready to read from decomposition, continue with loop */
				483	c2=-1;
				484	continue;
				485	}
				486
				487	/*
				488	* no decomposition/case folding, max level for both sides:
				489	* return difference result
				490	*
				491	* code point order comparison must not just return cp1-cp2
				492	* because when single surrogates are present then the surrogate pairs
				493	* that formed cp1 and cp2 may be from different string indexes
				494	*
				495	* example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
				496	* c1=d800 cp1=10001 c2=dc00 cp2=10000
				497	* cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
				498	*
				499	* therefore, use same fix-up as in ustring.c/uprv_strCompare()
				500	* except: uprv_strCompare() fetches c=s while this functions fetches c=s++
				501	* so we have slightly different pointer/start/limit comparisons here
				502	*/
				503
				504	if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {
				505	/* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
				506	if(
				507	(c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) \|\|
				508	(U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))
				509	) {
				510	/* part of a surrogate pair, leave >=d800 */
				511	} else {
				512	/* BMP code point - may be surrogate code point - make <d800 */
				513	c1-=0x2800;
				514	}
				515
				516	if(
				517	(c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) \|\|
				518	(U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))
				519	) {
				520	/* part of a surrogate pair, leave >=d800 */
				521	} else {
				522	/* BMP code point - may be surrogate code point - make <d800 */
				523	c2-=0x2800;
				524	}
				525	}
				526
				527	return c1-c2;
				528	}
				529	}
				530
				531	static
				532	UBool _normalize(const Normalizer2 n2, const UChar s, int32_t length,
				533	UnicodeString &normalized, UErrorCode *pErrorCode) {
				534	UnicodeString str(length<0, s, length);
				535
				536	// check if s fulfill the conditions
				537	int32_t spanQCYes=n2->spanQuickCheckYes(str, *pErrorCode);
				538	if (U_FAILURE(*pErrorCode)) {
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	539	return false;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	540	}
				541	/*
				542	* ICU 2.4 had a further optimization:
				543	* If both strings were not in FCD, then they were both NFD'ed,
				544	* and the _COMPARE_EQUIV option was turned off.
				545	* It is not entirely clear that this is valid with the current
				546	* definition of the canonical caseless match.
				547	* Therefore, ICU 2.6 removes that optimization.
				548	*/
				549	if(spanQCYes<str.length()) {
				550	UnicodeString unnormalized=str.tempSubString(spanQCYes);
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	551	normalized.setTo(false, str.getBuffer(), spanQCYes);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	552	n2->normalizeSecondAndAppend(normalized, unnormalized, *pErrorCode);
				553	if (U_SUCCESS(*pErrorCode)) {
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	554	return true;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	555	}
				556	}
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	557	return false;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	558	}
				559
				560	U_CAPI int32_t U_EXPORT2
				561	unorm_compare(const UChar *s1, int32_t length1,
				562	const UChar *s2, int32_t length2,
				563	uint32_t options,
				564	UErrorCode *pErrorCode) {
				565	/* argument checking */
				566	if(U_FAILURE(*pErrorCode)) {
				567	return 0;
				568	}
				569	if(s1==0 \|\| length1<-1 \|\| s2==0 \|\| length2<-1) {
				570	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
				571	return 0;
				572	}
				573
				574	UnicodeString fcd1, fcd2;
				575	int32_t normOptions=(int32_t)(options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT);
				576	options\|=_COMPARE_EQUIV;
				577
				578	/*
				579	* UAX #21 Case Mappings, as fixed for Unicode version 4
				580	* (see Jitterbug 2021), defines a canonical caseless match as
				581	*
				582	* A string X is a canonical caseless match
				583	* for a string Y if and only if
				584	* NFD(toCasefold(NFD(X))) = NFD(toCasefold(NFD(Y)))
				585	*
				586	* For better performance, we check for FCD (or let the caller tell us that
				587	* both strings are in FCD) for the inner normalization.
				588	* BasicNormalizerTest::FindFoldFCDExceptions() makes sure that
				589	* case-folding preserves the FCD-ness of a string.
				590	* The outer normalization is then only performed by unorm_cmpEquivFold()
				591	* when there is a difference.
				592	*
				593	* Exception: When using the Turkic case-folding option, we do perform
				594	* full NFD first. This is because in the Turkic case precomposed characters
				595	* with 0049 capital I or 0069 small i fold differently whether they
				596	* are first decomposed or not, so an FCD check - a check only for
				597	* canonical order - is not sufficient.
				598	*/
				599	if(!(options&UNORM_INPUT_IS_FCD) \|\| (options&U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
				600	const Normalizer2 *n2;
				601	if(options&U_FOLD_CASE_EXCLUDE_SPECIAL_I) {
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	602	n2=Normalizer2::getNFDInstance(*pErrorCode);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	603	} else {
				604	n2=Normalizer2Factory::getFCDInstance(*pErrorCode);
				605	}
				606	if (U_FAILURE(*pErrorCode)) {
				607	return 0;
				608	}
				609
				610	if(normOptions&UNORM_UNICODE_3_2) {
				611	const UnicodeSet uni32=uniset_getUnicode32Instance(pErrorCode);
				612	FilteredNormalizer2 fn2(n2, uni32);
				613	if(_normalize(&fn2, s1, length1, fcd1, pErrorCode)) {
				614	s1=fcd1.getBuffer();
				615	length1=fcd1.length();
				616	}
				617	if(_normalize(&fn2, s2, length2, fcd2, pErrorCode)) {
				618	s2=fcd2.getBuffer();
				619	length2=fcd2.length();
				620	}
				621	} else {
				622	if(_normalize(n2, s1, length1, fcd1, pErrorCode)) {
				623	s1=fcd1.getBuffer();
				624	length1=fcd1.length();
				625	}
				626	if(_normalize(n2, s2, length2, fcd2, pErrorCode)) {
				627	s2=fcd2.getBuffer();
				628	length2=fcd2.length();
				629	}
				630	}
				631	}
				632
				633	if(U_SUCCESS(*pErrorCode)) {
				634	return unorm_cmpEquivFold(s1, length1, s2, length2, options, pErrorCode);
				635	} else {
				636	return 0;
				637	}
				638	}
				639
				640	#endif /* #if !UCONFIG_NO_NORMALIZATION */