Blame - source/common/caniter.cpp - chromium.googlesource.com/chromium/deps/icu

blob: b28acfc84ee3b0bd93eb6e8f925ef4d4f1f280df [file] [log] [blame]

Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1	// © 2016 and later: Unicode, Inc. and others.
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2	// License & terms of use: http://www.unicode.org/copyright.html
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	3	/*
				4	*****************************************************************************
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	5	* Copyright (C) 1996-2015, International Business Machines Corporation and
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	6	* others. All Rights Reserved.
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	7	*****************************************************************************
				8	*/
				9
				10	#include "unicode/utypes.h"
				11
				12	#if !UCONFIG_NO_NORMALIZATION
				13
				14	#include "unicode/caniter.h"
				15	#include "unicode/normalizer2.h"
				16	#include "unicode/uchar.h"
				17	#include "unicode/uniset.h"
				18	#include "unicode/usetiter.h"
				19	#include "unicode/ustring.h"
				20	#include "unicode/utf16.h"
				21	#include "cmemory.h"
				22	#include "hash.h"
				23	#include "normalizer2impl.h"
				24
				25	/**
				26	* This class allows one to iterate through all the strings that are canonically equivalent to a given
				27	* string. For example, here are some sample results:
				28	Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
				29	1: \u0041\u030A\u0064\u0307\u0327
				30	= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
				31	2: \u0041\u030A\u0064\u0327\u0307
				32	= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
				33	3: \u0041\u030A\u1E0B\u0327
				34	= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
				35	4: \u0041\u030A\u1E11\u0307
				36	= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
				37	5: \u00C5\u0064\u0307\u0327
				38	= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
				39	6: \u00C5\u0064\u0327\u0307
				40	= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
				41	7: \u00C5\u1E0B\u0327
				42	= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
				43	8: \u00C5\u1E11\u0307
				44	= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
				45	9: \u212B\u0064\u0307\u0327
				46	= {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
				47	10: \u212B\u0064\u0327\u0307
				48	= {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
				49	11: \u212B\u1E0B\u0327
				50	= {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
				51	12: \u212B\u1E11\u0307
				52	= {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
				53	*<br>Note: the code is intended for use with small strings, and is not suitable for larger ones,
				54	* since it has not been optimized for that situation.
				55	*@author M. Davis
				56	*@draft
				57	*/
				58
				59	// public
				60
				61	U_NAMESPACE_BEGIN
				62
				63	// TODO: add boilerplate methods.
				64
				65	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CanonicalIterator)
				66
				67	/**
				68	*@param source string to get results for
				69	*/
				70	CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode &status) :
				71	pieces(NULL),
				72	pieces_length(0),
				73	pieces_lengths(NULL),
				74	current(NULL),
				75	current_length(0),
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	76	nfd(*Normalizer2::getNFDInstance(status)),
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	77	nfcImpl(*Normalizer2Factory::getNFCImpl(status))
				78	{
				79	if(U_SUCCESS(status) && nfcImpl.ensureCanonIterData(status)) {
				80	setSource(sourceStr, status);
				81	}
				82	}
				83
				84	CanonicalIterator::~CanonicalIterator() {
				85	cleanPieces();
				86	}
				87
				88	void CanonicalIterator::cleanPieces() {
				89	int32_t i = 0;
				90	if(pieces != NULL) {
				91	for(i = 0; i < pieces_length; i++) {
				92	if(pieces[i] != NULL) {
				93	delete[] pieces[i];
				94	}
				95	}
				96	uprv_free(pieces);
				97	pieces = NULL;
				98	pieces_length = 0;
				99	}
				100	if(pieces_lengths != NULL) {
				101	uprv_free(pieces_lengths);
				102	pieces_lengths = NULL;
				103	}
				104	if(current != NULL) {
				105	uprv_free(current);
				106	current = NULL;
				107	current_length = 0;
				108	}
				109	}
				110
				111	/**
				112	*@return gets the source: NOTE: it is the NFD form of source
				113	*/
				114	UnicodeString CanonicalIterator::getSource() {
				115	return source;
				116	}
				117
				118	/**
				119	* Resets the iterator so that one can start again from the beginning.
				120	*/
				121	void CanonicalIterator::reset() {
				122	done = FALSE;
				123	for (int i = 0; i < current_length; ++i) {
				124	current[i] = 0;
				125	}
				126	}
				127
				128	/**
				129	*@return the next string that is canonically equivalent. The value null is returned when
				130	* the iteration is done.
				131	*/
				132	UnicodeString CanonicalIterator::next() {
				133	int32_t i = 0;
				134
				135	if (done) {
				136	buffer.setToBogus();
				137	return buffer;
				138	}
				139
				140	// delete old contents
				141	buffer.remove();
				142
				143	// construct return value
				144
				145	for (i = 0; i < pieces_length; ++i) {
				146	buffer.append(pieces[i][current[i]]);
				147	}
				148	//String result = buffer.toString(); // not needed
				149
				150	// find next value for next time
				151
				152	for (i = current_length - 1; ; --i) {
				153	if (i < 0) {
				154	done = TRUE;
				155	break;
				156	}
				157	current[i]++;
				158	if (current[i] < pieces_lengths[i]) break; // got sequence
				159	current[i] = 0;
				160	}
				161	return buffer;
				162	}
				163
				164	/**
				165	*@param set the source string to iterate against. This allows the same iterator to be used
				166	* while changing the source string, saving object creation.
				167	*/
				168	void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &status) {
				169	int32_t list_length = 0;
				170	UChar32 cp = 0;
				171	int32_t start = 0;
				172	int32_t i = 0;
				173	UnicodeString *list = NULL;
				174
				175	nfd.normalize(newSource, source, status);
				176	if(U_FAILURE(status)) {
				177	return;
				178	}
				179	done = FALSE;
				180
				181	cleanPieces();
				182
				183	// catch degenerate case
				184	if (newSource.length() == 0) {
				185	pieces = (UnicodeString *)uprv_malloc(sizeof(UnicodeString ));
				186	pieces_lengths = (int32_t)uprv_malloc(1 sizeof(int32_t));
				187	pieces_length = 1;
				188	current = (int32_t)uprv_malloc(1 sizeof(int32_t));
				189	current_length = 1;
				190	if (pieces == NULL \|\| pieces_lengths == NULL \|\| current == NULL) {
				191	status = U_MEMORY_ALLOCATION_ERROR;
				192	goto CleanPartialInitialization;
				193	}
				194	current[0] = 0;
				195	pieces[0] = new UnicodeString[1];
				196	pieces_lengths[0] = 1;
				197	if (pieces[0] == 0) {
				198	status = U_MEMORY_ALLOCATION_ERROR;
				199	goto CleanPartialInitialization;
				200	}
				201	return;
				202	}
				203
				204
				205	list = new UnicodeString[source.length()];
				206	if (list == 0) {
				207	status = U_MEMORY_ALLOCATION_ERROR;
				208	goto CleanPartialInitialization;
				209	}
				210
				211	// i should initialy be the number of code units at the
				212	// start of the string
				213	i = U16_LENGTH(source.char32At(0));
				214	//int32_t i = 1;
				215	// find the segments
				216	// This code iterates through the source string and
				217	// extracts segments that end up on a codepoint that
				218	// doesn't start any decompositions. (Analysis is done
				219	// on the NFD form - see above).
				220	for (; i < source.length(); i += U16_LENGTH(cp)) {
				221	cp = source.char32At(i);
				222	if (nfcImpl.isCanonSegmentStarter(cp)) {
				223	source.extract(start, i-start, list[list_length++]); // add up to i
				224	start = i;
				225	}
				226	}
				227	source.extract(start, i-start, list[list_length++]); // add last one
				228
				229
				230	// allocate the arrays, and find the strings that are CE to each segment
				231	pieces = (UnicodeString *)uprv_malloc(list_length sizeof(UnicodeString *));
				232	pieces_length = list_length;
				233	pieces_lengths = (int32_t)uprv_malloc(list_length sizeof(int32_t));
				234	current = (int32_t)uprv_malloc(list_length sizeof(int32_t));
				235	current_length = list_length;
				236	if (pieces == NULL \|\| pieces_lengths == NULL \|\| current == NULL) {
				237	status = U_MEMORY_ALLOCATION_ERROR;
				238	goto CleanPartialInitialization;
				239	}
				240
				241	for (i = 0; i < current_length; i++) {
				242	current[i] = 0;
				243	}
				244	// for each segment, get all the combinations that can produce
				245	// it after NFD normalization
				246	for (i = 0; i < pieces_length; ++i) {
				247	//if (PROGRESS) printf("SEGMENT\n");
				248	pieces[i] = getEquivalents(list[i], pieces_lengths[i], status);
				249	}
				250
				251	delete[] list;
				252	return;
				253	// Common section to cleanup all local variables and reset object variables.
				254	CleanPartialInitialization:
				255	if (list != NULL) {
				256	delete[] list;
				257	}
				258	cleanPieces();
				259	}
				260
				261	/**
				262	* Dumb recursive implementation of permutation.
				263	* TODO: optimize
				264	* @param source the string to find permutations for
				265	* @return the results in a set.
				266	*/
				267	void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
				268	if(U_FAILURE(status)) {
				269	return;
				270	}
				271	//if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source)));
				272	int32_t i = 0;
				273
				274	// optimization:
				275	// if zero or one character, just return a set with it
				276	// we check for length < 2 to keep from counting code points all the time
				277	if (source.length() <= 2 && source.countChar32() <= 1) {
				278	UnicodeString *toPut = new UnicodeString(source);
				279	/* test for NULL */
				280	if (toPut == 0) {
				281	status = U_MEMORY_ALLOCATION_ERROR;
				282	return;
				283	}
				284	result->put(source, toPut, status);
				285	return;
				286	}
				287
				288	// otherwise iterate through the string, and recursively permute all the other characters
				289	UChar32 cp;
				290	Hashtable subpermute(status);
				291	if(U_FAILURE(status)) {
				292	return;
				293	}
				294	subpermute.setValueDeleter(uprv_deleteUObject);
				295
				296	for (i = 0; i < source.length(); i += U16_LENGTH(cp)) {
				297	cp = source.char32At(i);
				298	const UHashElement *ne = NULL;
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	299	int32_t el = UHASH_FIRST;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	300	UnicodeString subPermuteString = source;
				301
				302	// optimization:
				303	// if the character is canonical combining class zero,
				304	// don't permute it
				305	if (skipZeros && i != 0 && u_getCombiningClass(cp) == 0) {
				306	//System.out.println("Skipping " + Utility.hex(UTF16.valueOf(source, i)));
				307	continue;
				308	}
				309
				310	subpermute.removeAll();
				311
				312	// see what the permutations of the characters before and after this one are
				313	//Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	314	permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	315	/* Test for buffer overflows */
				316	if(U_FAILURE(status)) {
				317	return;
				318	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	319	// The upper remove is destructive. The question is do we have to make a copy, or we don't care about the contents
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	320	// of source at this point.
				321
				322	// prefix this character to all of them
				323	ne = subpermute.nextElement(el);
				324	while (ne != NULL) {
				325	UnicodeString permRes = (UnicodeString )(ne->value.pointer);
				326	UnicodeString *chStr = new UnicodeString(cp);
				327	//test for NULL
				328	if (chStr == NULL) {
				329	status = U_MEMORY_ALLOCATION_ERROR;
				330	return;
				331	}
				332	chStr->append(permRes); //((UnicodeString *)(ne->value.pointer));
				333	//if (PROGRESS) printf(" Piece: %s\n", UToS(*chStr));
				334	result->put(*chStr, chStr, status);
				335	ne = subpermute.nextElement(el);
				336	}
				337	}
				338	//return result;
				339	}
				340
				341	// privates
				342
				343	// we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
				344	UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status) {
				345	Hashtable result(status);
				346	Hashtable permutations(status);
				347	Hashtable basic(status);
				348	if (U_FAILURE(status)) {
				349	return 0;
				350	}
				351	result.setValueDeleter(uprv_deleteUObject);
				352	permutations.setValueDeleter(uprv_deleteUObject);
				353	basic.setValueDeleter(uprv_deleteUObject);
				354
				355	UChar USeg[256];
				356	int32_t segLen = segment.extract(USeg, 256, status);
				357	getEquivalents2(&basic, USeg, segLen, status);
				358
				359	// now get all the permutations
				360	// add only the ones that are canonically equivalent
				361	// TODO: optimize by not permuting any class zero.
				362
				363	const UHashElement *ne = NULL;
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	364	int32_t el = UHASH_FIRST;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	365	//Iterator it = basic.iterator();
				366	ne = basic.nextElement(el);
				367	//while (it.hasNext())
				368	while (ne != NULL) {
				369	//String item = (String) it.next();
				370	UnicodeString item = ((UnicodeString )(ne->value.pointer));
				371
				372	permutations.removeAll();
				373	permute(item, CANITER_SKIP_ZEROES, &permutations, status);
				374	const UHashElement *ne2 = NULL;
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	375	int32_t el2 = UHASH_FIRST;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	376	//Iterator it2 = permutations.iterator();
				377	ne2 = permutations.nextElement(el2);
				378	//while (it2.hasNext())
				379	while (ne2 != NULL) {
				380	//String possible = (String) it2.next();
				381	//UnicodeString possible = new UnicodeString(((UnicodeString *)(ne2->value.pointer)));
				382	UnicodeString possible(((UnicodeString )(ne2->value.pointer)));
				383	UnicodeString attempt;
				384	nfd.normalize(possible, attempt, status);
				385
				386	// TODO: check if operator == is semanticaly the same as attempt.equals(segment)
				387	if (attempt==segment) {
				388	//if (PROGRESS) printf("Adding Permutation: %s\n", UToS(Tr(*possible)));
				389	// TODO: use the hashtable just to catch duplicates - store strings directly (somehow).
				390	result.put(possible, new UnicodeString(possible), status); //add(possible);
				391	} else {
				392	//if (PROGRESS) printf("-Skipping Permutation: %s\n", UToS(Tr(*possible)));
				393	}
				394
				395	ne2 = permutations.nextElement(el2);
				396	}
				397	ne = basic.nextElement(el);
				398	}
				399
				400	/* Test for buffer overflows */
				401	if(U_FAILURE(status)) {
				402	return 0;
				403	}
				404	// convert into a String[] to clean up storage
				405	//String[] finalResult = new String[result.size()];
				406	UnicodeString *finalResult = NULL;
				407	int32_t resultCount;
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame^]	408	if((resultCount = result.count()) != 0) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	409	finalResult = new UnicodeString[resultCount];
				410	if (finalResult == 0) {
				411	status = U_MEMORY_ALLOCATION_ERROR;
				412	return NULL;
				413	}
				414	}
				415	else {
				416	status = U_ILLEGAL_ARGUMENT_ERROR;
				417	return NULL;
				418	}
				419	//result.toArray(finalResult);
				420	result_len = 0;
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	421	el = UHASH_FIRST;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	422	ne = result.nextElement(el);
				423	while(ne != NULL) {
				424	finalResult[result_len++] = ((UnicodeString )(ne->value.pointer));
				425	ne = result.nextElement(el);
				426	}
				427
				428
				429	return finalResult;
				430	}
				431
				432	Hashtable CanonicalIterator::getEquivalents2(Hashtable fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status) {
				433
				434	if (U_FAILURE(status)) {
				435	return NULL;
				436	}
				437
				438	//if (PROGRESS) printf("Adding: %s\n", UToS(Tr(segment)));
				439
				440	UnicodeString toPut(segment, segLen);
				441
				442	fillinResult->put(toPut, new UnicodeString(toPut), status);
				443
				444	UnicodeSet starts;
				445
				446	// cycle through all the characters
				447	UChar32 cp;
				448	for (int32_t i = 0; i < segLen; i += U16_LENGTH(cp)) {
				449	// see if any character is at the start of some decomposition
				450	U16_GET(segment, 0, i, segLen, cp);
				451	if (!nfcImpl.getCanonStartSet(cp, starts)) {
				452	continue;
				453	}
				454	// if so, see which decompositions match
				455	UnicodeSetIterator iter(starts);
				456	while (iter.next()) {
				457	UChar32 cp2 = iter.getCodepoint();
				458	Hashtable remainder(status);
				459	remainder.setValueDeleter(uprv_deleteUObject);
				460	if (extract(&remainder, cp2, segment, segLen, i, status) == NULL) {
				461	continue;
				462	}
				463
				464	// there were some matches, so add all the possibilities to the set.
				465	UnicodeString prefix(segment, i);
				466	prefix += cp2;
				467
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	468	int32_t el = UHASH_FIRST;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	469	const UHashElement *ne = remainder.nextElement(el);
				470	while (ne != NULL) {
				471	UnicodeString item = ((UnicodeString )(ne->value.pointer));
				472	UnicodeString *toAdd = new UnicodeString(prefix);
				473	/* test for NULL */
				474	if (toAdd == 0) {
				475	status = U_MEMORY_ALLOCATION_ERROR;
				476	return NULL;
				477	}
				478	*toAdd += item;
				479	fillinResult->put(*toAdd, toAdd, status);
				480
				481	//if (PROGRESS) printf("Adding: %s\n", UToS(Tr(*toAdd)));
				482
				483	ne = remainder.nextElement(el);
				484	}
				485	}
				486	}
				487
				488	/* Test for buffer overflows */
				489	if(U_FAILURE(status)) {
				490	return NULL;
				491	}
				492	return fillinResult;
				493	}
				494
				495	/**
				496	* See if the decomposition of cp2 is at segment starting at segmentPos
				497	* (with canonical rearrangment!)
				498	* If so, take the remainder, and return the equivalents
				499	*/
				500	Hashtable CanonicalIterator::extract(Hashtable fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
				501	//Hashtable *CanonicalIterator::extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
				502	//if (PROGRESS) printf(" extract: %s, ", UToS(Tr(UnicodeString(comp))));
				503	//if (PROGRESS) printf("%s, %i\n", UToS(Tr(segment)), segmentPos);
				504
				505	if (U_FAILURE(status)) {
				506	return NULL;
				507	}
				508
				509	UnicodeString temp(comp);
				510	int32_t inputLen=temp.length();
				511	UnicodeString decompString;
				512	nfd.normalize(temp, decompString, status);
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	513	if (U_FAILURE(status)) {
				514	return NULL;
				515	}
				516	if (decompString.isBogus()) {
				517	status = U_MEMORY_ALLOCATION_ERROR;
				518	return NULL;
				519	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	520	const UChar *decomp=decompString.getBuffer();
				521	int32_t decompLen=decompString.length();
				522
				523	// See if it matches the start of segment (at segmentPos)
				524	UBool ok = FALSE;
				525	UChar32 cp;
				526	int32_t decompPos = 0;
				527	UChar32 decompCp;
				528	U16_NEXT(decomp, decompPos, decompLen, decompCp);
				529
				530	int32_t i = segmentPos;
				531	while(i < segLen) {
				532	U16_NEXT(segment, i, segLen, cp);
				533
				534	if (cp == decompCp) { // if equal, eat another cp from decomp
				535
				536	//if (PROGRESS) printf(" matches: %s\n", UToS(Tr(UnicodeString(cp))));
				537
				538	if (decompPos == decompLen) { // done, have all decomp characters!
				539	temp.append(segment+i, segLen-i);
				540	ok = TRUE;
				541	break;
				542	}
				543	U16_NEXT(decomp, decompPos, decompLen, decompCp);
				544	} else {
				545	//if (PROGRESS) printf(" buffer: %s\n", UToS(Tr(UnicodeString(cp))));
				546
				547	// brute force approach
				548	temp.append(cp);
				549
				550	/* TODO: optimize
				551	// since we know that the classes are monotonically increasing, after zero
				552	// e.g. 0 5 7 9 0 3
				553	// we can do an optimization
				554	// there are only a few cases that work: zero, less, same, greater
				555	// if both classes are the same, we fail
				556	// if the decomp class < the segment class, we fail
				557
				558	segClass = getClass(cp);
				559	if (decompClass <= segClass) return null;
				560	*/
				561	}
				562	}
				563	if (!ok)
				564	return NULL; // we failed, characters left over
				565
				566	//if (PROGRESS) printf("Matches\n");
				567
				568	if (inputLen == temp.length()) {
				569	fillinResult->put(UnicodeString(), new UnicodeString(), status);
				570	return fillinResult; // succeed, but no remainder
				571	}
				572
				573	// brute force approach
				574	// check to make sure result is canonically equivalent
				575	UnicodeString trial;
				576	nfd.normalize(temp, trial, status);
				577	if(U_FAILURE(status) \|\| trial.compare(segment+segmentPos, segLen - segmentPos) != 0) {
				578	return NULL;
				579	}
				580
				581	return getEquivalents2(fillinResult, temp.getBuffer()+inputLen, temp.length()-inputLen, status);
				582	}
				583
				584	U_NAMESPACE_END
				585
				586	#endif /* #if !UCONFIG_NO_NORMALIZATION */