Blame - source/test/intltest/regcoll.cpp - chromium.googlesource.com/chromium/deps/icu

blob: 9bd47d229f302cad40529879c2c871a20b9ba6da [file] [log] [blame]

Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1	// © 2016 and later: Unicode, Inc. and others.
				2	// License & terms of use: http://www.unicode.org/copyright.html
				3	/********************************************************************
				4	* COPYRIGHT:
				5	* Copyright (c) 1997-2016, International Business Machines Corporation and
				6	* others. All Rights Reserved.
				7	********************************************************************/
				8
				9	#include "unicode/utypes.h"
				10
				11	#if !UCONFIG_NO_COLLATION
				12
				13	#include "unicode/coll.h"
				14	#include "unicode/localpointer.h"
				15	#include "unicode/tblcoll.h"
				16	#include "unicode/unistr.h"
				17	#include "unicode/sortkey.h"
				18	#include "regcoll.h"
				19	#include "sfwdchit.h"
				20	#include "testutil.h"
				21	#include "cmemory.h"
				22
				23	CollationRegressionTest::CollationRegressionTest()
				24	{
				25	UErrorCode status = U_ZERO_ERROR;
				26
				27	en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status);
				28	if(U_FAILURE(status)) {
				29	delete en_us;
				30	en_us = 0;
				31	errcheckln(status, "Collator creation failed with %s", u_errorName(status));
				32	return;
				33	}
				34	}
				35
				36	CollationRegressionTest::~CollationRegressionTest()
				37	{
				38	delete en_us;
				39	}
				40
				41
				42	// @bug 4048446
				43	//
				44	// CollationElementIterator.reset() doesn't work
				45	//
				46	void CollationRegressionTest::Test4048446(/* char* par */)
				47	{
				48	const UnicodeString test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
				49	const UnicodeString test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
				50	CollationElementIterator *i1 = en_us->createCollationElementIterator(test1);
				51	CollationElementIterator *i2 = en_us->createCollationElementIterator(test1);
				52	UErrorCode status = U_ZERO_ERROR;
				53
				54	if (i1 == NULL\|\| i2 == NULL)
				55	{
				56	errln("Could not create CollationElementIterator's");
				57	delete i1;
				58	delete i2;
				59	return;
				60	}
				61
				62	while (i1->next(status) != CollationElementIterator::NULLORDER)
				63	{
				64	if (U_FAILURE(status))
				65	{
				66	errln("error calling next()");
				67
				68	delete i1;
				69	delete i2;
				70	return;
				71	}
				72	}
				73
				74	i1->reset();
				75
				76	assertEqual(i1, i2);
				77
				78	delete i1;
				79	delete i2;
				80	}
				81
				82	// @bug 4051866
				83	//
				84	// Collator -> rules -> Collator round-trip broken for expanding characters
				85	//
				86	void CollationRegressionTest::Test4051866(/* char* par */)
				87	{
				88	UnicodeString rules;
				89	UErrorCode status = U_ZERO_ERROR;
				90
				91	rules += "&n < o ";
				92	rules += "& oe ,o";
				93	rules += (UChar)0x3080;
				94	rules += "& oe ,";
				95	rules += (UChar)0x1530;
				96	rules += " ,O";
				97	rules += "& OE ,O";
				98	rules += (UChar)0x3080;
				99	rules += "& OE ,";
				100	rules += (UChar)0x1520;
				101	rules += "< p ,P";
				102
				103	// Build a collator containing expanding characters
				104	LocalPointer<RuleBasedCollator> c1(new RuleBasedCollator(rules, status), status);
				105	if (U_FAILURE(status)) {
				106	errln("RuleBasedCollator(rule string) failed - %s", u_errorName(status));
				107	return;
				108	}
				109
				110	// Build another using the rules from the first
				111	LocalPointer<RuleBasedCollator> c2(new RuleBasedCollator(c1->getRules(), status), status);
				112	if (U_FAILURE(status)) {
				113	errln("RuleBasedCollator(rule string from other RBC) failed - %s", u_errorName(status));
				114	return;
				115	}
				116
				117	// Make sure they're the same
				118	if (!(c1->getRules() == c2->getRules()))
				119	{
				120	errln("Rules are not equal");
				121	}
				122	}
				123
				124	// @bug 4053636
				125	//
				126	// Collator thinks "black-bird" == "black"
				127	//
				128	void CollationRegressionTest::Test4053636(/* char* par */)
				129	{
				130	if (en_us->equals("black_bird", "black"))
				131	{
				132	errln("black-bird == black");
				133	}
				134	}
				135
				136	// @bug 4054238
				137	//
				138	// CollationElementIterator will not work correctly if the associated
				139	// Collator object's mode is changed
				140	//
				141	void CollationRegressionTest::Test4054238(/* char* par */)
				142	{
				143	const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0};
				144	const UnicodeString test3(chars3);
				145	RuleBasedCollator *c = en_us->clone();
				146
				147	// NOTE: The Java code uses en_us to create the CollationElementIterators
				148	// but I'm pretty sure that's wrong, so I've changed this to use c.
				149	UErrorCode status = U_ZERO_ERROR;
				150	c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
				151	CollationElementIterator *i1 = c->createCollationElementIterator(test3);
				152	delete i1;
				153	delete c;
				154	}
				155
				156	// @bug 4054734
				157	//
				158	// Collator::IDENTICAL documented but not implemented
				159	//
				160	void CollationRegressionTest::Test4054734(/* char* par */)
				161	{
				162	/*
				163	Here's the original Java:
				164
				165	String[] decomp = {
				166	"\u0001", "<", "\u0002",
				167	"\u0001", "=", "\u0001",
				168	"A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise
				169	"\u00C0", "=", "A\u0300" // Decomp should make these equal
				170	};
				171
				172	String[] nodecomp = {
				173	"\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave
				174	};
				175	*/
				176
				177	static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] =
				178	{
				179	{0x0001, 0}, {0x3c, 0}, {0x0002, 0},
				180	{0x0001, 0}, {0x3d, 0}, {0x0001, 0},
				181	{0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0},
				182	{0x00c0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}
				183	};
				184
				185
				186	UErrorCode status = U_ZERO_ERROR;
				187	RuleBasedCollator *c = en_us->clone();
				188
				189	c->setStrength(Collator::IDENTICAL);
				190
				191	c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
				192	compareArray(*c, decomp, UPRV_LENGTHOF(decomp));
				193
				194	delete c;
				195	}
				196
				197	// @bug 4054736
				198	//
				199	// Full Decomposition mode not implemented
				200	//
				201	void CollationRegressionTest::Test4054736(/* char* par */)
				202	{
				203	UErrorCode status = U_ZERO_ERROR;
				204	RuleBasedCollator *c = en_us->clone();
				205
				206	c->setStrength(Collator::SECONDARY);
				207	c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
				208
				209	static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
				210	{
				211	{0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC} // Alef-Lamed vs. Alef, Lamed
				212	};
				213
				214	compareArray(*c, tests, UPRV_LENGTHOF(tests));
				215
				216	delete c;
				217	}
				218
				219	// @bug 4058613
				220	//
				221	// Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean
				222	//
				223	void CollationRegressionTest::Test4058613(/* char* par */)
				224	{
				225	// Creating a default collator doesn't work when Korean is the default
				226	// locale
				227
				228	Locale oldDefault = Locale::getDefault();
				229	UErrorCode status = U_ZERO_ERROR;
				230
				231	Locale::setDefault(Locale::getKorean(), status);
				232
				233	if (U_FAILURE(status))
				234	{
				235	errln("Could not set default locale to Locale::KOREAN");
				236	return;
				237	}
				238
				239	Collator *c = NULL;
				240
				241	c = Collator::createInstance("en_US", status);
				242
				243	if (c == NULL \|\| U_FAILURE(status))
				244	{
				245	errln("Could not create a Korean collator");
				246	Locale::setDefault(oldDefault, status);
				247	delete c;
				248	return;
				249	}
				250
				251	// Since the fix to this bug was to turn off decomposition for Korean collators,
				252	// ensure that's what we got
				253	if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
				254	{
				255	errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator");
				256	}
				257
				258	delete c;
				259
				260	Locale::setDefault(oldDefault, status);
				261	}
				262
				263	// @bug 4059820
				264	//
				265	// RuleBasedCollator.getRules does not return the exact pattern as input
				266	// for expanding character sequences
				267	//
				268	void CollationRegressionTest::Test4059820(/* char* par */)
				269	{
				270	UErrorCode status = U_ZERO_ERROR;
				271
				272	RuleBasedCollator *c = NULL;
				273	UnicodeString rules = "&9 < a < b , c/a < d < z";
				274
				275	c = new RuleBasedCollator(rules, status);
				276
				277	if (c == NULL \|\| U_FAILURE(status))
				278	{
				279	errln("Failure building a collator.");
				280	delete c;
				281	return;
				282	}
				283
				284	if ( c->getRules().indexOf("c/a") == -1)
				285	{
				286	errln("returned rules do not contain 'c/a'");
				287	}
				288
				289	delete c;
				290	}
				291
				292	// @bug 4060154
				293	//
				294	// MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
				295	//
				296	void CollationRegressionTest::Test4060154(/* char* par */)
				297	{
				298	UErrorCode status = U_ZERO_ERROR;
				299	UnicodeString rules;
				300
				301	rules += "&f < g, G < h, H < i, I < j, J";
				302	rules += " & H < ";
				303	rules += (UChar)0x0131;
				304	rules += ", ";
				305	rules += (UChar)0x0130;
				306	rules += ", i, I";
				307
				308	RuleBasedCollator *c = NULL;
				309
				310	c = new RuleBasedCollator(rules, status);
				311
				312	if (c == NULL \|\| U_FAILURE(status))
				313	{
				314	errln("failure building collator.");
				315	delete c;
				316	return;
				317	}
				318
				319	c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
				320
				321	/*
				322	String[] tertiary = {
				323	"A", "<", "B",
				324	"H", "<", "\u0131",
				325	"H", "<", "I",
				326	"\u0131", "<", "\u0130",
				327	"\u0130", "<", "i",
				328	"\u0130", ">", "H",
				329	};
				330	*/
				331
				332	static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] =
				333	{
				334	{0x41, 0}, {0x3c, 0}, {0x42, 0},
				335	{0x48, 0}, {0x3c, 0}, {0x0131, 0},
				336	{0x48, 0}, {0x3c, 0}, {0x49, 0},
				337	{0x0131, 0}, {0x3c, 0}, {0x0130, 0},
				338	{0x0130, 0}, {0x3c, 0}, {0x69, 0},
				339	{0x0130, 0}, {0x3e, 0}, {0x48, 0}
				340	};
				341
				342	c->setStrength(Collator::TERTIARY);
				343	compareArray(*c, tertiary, UPRV_LENGTHOF(tertiary));
				344
				345	/*
				346	String[] secondary = {
				347	"H", "<", "I",
				348	"\u0131", "=", "\u0130",
				349	};
				350	*/
				351	static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] =
				352	{
				353	{0x48, 0}, {0x3c, 0}, {0x49, 0},
				354	{0x0131, 0}, {0x3d, 0}, {0x0130, 0}
				355	};
				356
				357	c->setStrength(Collator::PRIMARY);
				358	compareArray(*c, secondary, UPRV_LENGTHOF(secondary));
				359
				360	delete c;
				361	}
				362
				363	// @bug 4062418
				364	//
				365	// Secondary/Tertiary comparison incorrect in French Secondary
				366	//
				367	void CollationRegressionTest::Test4062418(/* char* par */)
				368	{
				369	UErrorCode status = U_ZERO_ERROR;
				370
				371	RuleBasedCollator *c = NULL;
				372
				373	c = (RuleBasedCollator *) Collator::createInstance(Locale::getCanadaFrench(), status);
				374
				375	if (c == NULL \|\| U_FAILURE(status))
				376	{
				377	errln("Failed to create collator for Locale::getCanadaFrench()");
				378	delete c;
				379	return;
				380	}
				381
				382	c->setStrength(Collator::SECONDARY);
				383
				384	/*
				385	String[] tests = {
				386	"p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater
				387	};
				388	*/
				389	static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
				390	{
				391	{0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0}
				392	};
				393
				394	compareArray(*c, tests, UPRV_LENGTHOF(tests));
				395
				396	delete c;
				397	}
				398
				399	// @bug 4065540
				400	//
				401	// Collator::compare() method broken if either string contains spaces
				402	//
				403	void CollationRegressionTest::Test4065540(/* char* par */)
				404	{
				405	if (en_us->compare("abcd e", "abcd f") == 0)
				406	{
				407	errln("'abcd e' == 'abcd f'");
				408	}
				409	}
				410
				411	// @bug 4066189
				412	//
				413	// Unicode characters need to be recursively decomposed to get the
				414	// correct result. For example,
				415	// \u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
				416	//
				417	void CollationRegressionTest::Test4066189(/* char* par */)
				418	{
				419	static const UChar chars1[] = {0x1EB1, 0};
				420	static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0};
				421	const UnicodeString test1(chars1);
				422	const UnicodeString test2(chars2);
				423	UErrorCode status = U_ZERO_ERROR;
				424
				425	// NOTE: The java code used en_us to create the
				426	// CollationElementIterator's. I'm pretty sure that
				427	// was wrong, so I've change the code to use c1 and c2
				428	RuleBasedCollator *c1 = en_us->clone();
				429	c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
				430	CollationElementIterator *i1 = c1->createCollationElementIterator(test1);
				431
				432	RuleBasedCollator *c2 = en_us->clone();
				433	c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
				434	CollationElementIterator *i2 = c2->createCollationElementIterator(test2);
				435
				436	assertEqual(i1, i2);
				437
				438	delete i2;
				439	delete c2;
				440	delete i1;
				441	delete c1;
				442	}
				443
				444	// @bug 4066696
				445	//
				446	// French secondary collation checking at the end of compare iteration fails
				447	//
				448	void CollationRegressionTest::Test4066696(/* char* par */)
				449	{
				450	UErrorCode status = U_ZERO_ERROR;
				451	RuleBasedCollator *c = NULL;
				452
				453	c = (RuleBasedCollator *)Collator::createInstance(Locale::getCanadaFrench(), status);
				454
				455	if (c == NULL \|\| U_FAILURE(status))
				456	{
				457	errln("Failure creating collator for Locale::getCanadaFrench()");
				458	delete c;
				459	return;
				460	}
				461
				462	c->setStrength(Collator::SECONDARY);
				463
				464	/*
				465	String[] tests = {
				466	"\u00e0", "<", "\u01fa", // a-grave < A-ring-acute
				467	};
				468
				469	should be:
				470
				471	String[] tests = {
				472	"\u00e0", ">", "\u01fa", // a-grave < A-ring-acute
				473	};
				474
				475	*/
				476
				477	static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
				478	{
				479	{0x00E0, 0}, {0x3e, 0}, {0x01FA, 0}
				480	};
				481
				482	compareArray(*c, tests, UPRV_LENGTHOF(tests));
				483
				484	delete c;
				485	}
				486
				487	// @bug 4076676
				488	//
				489	// Bad canonicalization of same-class combining characters
				490	//
				491	void CollationRegressionTest::Test4076676(/* char* par */)
				492	{
				493	// These combining characters are all in the same class, so they should not
				494	// be reordered, and they should compare as unequal.
				495	static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0};
				496	static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0};
				497
				498	RuleBasedCollator *c = en_us->clone();
				499	c->setStrength(Collator::TERTIARY);
				500
				501	if (c->compare(s1,s2) == 0)
				502	{
				503	errln("Same-class combining chars were reordered");
				504	}
				505
				506	delete c;
				507	}
				508
				509	// @bug 4079231
				510	//
				511	// RuleBasedCollator::operator==(NULL) throws NullPointerException
				512	//
				513	void CollationRegressionTest::Test4079231(/* char* par */)
				514	{
				515	// I don't think there's any way to write this test
				516	// in C++. The following is equivalent to the Java,
				517	// but doesn't compile 'cause NULL can't be converted
				518	// to Collator&
				519	//
				520	// if (en_us->operator==(NULL))
				521	// {
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	522	// errln("en_us->operator==(NULL) returned true");
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	523	// }
				524
				525	/*
				526	try {
				527	if (en_us->equals(null)) {
				528	errln("en_us->equals(null) returned true");
				529	}
				530	}
				531	catch (Exception e) {
				532	errln("en_us->equals(null) threw " + e.toString());
				533	}
				534	*/
				535	}
				536
				537	// @bug 4078588
				538	//
				539	// RuleBasedCollator breaks on "< a < bb" rule
				540	//
				541	void CollationRegressionTest::Test4078588(/* char par /)
				542	{
				543	UErrorCode status = U_ZERO_ERROR;
				544	RuleBasedCollator *rbc = new RuleBasedCollator("&9 < a < bb", status);
				545
				546	if (rbc == NULL \|\| U_FAILURE(status))
				547	{
				548	errln("Failed to create RuleBasedCollator.");
				549	delete rbc;
				550	return;
				551	}
				552
				553	Collator::EComparisonResult result = rbc->compare("a","bb");
				554
				555	if (result != Collator::LESS)
				556	{
				557	errln((UnicodeString)"Compare(a,bb) returned " + (int)result
				558	+ (UnicodeString)"; expected -1");
				559	}
				560
				561	delete rbc;
				562	}
				563
				564	// @bug 4081866
				565	//
				566	// Combining characters in different classes not reordered properly.
				567	//
				568	void CollationRegressionTest::Test4081866(/* char* par */)
				569	{
				570	// These combining characters are all in different classes,
				571	// so they should be reordered and the strings should compare as equal.
				572	static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0};
				573	static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0};
				574
				575	UErrorCode status = U_ZERO_ERROR;
				576	RuleBasedCollator *c = en_us->clone();
				577	c->setStrength(Collator::TERTIARY);
				578
				579	// Now that the default collators are set to NO_DECOMPOSITION
				580	// (as a result of fixing bug 4114077), we must set it explicitly
				581	// when we're testing reordering behavior. -- lwerner, 5/5/98
				582	c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
				583
				584	if (c->compare(s1,s2) != 0)
				585	{
				586	errln("Combining chars were not reordered");
				587	}
				588
				589	delete c;
				590	}
				591
				592	// @bug 4087241
				593	//
				594	// string comparison errors in Scandinavian collators
				595	//
				596	void CollationRegressionTest::Test4087241(/* char* par */)
				597	{
				598	UErrorCode status = U_ZERO_ERROR;
				599	Locale da_DK("da", "DK");
				600	RuleBasedCollator *c = NULL;
				601
				602	c = (RuleBasedCollator *) Collator::createInstance(da_DK, status);
				603
				604	if (c == NULL \|\| U_FAILURE(status))
				605	{
				606	errln("Failed to create collator for da_DK locale");
				607	delete c;
				608	return;
				609	}
				610
				611	c->setStrength(Collator::SECONDARY);
				612
				613	static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
				614	{
				615	{0x7a, 0}, {0x3c, 0}, {0x00E6, 0}, // z < ae
				616	{0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0}, // a-umlaut < a-ring
				617	{0x59, 0}, {0x3c, 0}, {0x75, 0x0308, 0}, // Y < u-umlaut
				618	};
				619
				620	compareArray(*c, tests, UPRV_LENGTHOF(tests));
				621
				622	delete c;
				623	}
				624
				625	// @bug 4087243
				626	//
				627	// CollationKey takes ignorable strings into account when it shouldn't
				628	//
				629	void CollationRegressionTest::Test4087243(/* char* par */)
				630	{
				631	RuleBasedCollator *c = en_us->clone();
				632	c->setStrength(Collator::TERTIARY);
				633
				634	static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
				635	{
				636	{0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0} // 1 2 3 = 1 2 3 ctrl-A
				637	};
				638
				639	compareArray(*c, tests, UPRV_LENGTHOF(tests));
				640
				641	delete c;
				642	}
				643
				644	// @bug 4092260
				645	//
				646	// Mu/micro conflict
				647	// Micro symbol and greek lowercase letter Mu should sort identically
				648	//
				649	void CollationRegressionTest::Test4092260(/* char* par */)
				650	{
				651	UErrorCode status = U_ZERO_ERROR;
				652	Locale el("el", "");
				653	Collator *c = NULL;
				654
				655	c = Collator::createInstance(el, status);
				656
				657	if (c == NULL \|\| U_FAILURE(status))
				658	{
				659	errln("Failed to create collator for el locale.");
				660	delete c;
				661	return;
				662	}
				663
				664	// These now have tertiary differences in UCA
				665	c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
				666
				667	static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
				668	{
				669	{0x00B5, 0}, {0x3d, 0}, {0x03BC, 0}
				670	};
				671
				672	compareArray(*c, tests, UPRV_LENGTHOF(tests));
				673
				674	delete c;
				675	}
				676
				677	// @bug 4095316
				678	//
				679	void CollationRegressionTest::Test4095316(/* char* par */)
				680	{
				681	UErrorCode status = U_ZERO_ERROR;
				682	Locale el_GR("el", "GR");
				683	Collator *c = Collator::createInstance(el_GR, status);
				684
				685	if (c == NULL \|\| U_FAILURE(status))
				686	{
				687	errln("Failed to create collator for el_GR locale");
				688	delete c;
				689	return;
				690	}
				691	// These now have tertiary differences in UCA
				692	//c->setStrength(Collator::TERTIARY);
				693	c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
				694
				695	static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
				696	{
				697	{0x03D4, 0}, {0x3d, 0}, {0x03AB, 0}
				698	};
				699
				700	compareArray(*c, tests, UPRV_LENGTHOF(tests));
				701
				702	delete c;
				703	}
				704
				705	// @bug 4101940
				706	//
				707	void CollationRegressionTest::Test4101940(/* char* par */)
				708	{
				709	UErrorCode status = U_ZERO_ERROR;
				710	RuleBasedCollator *c = NULL;
				711	UnicodeString rules = "&9 < a < b";
				712	UnicodeString nothing = "";
				713
				714	c = new RuleBasedCollator(rules, status);
				715
				716	if (c == NULL \|\| U_FAILURE(status))
				717	{
				718	errln("Failed to create RuleBasedCollator");
				719	delete c;
				720	return;
				721	}
				722
				723	CollationElementIterator *i = c->createCollationElementIterator(nothing);
				724	i->reset();
				725
				726	if (i->next(status) != CollationElementIterator::NULLORDER)
				727	{
				728	errln("next did not return NULLORDER");
				729	}
				730
				731	delete i;
				732	delete c;
				733	}
				734
				735	// @bug 4103436
				736	//
				737	// Collator::compare not handling spaces properly
				738	//
				739	void CollationRegressionTest::Test4103436(/* char* par */)
				740	{
				741	RuleBasedCollator *c = en_us->clone();
				742	c->setStrength(Collator::TERTIARY);
				743
				744	static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
				745	{
				746	{0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0},
				747	{0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}
				748	};
				749
				750	compareArray(*c, tests, UPRV_LENGTHOF(tests));
				751
				752	delete c;
				753	}
				754
				755	// @bug 4114076
				756	//
				757	// Collation not Unicode conformant with Hangul syllables
				758	//
				759	void CollationRegressionTest::Test4114076(/* char* par */)
				760	{
				761	UErrorCode status = U_ZERO_ERROR;
				762	RuleBasedCollator *c = en_us->clone();
				763	c->setStrength(Collator::TERTIARY);
				764
				765	//
				766	// With Canonical decomposition, Hangul syllables should get decomposed
				767	// into Jamo, but Jamo characters should not be decomposed into
				768	// conjoining Jamo
				769	//
				770	static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
				771	{
				772	{0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0}
				773	};
				774
				775	c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
				776	compareArray(*c, test1, UPRV_LENGTHOF(test1));
				777
				778	// From UTR #15:
				779	// *In earlier versions of Unicode, jamo characters like ksf
				780	// had compatibility mappings to kf + sf. These mappings were
				781	// removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
				782	// That is, the following test is obsolete as of 2.1.9
				783
				784	//obsolete- // With Full decomposition, it should go all the way down to
				785	//obsolete- // conjoining Jamo characters.
				786	//obsolete- //
				787	//obsolete- static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
				788	//obsolete- {
				789	//obsolete- {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
				790	//obsolete- };
				791	//obsolete-
				792	//obsolete- c->setDecomposition(Normalizer::DECOMP_COMPAT);
				793	//obsolete- compareArray(*c, test2, UPRV_LENGTHOF(test2));
				794
				795	delete c;
				796	}
				797
				798
				799	// @bug 4124632
				800	//
				801	// Collator::getCollationKey was hanging on certain character sequences
				802	//
				803	void CollationRegressionTest::Test4124632(/* char* par */)
				804	{
				805	UErrorCode status = U_ZERO_ERROR;
				806	Collator *coll = NULL;
				807
				808	coll = Collator::createInstance(Locale::getJapan(), status);
				809
				810	if (coll == NULL \|\| U_FAILURE(status))
				811	{
				812	errln("Failed to create collator for Locale::JAPAN");
				813	delete coll;
				814	return;
				815	}
				816
				817	static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0};
				818	CollationKey key;
				819
				820	coll->getCollationKey(test, key, status);
				821
				822	if (key.isBogus() \|\| U_FAILURE(status))
				823	{
				824	errln("CollationKey creation failed.");
				825	}
				826
				827	delete coll;
				828	}
				829
				830	// @bug 4132736
				831	//
				832	// sort order of french words with multiple accents has errors
				833	//
				834	void CollationRegressionTest::Test4132736(/* char* par */)
				835	{
				836	UErrorCode status = U_ZERO_ERROR;
				837
				838	Collator *c = NULL;
				839
				840	c = Collator::createInstance(Locale::getCanadaFrench(), status);
				841	c->setStrength(Collator::TERTIARY);
				842
				843	if (c == NULL \|\| U_FAILURE(status))
				844	{
				845	errln("Failed to create a collator for Locale::getCanadaFrench()");
				846	delete c;
				847	return;
				848	}
				849
				850	static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
				851	{
				852	{0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0},
				853	{0x65, 0x0300, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x0300, 0}
				854	};
				855
				856	compareArray(*c, test1, UPRV_LENGTHOF(test1));
				857
				858	delete c;
				859	}
				860
				861	// @bug 4133509
				862	//
				863	// The sorting using java.text.CollationKey is not in the exact order
				864	//
				865	void CollationRegressionTest::Test4133509(/* char* par */)
				866	{
				867	static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
				868	{
				869	{0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0},
				870	{0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0}, {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0},
				871	{0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0}, {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0}
				872	};
				873
				874	compareArray(*en_us, test1, UPRV_LENGTHOF(test1));
				875	}
				876
				877	// @bug 4114077
				878	//
				879	// Collation with decomposition off doesn't work for Europe
				880	//
				881	void CollationRegressionTest::Test4114077(/* char* par */)
				882	{
				883	// Ensure that we get the same results with decomposition off
				884	// as we do with it on....
				885
				886	UErrorCode status = U_ZERO_ERROR;
				887	RuleBasedCollator *c = en_us->clone();
				888	c->setStrength(Collator::TERTIARY);
				889
				890	static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
				891	{
				892	{0x00C0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}, // Should be equivalent
				893	{0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0},
				894	{0x0204, 0}, {0x3d, 0}, {0x45, 0x030F, 0},
				895	{0x01fa, 0}, {0x3d, 0}, {0x41, 0x030a, 0x0301, 0}, // a-ring-acute -> a-ring, acute
				896	// -> a, ring, acute
				897	{0x41, 0x0300, 0x0316, 0}, {0x3c, 0}, {0x41, 0x0316, 0x0300, 0} // No reordering --> unequal
				898	};
				899
				900	c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
				901	compareArray(*c, test1, UPRV_LENGTHOF(test1));
				902
				903	static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
				904	{
				905	{0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0} // Reordering --> equal
				906	};
				907
				908	c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
				909	compareArray(*c, test2, UPRV_LENGTHOF(test2));
				910
				911	delete c;
				912	}
				913
				914	// @bug 4141640
				915	//
				916	// Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
				917	//
				918	void CollationRegressionTest::Test4141640(/* char* par */)
				919	{
				920	//
				921	// Rather than just creating a Swedish collator, we might as well
				922	// try to instantiate one for every locale available on the system
				923	// in order to prevent this sort of bug from cropping up in the future
				924	//
				925	UErrorCode status = U_ZERO_ERROR;
				926	int32_t i, localeCount;
				927	const Locale *locales = Locale::getAvailableLocales(localeCount);
				928
				929	for (i = 0; i < localeCount; i += 1)
				930	{
				931	Collator *c = NULL;
				932
				933	status = U_ZERO_ERROR;
				934	c = Collator::createInstance(locales[i], status);
				935
				936	if (c == NULL \|\| U_FAILURE(status))
				937	{
				938	UnicodeString msg, localeName;
				939
				940	msg += "Could not create collator for locale ";
				941	msg += locales[i].getName();
				942
				943	errln(msg);
				944	}
				945
				946	delete c;
				947	}
				948	}
				949
				950	// @bug 4139572
				951	//
				952	// getCollationKey throws exception for spanish text
				953	// Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
				954	//
				955	void CollationRegressionTest::Test4139572(/* char* par */)
				956	{
				957	//
				958	// Code pasted straight from the bug report
				959	// (and then translated to C++ ;-)
				960	//
				961	// create spanish locale and collator
				962	UErrorCode status = U_ZERO_ERROR;
				963	Locale l("es", "es");
				964	Collator *col = NULL;
				965
				966	col = Collator::createInstance(l, status);
				967
				968	if (col == NULL \|\| U_FAILURE(status))
				969	{
				970	errln("Failed to create a collator for es_es locale.");
				971	delete col;
				972	return;
				973	}
				974
				975	CollationKey key;
				976
				977	// this spanish phrase kills it!
				978	col->getCollationKey("Nombre De Objeto", key, status);
				979
				980	if (key.isBogus() \|\| U_FAILURE(status))
				981	{
				982	errln("Error creating CollationKey for \"Nombre De Ojbeto\"");
				983	}
				984
				985	delete col;
				986	}
				987
				988	void CollationRegressionTest::Test4179216() {
				989	// you can position a CollationElementIterator in the middle of
				990	// a contracting character sequence, yielding a bogus collation
				991	// element
				992	IcuTestErrorCode errorCode(*this, "Test4179216");
				993	RuleBasedCollator coll(en_us->getRules() + " & C < ch , cH , Ch , CH < cat < crunchy", errorCode);
				994	UnicodeString testText = "church church catcatcher runcrunchynchy";
				995	CollationElementIterator *iter = coll.createCollationElementIterator(testText);
				996
				997	// test that the "ch" combination works properly
				998	iter->setOffset(4, errorCode);
				999	int32_t elt4 = CollationElementIterator::primaryOrder(iter->next(errorCode));
				1000
				1001	iter->reset();
				1002	int32_t elt0 = CollationElementIterator::primaryOrder(iter->next(errorCode));
				1003
				1004	iter->setOffset(5, errorCode);
				1005	int32_t elt5 = CollationElementIterator::primaryOrder(iter->next(errorCode));
				1006
				1007	// Compares and prints only 16-bit primary weights.
				1008	if (elt4 != elt0 \|\| elt5 != elt0) {
				1009	errln("The collation elements at positions 0 (0x%04x), "
				1010	"4 (0x%04x), and 5 (0x%04x) don't match.",
				1011	elt0, elt4, elt5);
				1012	}
				1013
				1014	// test that the "cat" combination works properly
				1015	iter->setOffset(14, errorCode);
				1016	int32_t elt14 = CollationElementIterator::primaryOrder(iter->next(errorCode));
				1017
				1018	iter->setOffset(15, errorCode);
				1019	int32_t elt15 = CollationElementIterator::primaryOrder(iter->next(errorCode));
				1020
				1021	iter->setOffset(16, errorCode);
				1022	int32_t elt16 = CollationElementIterator::primaryOrder(iter->next(errorCode));
				1023
				1024	iter->setOffset(17, errorCode);
				1025	int32_t elt17 = CollationElementIterator::primaryOrder(iter->next(errorCode));
				1026
				1027	iter->setOffset(18, errorCode);
				1028	int32_t elt18 = CollationElementIterator::primaryOrder(iter->next(errorCode));
				1029
				1030	iter->setOffset(19, errorCode);
				1031	int32_t elt19 = CollationElementIterator::primaryOrder(iter->next(errorCode));
				1032
				1033	// Compares and prints only 16-bit primary weights.
				1034	if (elt14 != elt15 \|\| elt14 != elt16 \|\| elt14 != elt17
				1035	\|\| elt14 != elt18 \|\| elt14 != elt19) {
				1036	errln("\"cat\" elements don't match: elt14 = 0x%04x, "
				1037	"elt15 = 0x%04x, elt16 = 0x%04x, elt17 = 0x%04x, "
				1038	"elt18 = 0x%04x, elt19 = 0x%04x",
				1039	elt14, elt15, elt16, elt17, elt18, elt19);
				1040	}
				1041
				1042	// now generate a complete list of the collation elements,
				1043	// first using next() and then using setOffset(), and
				1044	// make sure both interfaces return the same set of elements
				1045	iter->reset();
				1046
				1047	int32_t elt = iter->next(errorCode);
				1048	int32_t count = 0;
				1049	while (elt != CollationElementIterator::NULLORDER) {
				1050	++count;
				1051	elt = iter->next(errorCode);
				1052	}
				1053
				1054	LocalArray<UnicodeString> nextElements(new UnicodeString[count]);
				1055	LocalArray<UnicodeString> setOffsetElements(new UnicodeString[count]);
				1056	int32_t lastPos = 0;
				1057
				1058	iter->reset();
				1059	elt = iter->next(errorCode);
				1060	count = 0;
				1061	while (elt != CollationElementIterator::NULLORDER) {
				1062	nextElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset());
				1063	lastPos = iter->getOffset();
				1064	elt = iter->next(errorCode);
				1065	}
				1066	int32_t nextElementsLength = count;
				1067	count = 0;
				1068	for (int32_t i = 0; i < testText.length(); ) {
				1069	iter->setOffset(i, errorCode);
				1070	lastPos = iter->getOffset();
				1071	elt = iter->next(errorCode);
				1072	setOffsetElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset());
				1073	i = iter->getOffset();
				1074	}
				1075	for (int32_t i = 0; i < nextElementsLength; i++) {
				1076	if (nextElements[i] == setOffsetElements[i]) {
				1077	logln(nextElements[i]);
				1078	} else {
				1079	errln(UnicodeString("Error: next() yielded ") + nextElements[i] +
				1080	", but setOffset() yielded " + setOffsetElements[i]);
				1081	}
				1082	}
				1083	delete iter;
				1084	}
				1085
				1086	// Ticket 7189
				1087	//
				1088	// nextSortKeyPart incorrect for EO_S1 collation
				1089	static int32_t calcKeyIncremental(UCollator coll, const UChar text, int32_t len, uint8_t keyBuf, int32_t /keyBufLen*/, UErrorCode& status) {
				1090	UCharIterator uiter;
				1091	uint32_t state[2] = { 0, 0 };
				1092	int32_t keyLen;
				1093	int32_t count = 8;
				1094
				1095	uiter_setString(&uiter, text, len);
				1096	keyLen = 0;
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1097	while (true) {
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1098	int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[keyLen], count, &status);
				1099	if (U_FAILURE(status)) {
				1100	return -1;
				1101	}
				1102	if (keyPartLen == 0) {
				1103	break;
				1104	}
				1105	keyLen += keyPartLen;
				1106	}
				1107	return keyLen;
				1108	}
				1109
				1110	void CollationRegressionTest::TestT7189() {
				1111	UErrorCode status = U_ZERO_ERROR;
				1112	UCollator *coll;
				1113	uint32_t i;
				1114
				1115	static const UChar text1[][CollationRegressionTest::MAX_TOKEN_LEN] = {
				1116	// "Achter De Hoven"
				1117	{ 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
				1118	// "ABC"
				1119	{ 0x41, 0x42, 0x43, 0x00 },
				1120	// "HELLO world!"
				1121	{ 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
				1122	};
				1123
				1124	static const UChar text2[][CollationRegressionTest::MAX_TOKEN_LEN] = {
				1125	// "Achter de Hoven"
				1126	{ 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
				1127	// "abc"
				1128	{ 0x61, 0x62, 0x63, 0x00 },
				1129	// "hello world!"
				1130	{ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
				1131	};
				1132
				1133	// Open the collator
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	1134	coll = ucol_openFromShortString("EO_S1", false, NULL, &status);
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame]	1135	if (U_FAILURE(status)) {
				1136	errln("Failed to create a collator for short string EO_S1");
				1137	return;
				1138	}
				1139
				1140	for (i = 0; i < UPRV_LENGTHOF(text1); i++) {
				1141	uint8_t key1[100], key2[100];
				1142	int32_t len1, len2;
				1143
				1144	len1 = calcKeyIncremental(coll, text1[i], -1, key1, sizeof(key1), status);
				1145	if (U_FAILURE(status)) {
				1146	errln(UnicodeString("Failed to get a partial collation key for ") + text1[i]);
				1147	break;
				1148	}
				1149	len2 = calcKeyIncremental(coll, text2[i], -1, key2, sizeof(key2), status);
				1150	if (U_FAILURE(status)) {
				1151	errln(UnicodeString("Failed to get a partial collation key for ") + text2[i]);
				1152	break;
				1153	}
				1154
				1155	if (len1 == len2 && uprv_memcmp(key1, key2, len1) == 0) {
				1156	errln(UnicodeString("Failed: Identical key\n") + " text1: " + text1[i] + "\n" + " text2: " + text2[i] + "\n" + " key : " + TestUtility::hex(key1, len1));
				1157	} else {
				1158	logln(UnicodeString("Keys produced -\n") + " text1: " + text1[i] + "\n" + " key1 : " + TestUtility::hex(key1, len1) + "\n" + " text2: " + text2[i] + "\n" + " key2 : "
				1159	+ TestUtility::hex(key2, len2));
				1160	}
				1161	}
				1162	ucol_close(coll);
				1163	}
				1164
				1165	void CollationRegressionTest::TestCaseFirstCompression() {
				1166	RuleBasedCollator *col = en_us->clone();
				1167	UErrorCode status = U_ZERO_ERROR;
				1168
				1169	// default
				1170	caseFirstCompressionSub(col, "default");
				1171
				1172	// Upper first
				1173	col->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
				1174	if (U_FAILURE(status)) {
				1175	errln("Failed to set UCOL_UPPER_FIRST");
				1176	return;
				1177	}
				1178	caseFirstCompressionSub(col, "upper first");
				1179
				1180	// Lower first
				1181	col->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
				1182	if (U_FAILURE(status)) {
				1183	errln("Failed to set UCOL_LOWER_FIRST");
				1184	return;
				1185	}
				1186	caseFirstCompressionSub(col, "lower first");
				1187
				1188	delete col;
				1189	}
				1190
				1191	void CollationRegressionTest::caseFirstCompressionSub(Collator *col, UnicodeString opt) {
				1192	const int32_t maxLength = 50;
				1193
				1194	UChar str1[maxLength];
				1195	UChar str2[maxLength];
				1196
				1197	CollationKey key1, key2;
				1198
				1199	for (int32_t len = 1; len <= maxLength; len++) {
				1200	int32_t i = 0;
				1201	for (; i < len - 1; i++) {
				1202	str1[i] = str2[i] = (UChar)0x61; // 'a'
				1203	}
				1204	str1[i] = (UChar)0x41; // 'A'
				1205	str2[i] = (UChar)0x61; // 'a'
				1206
				1207	UErrorCode status = U_ZERO_ERROR;
				1208	col->getCollationKey(str1, len, key1, status);
				1209	col->getCollationKey(str2, len, key2, status);
				1210
				1211	UCollationResult cmpKey = key1.compareTo(key2, status);
				1212	UCollationResult cmpCol = col->compare(str1, len, str2, len, status);
				1213
				1214	if (U_FAILURE(status)) {
				1215	errln("Error in caseFirstCompressionSub");
				1216	} else if (cmpKey != cmpCol) {
				1217	errln((UnicodeString)"Inconsistent comparison(" + opt
				1218	+ "): str1=" + UnicodeString(str1, len) + ", str2=" + UnicodeString(str2, len)
				1219	+ ", cmpKey=" + cmpKey + ", cmpCol=" + cmpCol);
				1220	}
				1221	}
				1222	}
				1223
				1224	void CollationRegressionTest::TestTrailingComment() {
				1225	// ICU ticket #8070:
				1226	// Check that the rule parser handles a comment without terminating end-of-line.
				1227	IcuTestErrorCode errorCode(*this, "TestTrailingComment");
				1228	RuleBasedCollator coll(UNICODE_STRING_SIMPLE("&c<b#comment1\n<a#comment2"), errorCode);
				1229	UnicodeString a((UChar)0x61), b((UChar)0x62), c((UChar)0x63);
				1230	assertTrue("c<b", coll.compare(c, b) < 0);
				1231	assertTrue("b<a", coll.compare(b, a) < 0);
				1232	}
				1233
				1234	void CollationRegressionTest::TestBeforeWithTooStrongAfter() {
				1235	// ICU ticket #9959:
				1236	// Forbid rules with a before-reset followed by a stronger relation.
				1237	IcuTestErrorCode errorCode(*this, "TestBeforeWithTooStrongAfter");
				1238	RuleBasedCollator before2(UNICODE_STRING_SIMPLE("&[before 2]x<<q<p"), errorCode);
				1239	if(errorCode.isSuccess()) {
				1240	errln("should forbid before-2-reset followed by primary relation");
				1241	} else {
				1242	errorCode.reset();
				1243	}
				1244	RuleBasedCollator before3(UNICODE_STRING_SIMPLE("&[before 3]x<<<q<<s<p"), errorCode);
				1245	if(errorCode.isSuccess()) {
				1246	errln("should forbid before-3-reset followed by primary or secondary relation");
				1247	} else {
				1248	errorCode.reset();
				1249	}
				1250	}
				1251
				1252	void CollationRegressionTest::compareArray(Collator &c,
				1253	const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN],
				1254	int32_t testCount)
				1255	{
				1256	int32_t i;
				1257	Collator::EComparisonResult expectedResult = Collator::EQUAL;
				1258
				1259	for (i = 0; i < testCount; i += 3)
				1260	{
				1261	UnicodeString source(tests[i]);
				1262	UnicodeString comparison(tests[i + 1]);
				1263	UnicodeString target(tests[i + 2]);
				1264
				1265	if (comparison == "<")
				1266	{
				1267	expectedResult = Collator::LESS;
				1268	}
				1269	else if (comparison == ">")
				1270	{
				1271	expectedResult = Collator::GREATER;
				1272	}
				1273	else if (comparison == "=")
				1274	{
				1275	expectedResult = Collator::EQUAL;
				1276	}
				1277	else
				1278	{
				1279	UnicodeString bogus1("Bogus comparison string \"");
				1280	UnicodeString bogus2("\"");
				1281	errln(bogus1 + comparison + bogus2);
				1282	}
				1283
				1284	Collator::EComparisonResult compareResult = c.compare(source, target);
				1285
				1286	CollationKey sourceKey, targetKey;
				1287	UErrorCode status = U_ZERO_ERROR;
				1288
				1289	c.getCollationKey(source, sourceKey, status);
				1290
				1291	if (U_FAILURE(status))
				1292	{
				1293	errln("Couldn't get collationKey for source");
				1294	continue;
				1295	}
				1296
				1297	c.getCollationKey(target, targetKey, status);
				1298
				1299	if (U_FAILURE(status))
				1300	{
				1301	errln("Couldn't get collationKey for target");
				1302	continue;
				1303	}
				1304
				1305	Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey);
				1306
				1307	reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult );
				1308
				1309	}
				1310	}
				1311
				1312	void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
				1313	{
				1314	int32_t c1, c2, count = 0;
				1315	UErrorCode status = U_ZERO_ERROR;
				1316
				1317	do
				1318	{
				1319	c1 = i1.next(status);
				1320	c2 = i2.next(status);
				1321
				1322	if (c1 != c2)
				1323	{
				1324	UnicodeString msg, msg1(" ");
				1325
				1326	msg += msg1 + count;
				1327	msg += ": strength(0x";
				1328	appendHex(c1, 8, msg);
				1329	msg += ") != strength(0x";
				1330	appendHex(c2, 8, msg);
				1331	msg += ")";
				1332
				1333	errln(msg);
				1334	break;
				1335	}
				1336
				1337	count += 1;
				1338	}
				1339	while (c1 != CollationElementIterator::NULLORDER);
				1340	}
				1341
				1342	void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /* par */)
				1343	{
				1344	if (exec)
				1345	{
				1346	logln("Collation Regression Tests: ");
				1347	}
				1348
				1349	if(en_us == NULL) {
				1350	dataerrln("Class collator not instantiated");
				1351	name = "";
				1352	return;
				1353	}
				1354	TESTCASE_AUTO_BEGIN;
				1355	TESTCASE_AUTO(Test4048446);
				1356	TESTCASE_AUTO(Test4051866);
				1357	TESTCASE_AUTO(Test4053636);
				1358	TESTCASE_AUTO(Test4054238);
				1359	TESTCASE_AUTO(Test4054734);
				1360	TESTCASE_AUTO(Test4054736);
				1361	TESTCASE_AUTO(Test4058613);
				1362	TESTCASE_AUTO(Test4059820);
				1363	TESTCASE_AUTO(Test4060154);
				1364	TESTCASE_AUTO(Test4062418);
				1365	TESTCASE_AUTO(Test4065540);
				1366	TESTCASE_AUTO(Test4066189);
				1367	TESTCASE_AUTO(Test4066696);
				1368	TESTCASE_AUTO(Test4076676);
				1369	TESTCASE_AUTO(Test4078588);
				1370	TESTCASE_AUTO(Test4079231);
				1371	TESTCASE_AUTO(Test4081866);
				1372	TESTCASE_AUTO(Test4087241);
				1373	TESTCASE_AUTO(Test4087243);
				1374	TESTCASE_AUTO(Test4092260);
				1375	TESTCASE_AUTO(Test4095316);
				1376	TESTCASE_AUTO(Test4101940);
				1377	TESTCASE_AUTO(Test4103436);
				1378	TESTCASE_AUTO(Test4114076);
				1379	TESTCASE_AUTO(Test4114077);
				1380	TESTCASE_AUTO(Test4124632);
				1381	TESTCASE_AUTO(Test4132736);
				1382	TESTCASE_AUTO(Test4133509);
				1383	TESTCASE_AUTO(Test4139572);
				1384	TESTCASE_AUTO(Test4141640);
				1385	TESTCASE_AUTO(Test4179216);
				1386	TESTCASE_AUTO(TestT7189);
				1387	TESTCASE_AUTO(TestCaseFirstCompression);
				1388	TESTCASE_AUTO(TestTrailingComment);
				1389	TESTCASE_AUTO(TestBeforeWithTooStrongAfter);
				1390	TESTCASE_AUTO_END;
				1391	}
				1392
				1393	#endif /* #if !UCONFIG_NO_COLLATION */