blob: 25c8745cbf203b3c590ffea65a67a65394bd653d [file] [log] [blame]
Jungshik Shin87232d82017-05-13 21:10:13 -07001// © 2016 and later: Unicode, Inc. and others.
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002// License & terms of use: http://www.unicode.org/copyright.html
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00003/*
4 ******************************************************************************
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08005 * Copyright (C) 1996-2014, International Business Machines Corporation and
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00006 * others. All Rights Reserved.
7 ******************************************************************************
8 */
9
10/**
11 * File coll.cpp
12 *
13 * Created by: Helena Shih
14 *
15 * Modification History:
16 *
17 * Date Name Description
18 * 2/5/97 aliu Modified createDefault to load collation data from
19 * binary files when possible. Added related methods
20 * createCollationFromFile, chopLocale, createPathName.
21 * 2/11/97 aliu Added methods addToCache, findInCache, which implement
22 * a Collation cache. Modified createDefault to look in
23 * cache first, and also to store newly created Collation
24 * objects in the cache. Modified to not use gLocPath.
25 * 2/12/97 aliu Modified to create objects from RuleBasedCollator cache.
26 * Moved cache out of Collation class.
27 * 2/13/97 aliu Moved several methods out of this class and into
28 * RuleBasedCollator, with modifications. Modified
29 * createDefault() to call new RuleBasedCollator(Locale&)
30 * constructor. General clean up and documentation.
31 * 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy
32 * constructor.
33 * 05/06/97 helena Added memory allocation error detection.
34 * 05/08/97 helena Added createInstance().
35 * 6/20/97 helena Java class name change.
36 * 04/23/99 stephen Removed EDecompositionMode, merged with
37 * Normalizer::EMode
38 * 11/23/9 srl Inlining of some critical functions
39 * 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h)
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -080040 * 2012-2014 markus Rewritten in C++ again.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000041 */
42
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -080043#include "utypeinfo.h" // for 'typeid' to work
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000044
45#include "unicode/utypes.h"
46
47#if !UCONFIG_NO_COLLATION
48
49#include "unicode/coll.h"
50#include "unicode/tblcoll.h"
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -080051#include "collationdata.h"
52#include "collationroot.h"
53#include "collationtailoring.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000054#include "ucol_imp.h"
55#include "cstring.h"
56#include "cmemory.h"
57#include "umutex.h"
58#include "servloc.h"
59#include "uassert.h"
60#include "ustrenum.h"
61#include "uresimp.h"
62#include "ucln_in.h"
63
64static icu::Locale* availableLocaleList = NULL;
65static int32_t availableLocaleListCount;
Jungshik Shinb3189662017-11-07 11:18:34 -080066#if !UCONFIG_NO_SERVICE
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000067static icu::ICULocaleService* gService = NULL;
68static icu::UInitOnce gServiceInitOnce = U_INITONCE_INITIALIZER;
Jungshik Shinb3189662017-11-07 11:18:34 -080069#endif
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000070static icu::UInitOnce gAvailableLocaleListInitOnce;
71
72/**
73 * Release all static memory held by collator.
74 */
75U_CDECL_BEGIN
76static UBool U_CALLCONV collator_cleanup(void) {
77#if !UCONFIG_NO_SERVICE
78 if (gService) {
79 delete gService;
80 gService = NULL;
81 }
82 gServiceInitOnce.reset();
83#endif
84 if (availableLocaleList) {
85 delete []availableLocaleList;
86 availableLocaleList = NULL;
87 }
88 availableLocaleListCount = 0;
89 gAvailableLocaleListInitOnce.reset();
90 return TRUE;
91}
92
93U_CDECL_END
94
95U_NAMESPACE_BEGIN
96
97#if !UCONFIG_NO_SERVICE
98
99// ------------------------------------------
100//
101// Registration
102//
103
104//-------------------------------------------
105
106CollatorFactory::~CollatorFactory() {}
107
108//-------------------------------------------
109
110UBool
111CollatorFactory::visible(void) const {
112 return TRUE;
113}
114
115//-------------------------------------------
116
117UnicodeString&
118CollatorFactory::getDisplayName(const Locale& objectLocale,
119 const Locale& displayLocale,
120 UnicodeString& result)
121{
122 return objectLocale.getDisplayName(displayLocale, result);
123}
124
125// -------------------------------------
126
127class ICUCollatorFactory : public ICUResourceBundleFactory {
128 public:
129 ICUCollatorFactory() : ICUResourceBundleFactory(UnicodeString(U_ICUDATA_COLL, -1, US_INV)) { }
130 virtual ~ICUCollatorFactory();
131 protected:
132 virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
133};
134
135ICUCollatorFactory::~ICUCollatorFactory() {}
136
137UObject*
138ICUCollatorFactory::create(const ICUServiceKey& key, const ICUService* /* service */, UErrorCode& status) const {
139 if (handlesKey(key, status)) {
140 const LocaleKey& lkey = (const LocaleKey&)key;
141 Locale loc;
142 // make sure the requested locale is correct
143 // default LocaleFactory uses currentLocale since that's the one vetted by handlesKey
144 // but for ICU rb resources we use the actual one since it will fallback again
145 lkey.canonicalLocale(loc);
146
147 return Collator::makeInstance(loc, status);
148 }
149 return NULL;
150}
151
152// -------------------------------------
153
154class ICUCollatorService : public ICULocaleService {
155public:
156 ICUCollatorService()
157 : ICULocaleService(UNICODE_STRING_SIMPLE("Collator"))
158 {
159 UErrorCode status = U_ZERO_ERROR;
160 registerFactory(new ICUCollatorFactory(), status);
161 }
162
163 virtual ~ICUCollatorService();
164
165 virtual UObject* cloneInstance(UObject* instance) const {
166 return ((Collator*)instance)->clone();
167 }
168
169 virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* actualID, UErrorCode& status) const {
170 LocaleKey& lkey = (LocaleKey&)key;
171 if (actualID) {
172 // Ugly Hack Alert! We return an empty actualID to signal
173 // to callers that this is a default object, not a "real"
174 // service-created object. (TODO remove in 3.0) [aliu]
175 actualID->truncate(0);
176 }
177 Locale loc("");
178 lkey.canonicalLocale(loc);
179 return Collator::makeInstance(loc, status);
180 }
181
182 virtual UObject* getKey(ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const {
183 UnicodeString ar;
184 if (actualReturn == NULL) {
185 actualReturn = &ar;
186 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800187 return (Collator*)ICULocaleService::getKey(key, actualReturn, status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000188 }
189
190 virtual UBool isDefault() const {
191 return countFactories() == 1;
192 }
193};
194
195ICUCollatorService::~ICUCollatorService() {}
196
197// -------------------------------------
198
199static void U_CALLCONV initService() {
200 gService = new ICUCollatorService();
201 ucln_i18n_registerCleanup(UCLN_I18N_COLLATOR, collator_cleanup);
202}
203
204
205static ICULocaleService*
206getService(void)
207{
208 umtx_initOnce(gServiceInitOnce, &initService);
209 return gService;
210}
211
212// -------------------------------------
213
214static inline UBool
215hasService(void)
216{
217 UBool retVal = !gServiceInitOnce.isReset() && (getService() != NULL);
218 return retVal;
219}
220
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000221#endif /* UCONFIG_NO_SERVICE */
222
223static void U_CALLCONV
224initAvailableLocaleList(UErrorCode &status) {
225 U_ASSERT(availableLocaleListCount == 0);
226 U_ASSERT(availableLocaleList == NULL);
227 // for now, there is a hardcoded list, so just walk through that list and set it up.
228 UResourceBundle *index = NULL;
229 UResourceBundle installed;
230 int32_t i = 0;
231
232 ures_initStackObject(&installed);
233 index = ures_openDirect(U_ICUDATA_COLL, "res_index", &status);
234 ures_getByKey(index, "InstalledLocales", &installed, &status);
235
236 if(U_SUCCESS(status)) {
237 availableLocaleListCount = ures_getSize(&installed);
238 availableLocaleList = new Locale[availableLocaleListCount];
239
240 if (availableLocaleList != NULL) {
241 ures_resetIterator(&installed);
242 while(ures_hasNext(&installed)) {
243 const char *tempKey = NULL;
244 ures_getNextString(&installed, NULL, &tempKey, &status);
245 availableLocaleList[i++] = Locale(tempKey);
246 }
247 }
248 U_ASSERT(availableLocaleListCount == i);
249 ures_close(&installed);
250 }
251 ures_close(index);
252 ucln_i18n_registerCleanup(UCLN_I18N_COLLATOR, collator_cleanup);
253}
254
255static UBool isAvailableLocaleListInitialized(UErrorCode &status) {
256 umtx_initOnce(gAvailableLocaleListInitOnce, &initAvailableLocaleList, status);
257 return U_SUCCESS(status);
258}
259
260
261// Collator public methods -----------------------------------------------
262
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800263namespace {
264
265static const struct {
266 const char *name;
267 UColAttribute attr;
268} collAttributes[] = {
269 { "colStrength", UCOL_STRENGTH },
270 { "colBackwards", UCOL_FRENCH_COLLATION },
271 { "colCaseLevel", UCOL_CASE_LEVEL },
272 { "colCaseFirst", UCOL_CASE_FIRST },
273 { "colAlternate", UCOL_ALTERNATE_HANDLING },
274 { "colNormalization", UCOL_NORMALIZATION_MODE },
275 { "colNumeric", UCOL_NUMERIC_COLLATION }
276};
277
278static const struct {
279 const char *name;
280 UColAttributeValue value;
281} collAttributeValues[] = {
282 { "primary", UCOL_PRIMARY },
283 { "secondary", UCOL_SECONDARY },
284 { "tertiary", UCOL_TERTIARY },
285 { "quaternary", UCOL_QUATERNARY },
286 // Note: Not supporting typo "quarternary" because it was never supported in locale IDs.
287 { "identical", UCOL_IDENTICAL },
288 { "no", UCOL_OFF },
289 { "yes", UCOL_ON },
290 { "shifted", UCOL_SHIFTED },
291 { "non-ignorable", UCOL_NON_IGNORABLE },
292 { "lower", UCOL_LOWER_FIRST },
293 { "upper", UCOL_UPPER_FIRST }
294};
295
296static const char *collReorderCodes[UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST] = {
297 "space", "punct", "symbol", "currency", "digit"
298};
299
300int32_t getReorderCode(const char *s) {
301 for (int32_t i = 0; i < UPRV_LENGTHOF(collReorderCodes); ++i) {
302 if (uprv_stricmp(s, collReorderCodes[i]) == 0) {
303 return UCOL_REORDER_CODE_FIRST + i;
304 }
305 }
306 // Not supporting "others" = UCOL_REORDER_CODE_OTHERS
307 // as a synonym for Zzzz = USCRIPT_UNKNOWN for now:
308 // Avoid introducing synonyms/aliases.
309 return -1;
310}
311
312/**
313 * Sets collation attributes according to locale keywords. See
314 * http://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Settings
315 *
316 * Using "alias" keywords and values where defined:
317 * http://www.unicode.org/reports/tr35/tr35.html#Old_Locale_Extension_Syntax
318 * http://unicode.org/repos/cldr/trunk/common/bcp47/collation.xml
319 */
320void setAttributesFromKeywords(const Locale &loc, Collator &coll, UErrorCode &errorCode) {
321 if (U_FAILURE(errorCode)) {
322 return;
323 }
324 if (uprv_strcmp(loc.getName(), loc.getBaseName()) == 0) {
325 // No keywords.
326 return;
327 }
328 char value[1024]; // The reordering value could be long.
329 // Check for collation keywords that were already deprecated
330 // before any were supported in createInstance() (except for "collation").
331 int32_t length = loc.getKeywordValue("colHiraganaQuaternary", value, UPRV_LENGTHOF(value), errorCode);
332 if (U_FAILURE(errorCode)) {
333 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
334 return;
335 }
336 if (length != 0) {
337 errorCode = U_UNSUPPORTED_ERROR;
338 return;
339 }
340 length = loc.getKeywordValue("variableTop", value, UPRV_LENGTHOF(value), errorCode);
341 if (U_FAILURE(errorCode)) {
342 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
343 return;
344 }
345 if (length != 0) {
346 errorCode = U_UNSUPPORTED_ERROR;
347 return;
348 }
349 // Parse known collation keywords, ignore others.
350 if (errorCode == U_STRING_NOT_TERMINATED_WARNING) {
351 errorCode = U_ZERO_ERROR;
352 }
353 for (int32_t i = 0; i < UPRV_LENGTHOF(collAttributes); ++i) {
354 length = loc.getKeywordValue(collAttributes[i].name, value, UPRV_LENGTHOF(value), errorCode);
355 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
356 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
357 return;
358 }
359 if (length == 0) { continue; }
360 for (int32_t j = 0;; ++j) {
361 if (j == UPRV_LENGTHOF(collAttributeValues)) {
362 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
363 return;
364 }
365 if (uprv_stricmp(value, collAttributeValues[j].name) == 0) {
366 coll.setAttribute(collAttributes[i].attr, collAttributeValues[j].value, errorCode);
367 break;
368 }
369 }
370 }
371 length = loc.getKeywordValue("colReorder", value, UPRV_LENGTHOF(value), errorCode);
372 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
373 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
374 return;
375 }
376 if (length != 0) {
377 int32_t codes[USCRIPT_CODE_LIMIT + UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST];
378 int32_t codesLength = 0;
379 char *scriptName = value;
380 for (;;) {
381 if (codesLength == UPRV_LENGTHOF(codes)) {
382 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
383 return;
384 }
385 char *limit = scriptName;
386 char c;
387 while ((c = *limit) != 0 && c != '-') { ++limit; }
388 *limit = 0;
389 int32_t code;
390 if ((limit - scriptName) == 4) {
391 // Strict parsing, accept only 4-letter script codes, not long names.
392 code = u_getPropertyValueEnum(UCHAR_SCRIPT, scriptName);
393 } else {
394 code = getReorderCode(scriptName);
395 }
396 if (code < 0) {
397 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
398 return;
399 }
400 codes[codesLength++] = code;
401 if (c == 0) { break; }
402 scriptName = limit + 1;
403 }
404 coll.setReorderCodes(codes, codesLength, errorCode);
405 }
406 length = loc.getKeywordValue("kv", value, UPRV_LENGTHOF(value), errorCode);
407 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
408 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
409 return;
410 }
411 if (length != 0) {
412 int32_t code = getReorderCode(value);
413 if (code < 0) {
414 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
415 return;
416 }
417 coll.setMaxVariable((UColReorderCode)code, errorCode);
418 }
419 if (U_FAILURE(errorCode)) {
420 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
421 }
422}
423
424} // namespace
425
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000426Collator* U_EXPORT2 Collator::createInstance(UErrorCode& success)
427{
428 return createInstance(Locale::getDefault(), success);
429}
430
431Collator* U_EXPORT2 Collator::createInstance(const Locale& desiredLocale,
432 UErrorCode& status)
433{
434 if (U_FAILURE(status))
435 return 0;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800436 if (desiredLocale.isBogus()) {
437 // Locale constructed from malformed locale ID or language tag.
438 status = U_ILLEGAL_ARGUMENT_ERROR;
439 return NULL;
440 }
441
442 Collator* coll;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000443#if !UCONFIG_NO_SERVICE
444 if (hasService()) {
445 Locale actualLoc;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800446 coll = (Collator*)gService->get(desiredLocale, &actualLoc, status);
447 } else
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000448#endif
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000449 {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800450 coll = makeInstance(desiredLocale, status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000451 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800452 setAttributesFromKeywords(desiredLocale, *coll, status);
453 if (U_FAILURE(status)) {
454 delete coll;
455 return NULL;
456 }
457 return coll;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000458}
459
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000460
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800461Collator* Collator::makeInstance(const Locale& desiredLocale, UErrorCode& status) {
462 const CollationCacheEntry *entry = CollationLoader::loadTailoring(desiredLocale, status);
463 if (U_SUCCESS(status)) {
464 Collator *result = new RuleBasedCollator(entry);
465 if (result != NULL) {
466 // Both the unified cache's get() and the RBC constructor
467 // did addRef(). Undo one of them.
468 entry->removeRef();
469 return result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000470 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800471 status = U_MEMORY_ALLOCATION_ERROR;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000472 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800473 if (entry != NULL) {
474 // Undo the addRef() from the cache.get().
475 entry->removeRef();
476 }
477 return NULL;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000478}
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000479
480Collator *
481Collator::safeClone() const {
482 return clone();
483}
484
485// implement deprecated, previously abstract method
486Collator::EComparisonResult Collator::compare(const UnicodeString& source,
487 const UnicodeString& target) const
488{
489 UErrorCode ec = U_ZERO_ERROR;
490 return (EComparisonResult)compare(source, target, ec);
491}
492
493// implement deprecated, previously abstract method
494Collator::EComparisonResult Collator::compare(const UnicodeString& source,
495 const UnicodeString& target,
496 int32_t length) const
497{
498 UErrorCode ec = U_ZERO_ERROR;
499 return (EComparisonResult)compare(source, target, length, ec);
500}
501
502// implement deprecated, previously abstract method
503Collator::EComparisonResult Collator::compare(const UChar* source, int32_t sourceLength,
504 const UChar* target, int32_t targetLength)
505 const
506{
507 UErrorCode ec = U_ZERO_ERROR;
508 return (EComparisonResult)compare(source, sourceLength, target, targetLength, ec);
509}
510
511UCollationResult Collator::compare(UCharIterator &/*sIter*/,
512 UCharIterator &/*tIter*/,
513 UErrorCode &status) const {
514 if(U_SUCCESS(status)) {
515 // Not implemented in the base class.
516 status = U_UNSUPPORTED_ERROR;
517 }
518 return UCOL_EQUAL;
519}
520
521UCollationResult Collator::compareUTF8(const StringPiece &source,
522 const StringPiece &target,
523 UErrorCode &status) const {
524 if(U_FAILURE(status)) {
525 return UCOL_EQUAL;
526 }
527 UCharIterator sIter, tIter;
528 uiter_setUTF8(&sIter, source.data(), source.length());
529 uiter_setUTF8(&tIter, target.data(), target.length());
530 return compare(sIter, tIter, status);
531}
532
533UBool Collator::equals(const UnicodeString& source,
534 const UnicodeString& target) const
535{
536 UErrorCode ec = U_ZERO_ERROR;
537 return (compare(source, target, ec) == UCOL_EQUAL);
538}
539
540UBool Collator::greaterOrEqual(const UnicodeString& source,
541 const UnicodeString& target) const
542{
543 UErrorCode ec = U_ZERO_ERROR;
544 return (compare(source, target, ec) != UCOL_LESS);
545}
546
547UBool Collator::greater(const UnicodeString& source,
548 const UnicodeString& target) const
549{
550 UErrorCode ec = U_ZERO_ERROR;
551 return (compare(source, target, ec) == UCOL_GREATER);
552}
553
554// this API ignores registered collators, since it returns an
555// array of indefinite lifetime
556const Locale* U_EXPORT2 Collator::getAvailableLocales(int32_t& count)
557{
558 UErrorCode status = U_ZERO_ERROR;
559 Locale *result = NULL;
560 count = 0;
561 if (isAvailableLocaleListInitialized(status))
562 {
563 result = availableLocaleList;
564 count = availableLocaleListCount;
565 }
566 return result;
567}
568
569UnicodeString& U_EXPORT2 Collator::getDisplayName(const Locale& objectLocale,
570 const Locale& displayLocale,
571 UnicodeString& name)
572{
573#if !UCONFIG_NO_SERVICE
574 if (hasService()) {
575 UnicodeString locNameStr;
576 LocaleUtility::initNameFromLocale(objectLocale, locNameStr);
577 return gService->getDisplayName(locNameStr, name, displayLocale);
578 }
579#endif
580 return objectLocale.getDisplayName(displayLocale, name);
581}
582
583UnicodeString& U_EXPORT2 Collator::getDisplayName(const Locale& objectLocale,
584 UnicodeString& name)
585{
586 return getDisplayName(objectLocale, Locale::getDefault(), name);
587}
588
589/* This is useless information */
590/*void Collator::getVersion(UVersionInfo versionInfo) const
591{
592 if (versionInfo!=NULL)
593 uprv_memcpy(versionInfo, fVersion, U_MAX_VERSION_LENGTH);
594}
595*/
596
597// UCollator protected constructor destructor ----------------------------
598
599/**
600* Default constructor.
601* Constructor is different from the old default Collator constructor.
602* The task for determing the default collation strength and normalization mode
603* is left to the child class.
604*/
605Collator::Collator()
606: UObject()
607{
608}
609
610/**
611* Constructor.
612* Empty constructor, does not handle the arguments.
613* This constructor is done for backward compatibility with 1.7 and 1.8.
614* The task for handling the argument collation strength and normalization
615* mode is left to the child class.
616* @param collationStrength collation strength
617* @param decompositionMode
618* @deprecated 2.4 use the default constructor instead
619*/
620Collator::Collator(UCollationStrength, UNormalizationMode )
621: UObject()
622{
623}
624
625Collator::~Collator()
626{
627}
628
629Collator::Collator(const Collator &other)
630 : UObject(other)
631{
632}
633
634UBool Collator::operator==(const Collator& other) const
635{
636 // Subclasses: Call this method and then add more specific checks.
637 return typeid(*this) == typeid(other);
638}
639
640UBool Collator::operator!=(const Collator& other) const
641{
642 return (UBool)!(*this == other);
643}
644
645int32_t U_EXPORT2 Collator::getBound(const uint8_t *source,
646 int32_t sourceLength,
647 UColBoundMode boundType,
648 uint32_t noOfLevels,
649 uint8_t *result,
650 int32_t resultLength,
651 UErrorCode &status)
652{
653 return ucol_getBound(source, sourceLength, boundType, noOfLevels, result, resultLength, &status);
654}
655
656void
657Collator::setLocales(const Locale& /* requestedLocale */, const Locale& /* validLocale */, const Locale& /*actualLocale*/) {
658}
659
660UnicodeSet *Collator::getTailoredSet(UErrorCode &status) const
661{
662 if(U_FAILURE(status)) {
663 return NULL;
664 }
665 // everything can be changed
666 return new UnicodeSet(0, 0x10FFFF);
667}
668
669// -------------------------------------
670
671#if !UCONFIG_NO_SERVICE
672URegistryKey U_EXPORT2
673Collator::registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status)
674{
675 if (U_SUCCESS(status)) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800676 // Set the collator locales while registering so that createInstance()
677 // need not guess whether the collator's locales are already set properly
678 // (as they are by the data loader).
679 toAdopt->setLocales(locale, locale, locale);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000680 return getService()->registerInstance(toAdopt, locale, status);
681 }
682 return NULL;
683}
684
685// -------------------------------------
686
687class CFactory : public LocaleKeyFactory {
688private:
689 CollatorFactory* _delegate;
690 Hashtable* _ids;
691
692public:
693 CFactory(CollatorFactory* delegate, UErrorCode& status)
694 : LocaleKeyFactory(delegate->visible() ? VISIBLE : INVISIBLE)
695 , _delegate(delegate)
696 , _ids(NULL)
697 {
698 if (U_SUCCESS(status)) {
699 int32_t count = 0;
700 _ids = new Hashtable(status);
701 if (_ids) {
702 const UnicodeString * idlist = _delegate->getSupportedIDs(count, status);
703 for (int i = 0; i < count; ++i) {
704 _ids->put(idlist[i], (void*)this, status);
705 if (U_FAILURE(status)) {
706 delete _ids;
707 _ids = NULL;
708 return;
709 }
710 }
711 } else {
712 status = U_MEMORY_ALLOCATION_ERROR;
713 }
714 }
715 }
716
717 virtual ~CFactory();
718
719 virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
720
721protected:
722 virtual const Hashtable* getSupportedIDs(UErrorCode& status) const
723 {
724 if (U_SUCCESS(status)) {
725 return _ids;
726 }
727 return NULL;
728 }
729
730 virtual UnicodeString&
731 getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const;
732};
733
734CFactory::~CFactory()
735{
736 delete _delegate;
737 delete _ids;
738}
739
740UObject*
741CFactory::create(const ICUServiceKey& key, const ICUService* /* service */, UErrorCode& status) const
742{
743 if (handlesKey(key, status)) {
744 const LocaleKey& lkey = (const LocaleKey&)key;
745 Locale validLoc;
746 lkey.currentLocale(validLoc);
747 return _delegate->createCollator(validLoc);
748 }
749 return NULL;
750}
751
752UnicodeString&
753CFactory::getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const
754{
755 if ((_coverage & 0x1) == 0) {
756 UErrorCode status = U_ZERO_ERROR;
757 const Hashtable* ids = getSupportedIDs(status);
758 if (ids && (ids->get(id) != NULL)) {
759 Locale loc;
760 LocaleUtility::initLocaleFromName(id, loc);
761 return _delegate->getDisplayName(loc, locale, result);
762 }
763 }
764 result.setToBogus();
765 return result;
766}
767
768URegistryKey U_EXPORT2
769Collator::registerFactory(CollatorFactory* toAdopt, UErrorCode& status)
770{
771 if (U_SUCCESS(status)) {
772 CFactory* f = new CFactory(toAdopt, status);
773 if (f) {
774 return getService()->registerFactory(f, status);
775 }
776 status = U_MEMORY_ALLOCATION_ERROR;
777 }
778 return NULL;
779}
780
781// -------------------------------------
782
783UBool U_EXPORT2
784Collator::unregister(URegistryKey key, UErrorCode& status)
785{
786 if (U_SUCCESS(status)) {
787 if (hasService()) {
788 return gService->unregister(key, status);
789 }
790 status = U_ILLEGAL_ARGUMENT_ERROR;
791 }
792 return FALSE;
793}
794#endif /* UCONFIG_NO_SERVICE */
795
796class CollationLocaleListEnumeration : public StringEnumeration {
797private:
798 int32_t index;
799public:
800 static UClassID U_EXPORT2 getStaticClassID(void);
801 virtual UClassID getDynamicClassID(void) const;
802public:
803 CollationLocaleListEnumeration()
804 : index(0)
805 {
806 // The global variables should already be initialized.
807 //isAvailableLocaleListInitialized(status);
808 }
809
810 virtual ~CollationLocaleListEnumeration();
811
812 virtual StringEnumeration * clone() const
813 {
814 CollationLocaleListEnumeration *result = new CollationLocaleListEnumeration();
815 if (result) {
816 result->index = index;
817 }
818 return result;
819 }
820
821 virtual int32_t count(UErrorCode &/*status*/) const {
822 return availableLocaleListCount;
823 }
824
825 virtual const char* next(int32_t* resultLength, UErrorCode& /*status*/) {
826 const char* result;
827 if(index < availableLocaleListCount) {
828 result = availableLocaleList[index++].getName();
829 if(resultLength != NULL) {
830 *resultLength = (int32_t)uprv_strlen(result);
831 }
832 } else {
833 if(resultLength != NULL) {
834 *resultLength = 0;
835 }
836 result = NULL;
837 }
838 return result;
839 }
840
841 virtual const UnicodeString* snext(UErrorCode& status) {
842 int32_t resultLength = 0;
843 const char *s = next(&resultLength, status);
844 return setChars(s, resultLength, status);
845 }
846
847 virtual void reset(UErrorCode& /*status*/) {
848 index = 0;
849 }
850};
851
852CollationLocaleListEnumeration::~CollationLocaleListEnumeration() {}
853
854UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationLocaleListEnumeration)
855
856
857// -------------------------------------
858
859StringEnumeration* U_EXPORT2
860Collator::getAvailableLocales(void)
861{
862#if !UCONFIG_NO_SERVICE
863 if (hasService()) {
864 return getService()->getAvailableLocales();
865 }
866#endif /* UCONFIG_NO_SERVICE */
867 UErrorCode status = U_ZERO_ERROR;
868 if (isAvailableLocaleListInitialized(status)) {
869 return new CollationLocaleListEnumeration();
870 }
871 return NULL;
872}
873
874StringEnumeration* U_EXPORT2
875Collator::getKeywords(UErrorCode& status) {
Jungshik Shin70f82502016-01-29 00:32:36 -0800876 return UStringEnumeration::fromUEnumeration(
877 ucol_getKeywords(&status), status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000878}
879
880StringEnumeration* U_EXPORT2
881Collator::getKeywordValues(const char *keyword, UErrorCode& status) {
Jungshik Shin70f82502016-01-29 00:32:36 -0800882 return UStringEnumeration::fromUEnumeration(
883 ucol_getKeywordValues(keyword, &status), status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000884}
885
886StringEnumeration* U_EXPORT2
887Collator::getKeywordValuesForLocale(const char* key, const Locale& locale,
888 UBool commonlyUsed, UErrorCode& status) {
Jungshik Shin70f82502016-01-29 00:32:36 -0800889 return UStringEnumeration::fromUEnumeration(
890 ucol_getKeywordValuesForLocale(
891 key, locale.getName(), commonlyUsed, &status),
892 status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000893}
894
895Locale U_EXPORT2
896Collator::getFunctionalEquivalent(const char* keyword, const Locale& locale,
897 UBool& isAvailable, UErrorCode& status) {
898 // This is a wrapper over ucol_getFunctionalEquivalent
899 char loc[ULOC_FULLNAME_CAPACITY];
900 /*int32_t len =*/ ucol_getFunctionalEquivalent(loc, sizeof(loc),
901 keyword, locale.getName(), &isAvailable, &status);
902 if (U_FAILURE(status)) {
903 *loc = 0; // root
904 }
905 return Locale::createFromName(loc);
906}
907
908Collator::ECollationStrength
909Collator::getStrength(void) const {
910 UErrorCode intStatus = U_ZERO_ERROR;
911 return (ECollationStrength)getAttribute(UCOL_STRENGTH, intStatus);
912}
913
914void
915Collator::setStrength(ECollationStrength newStrength) {
916 UErrorCode intStatus = U_ZERO_ERROR;
917 setAttribute(UCOL_STRENGTH, (UColAttributeValue)newStrength, intStatus);
918}
919
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800920Collator &
921Collator::setMaxVariable(UColReorderCode /*group*/, UErrorCode &errorCode) {
922 if (U_SUCCESS(errorCode)) {
923 errorCode = U_UNSUPPORTED_ERROR;
924 }
925 return *this;
926}
927
928UColReorderCode
929Collator::getMaxVariable() const {
930 return UCOL_REORDER_CODE_PUNCTUATION;
931}
932
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000933int32_t
934Collator::getReorderCodes(int32_t* /* dest*/,
935 int32_t /* destCapacity*/,
936 UErrorCode& status) const
937{
938 if (U_SUCCESS(status)) {
939 status = U_UNSUPPORTED_ERROR;
940 }
941 return 0;
942}
943
944void
945Collator::setReorderCodes(const int32_t* /* reorderCodes */,
946 int32_t /* reorderCodesLength */,
947 UErrorCode& status)
948{
949 if (U_SUCCESS(status)) {
950 status = U_UNSUPPORTED_ERROR;
951 }
952}
953
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800954int32_t
955Collator::getEquivalentReorderCodes(int32_t reorderCode,
956 int32_t *dest, int32_t capacity,
957 UErrorCode &errorCode) {
958 if(U_FAILURE(errorCode)) { return 0; }
959 if(capacity < 0 || (dest == NULL && capacity > 0)) {
960 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
961 return 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000962 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800963 const CollationData *baseData = CollationRoot::getData(errorCode);
964 if(U_FAILURE(errorCode)) { return 0; }
965 return baseData->getEquivalentScripts(reorderCode, dest, capacity, errorCode);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000966}
967
968int32_t
969Collator::internalGetShortDefinitionString(const char * /*locale*/,
970 char * /*buffer*/,
971 int32_t /*capacity*/,
972 UErrorCode &status) const {
973 if(U_SUCCESS(status)) {
974 status = U_UNSUPPORTED_ERROR; /* Shouldn't happen, internal function */
975 }
976 return 0;
977}
978
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800979UCollationResult
980Collator::internalCompareUTF8(const char *left, int32_t leftLength,
981 const char *right, int32_t rightLength,
982 UErrorCode &errorCode) const {
983 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
984 if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
985 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
986 return UCOL_EQUAL;
987 }
988 return compareUTF8(
989 StringPiece(left, (leftLength < 0) ? uprv_strlen(left) : leftLength),
990 StringPiece(right, (rightLength < 0) ? uprv_strlen(right) : rightLength),
991 errorCode);
992}
993
994int32_t
995Collator::internalNextSortKeyPart(UCharIterator * /*iter*/, uint32_t /*state*/[2],
996 uint8_t * /*dest*/, int32_t /*count*/, UErrorCode &errorCode) const {
997 if (U_SUCCESS(errorCode)) {
998 errorCode = U_UNSUPPORTED_ERROR;
999 }
1000 return 0;
1001}
1002
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001003// UCollator private data members ----------------------------------------
1004
1005/* This is useless information */
1006/*const UVersionInfo Collator::fVersion = {1, 1, 0, 0};*/
1007
1008// -------------------------------------
1009
1010U_NAMESPACE_END
1011
1012#endif /* #if !UCONFIG_NO_COLLATION */
1013
1014/* eof */