blob: a240295b679eaa12ad10cc2c415f0e56ff1b475c [file] [log] [blame]
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 1996-2015, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* rulebasedcollator.cpp
*
* (replaced the former tblcoll.cpp)
*
* created on: 2012feb14 with new and old collation code
* created by: Markus W. Scherer
*/
15
16#include "unicode/utypes.h"
17
18#if !UCONFIG_NO_COLLATION
19
20#include "unicode/coll.h"
21#include "unicode/coleitr.h"
22#include "unicode/localpointer.h"
23#include "unicode/locid.h"
24#include "unicode/sortkey.h"
25#include "unicode/tblcoll.h"
26#include "unicode/ucol.h"
27#include "unicode/uiter.h"
28#include "unicode/uloc.h"
29#include "unicode/uniset.h"
30#include "unicode/unistr.h"
31#include "unicode/usetiter.h"
32#include "unicode/utf8.h"
33#include "unicode/uversion.h"
34#include "bocsu.h"
35#include "charstr.h"
36#include "cmemory.h"
37#include "collation.h"
38#include "collationcompare.h"
39#include "collationdata.h"
40#include "collationdatareader.h"
41#include "collationfastlatin.h"
42#include "collationiterator.h"
43#include "collationkeys.h"
44#include "collationroot.h"
45#include "collationsets.h"
46#include "collationsettings.h"
47#include "collationtailoring.h"
48#include "cstring.h"
49#include "uassert.h"
50#include "ucol_imp.h"
51#include "uhash.h"
52#include "uitercollationiterator.h"
53#include "ustr_imp.h"
54#include "utf16collationiterator.h"
55#include "utf8collationiterator.h"
56#include "uvectr64.h"
57
58U_NAMESPACE_BEGIN
59
60namespace {
61
62class FixedSortKeyByteSink : public SortKeyByteSink {
63public:
64 FixedSortKeyByteSink(char *dest, int32_t destCapacity)
65 : SortKeyByteSink(dest, destCapacity) {}
66 virtual ~FixedSortKeyByteSink();
67
68private:
Frank Tang3e05d9d2021-11-08 14:04:04 -080069 virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) override;
70 virtual UBool Resize(int32_t appendCapacity, int32_t length) override;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -080071};
72
73FixedSortKeyByteSink::~FixedSortKeyByteSink() {}
74
75void
76FixedSortKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t /*n*/, int32_t length) {
77 // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
78 // Fill the buffer completely.
79 int32_t available = capacity_ - length;
80 if (available > 0) {
81 uprv_memcpy(buffer_ + length, bytes, available);
82 }
83}
84
85UBool
86FixedSortKeyByteSink::Resize(int32_t /*appendCapacity*/, int32_t /*length*/) {
Frank Tang1f164ee2022-11-08 12:31:27 -080087 return false;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -080088}
89
90} // namespace
91
92// Not in an anonymous namespace, so that it can be a friend of CollationKey.
93class CollationKeyByteSink : public SortKeyByteSink {
94public:
95 CollationKeyByteSink(CollationKey &key)
96 : SortKeyByteSink(reinterpret_cast<char *>(key.getBytes()), key.getCapacity()),
97 key_(key) {}
98 virtual ~CollationKeyByteSink();
99
100private:
Frank Tang3e05d9d2021-11-08 14:04:04 -0800101 virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) override;
102 virtual UBool Resize(int32_t appendCapacity, int32_t length) override;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800103
104 CollationKey &key_;
105};
106
107CollationKeyByteSink::~CollationKeyByteSink() {}
108
109void
110CollationKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) {
111 // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
112 if (Resize(n, length)) {
113 uprv_memcpy(buffer_ + length, bytes, n);
114 }
115}
116
117UBool
118CollationKeyByteSink::Resize(int32_t appendCapacity, int32_t length) {
119 if (buffer_ == NULL) {
Frank Tang1f164ee2022-11-08 12:31:27 -0800120 return false; // allocation failed before already
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800121 }
122 int32_t newCapacity = 2 * capacity_;
123 int32_t altCapacity = length + 2 * appendCapacity;
124 if (newCapacity < altCapacity) {
125 newCapacity = altCapacity;
126 }
127 if (newCapacity < 200) {
128 newCapacity = 200;
129 }
130 uint8_t *newBuffer = key_.reallocate(newCapacity, length);
131 if (newBuffer == NULL) {
132 SetNotOk();
Frank Tang1f164ee2022-11-08 12:31:27 -0800133 return false;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800134 }
135 buffer_ = reinterpret_cast<char *>(newBuffer);
136 capacity_ = newCapacity;
Frank Tang1f164ee2022-11-08 12:31:27 -0800137 return true;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800138}
139
140RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator &other)
141 : Collator(other),
142 data(other.data),
143 settings(other.settings),
144 tailoring(other.tailoring),
145 cacheEntry(other.cacheEntry),
146 validLocale(other.validLocale),
147 explicitlySetAttributes(other.explicitlySetAttributes),
148 actualLocaleIsSameAsValid(other.actualLocaleIsSameAsValid) {
149 settings->addRef();
150 cacheEntry->addRef();
151}
152
153RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
154 const RuleBasedCollator *base, UErrorCode &errorCode)
155 : data(NULL),
156 settings(NULL),
157 tailoring(NULL),
158 cacheEntry(NULL),
159 validLocale(""),
160 explicitlySetAttributes(0),
Frank Tang1f164ee2022-11-08 12:31:27 -0800161 actualLocaleIsSameAsValid(false) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800162 if(U_FAILURE(errorCode)) { return; }
163 if(bin == NULL || length == 0 || base == NULL) {
164 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
165 return;
166 }
167 const CollationTailoring *root = CollationRoot::getRoot(errorCode);
168 if(U_FAILURE(errorCode)) { return; }
169 if(base->tailoring != root) {
170 errorCode = U_UNSUPPORTED_ERROR;
171 return;
172 }
173 LocalPointer<CollationTailoring> t(new CollationTailoring(base->tailoring->settings));
174 if(t.isNull() || t->isBogus()) {
175 errorCode = U_MEMORY_ALLOCATION_ERROR;
176 return;
177 }
178 CollationDataReader::read(base->tailoring, bin, length, *t, errorCode);
179 if(U_FAILURE(errorCode)) { return; }
180 t->actualLocale.setToBogus();
181 adoptTailoring(t.orphan(), errorCode);
182}
183
184RuleBasedCollator::RuleBasedCollator(const CollationCacheEntry *entry)
185 : data(entry->tailoring->data),
186 settings(entry->tailoring->settings),
187 tailoring(entry->tailoring),
188 cacheEntry(entry),
189 validLocale(entry->validLocale),
190 explicitlySetAttributes(0),
Frank Tang1f164ee2022-11-08 12:31:27 -0800191 actualLocaleIsSameAsValid(false) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800192 settings->addRef();
193 cacheEntry->addRef();
194}
195
196RuleBasedCollator::~RuleBasedCollator() {
197 SharedObject::clearPtr(settings);
198 SharedObject::clearPtr(cacheEntry);
199}
200
201void
202RuleBasedCollator::adoptTailoring(CollationTailoring *t, UErrorCode &errorCode) {
203 if(U_FAILURE(errorCode)) {
204 t->deleteIfZeroRefCount();
205 return;
206 }
207 U_ASSERT(settings == NULL && data == NULL && tailoring == NULL && cacheEntry == NULL);
208 cacheEntry = new CollationCacheEntry(t->actualLocale, t);
209 if(cacheEntry == NULL) {
210 errorCode = U_MEMORY_ALLOCATION_ERROR;
211 t->deleteIfZeroRefCount();
212 return;
213 }
214 data = t->data;
215 settings = t->settings;
216 settings->addRef();
217 tailoring = t;
218 cacheEntry->addRef();
219 validLocale = t->actualLocale;
Frank Tang1f164ee2022-11-08 12:31:27 -0800220 actualLocaleIsSameAsValid = false;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800221}
222
Frank Tangb8696612019-10-25 14:58:21 -0700223RuleBasedCollator *
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800224RuleBasedCollator::clone() const {
225 return new RuleBasedCollator(*this);
226}
227
228RuleBasedCollator &RuleBasedCollator::operator=(const RuleBasedCollator &other) {
229 if(this == &other) { return *this; }
230 SharedObject::copyPtr(other.settings, settings);
231 tailoring = other.tailoring;
232 SharedObject::copyPtr(other.cacheEntry, cacheEntry);
233 data = tailoring->data;
234 validLocale = other.validLocale;
235 explicitlySetAttributes = other.explicitlySetAttributes;
236 actualLocaleIsSameAsValid = other.actualLocaleIsSameAsValid;
237 return *this;
238}
239
240UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
241
Frank Tang3e05d9d2021-11-08 14:04:04 -0800242bool
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800243RuleBasedCollator::operator==(const Collator& other) const {
Frank Tang3e05d9d2021-11-08 14:04:04 -0800244 if(this == &other) { return true; }
245 if(!Collator::operator==(other)) { return false; }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800246 const RuleBasedCollator &o = static_cast<const RuleBasedCollator &>(other);
Frank Tang3e05d9d2021-11-08 14:04:04 -0800247 if(*settings != *o.settings) { return false; }
248 if(data == o.data) { return true; }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800249 UBool thisIsRoot = data->base == NULL;
250 UBool otherIsRoot = o.data->base == NULL;
251 U_ASSERT(!thisIsRoot || !otherIsRoot); // otherwise their data pointers should be ==
Frank Tang3e05d9d2021-11-08 14:04:04 -0800252 if(thisIsRoot != otherIsRoot) { return false; }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800253 if((thisIsRoot || !tailoring->rules.isEmpty()) &&
254 (otherIsRoot || !o.tailoring->rules.isEmpty())) {
255 // Shortcut: If both collators have valid rule strings, then compare those.
Frank Tang3e05d9d2021-11-08 14:04:04 -0800256 if(tailoring->rules == o.tailoring->rules) { return true; }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800257 }
258 // Different rule strings can result in the same or equivalent tailoring.
259 // The rule strings are optional in ICU resource bundles, although included by default.
260 // cloneBinary() drops the rule string.
261 UErrorCode errorCode = U_ZERO_ERROR;
262 LocalPointer<UnicodeSet> thisTailored(getTailoredSet(errorCode));
263 LocalPointer<UnicodeSet> otherTailored(o.getTailoredSet(errorCode));
Frank Tang3e05d9d2021-11-08 14:04:04 -0800264 if(U_FAILURE(errorCode)) { return false; }
265 if(*thisTailored != *otherTailored) { return false; }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800266 // For completeness, we should compare all of the mappings;
267 // or we should create a list of strings, sort it with one collator,
268 // and check if both collators compare adjacent strings the same
269 // (order & strength, down to quaternary); or similar.
270 // Testing equality of collators seems unusual.
Frank Tang3e05d9d2021-11-08 14:04:04 -0800271 return true;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800272}
273
274int32_t
275RuleBasedCollator::hashCode() const {
276 int32_t h = settings->hashCode();
277 if(data->base == NULL) { return h; } // root collator
278 // Do not rely on the rule string, see comments in operator==().
279 UErrorCode errorCode = U_ZERO_ERROR;
280 LocalPointer<UnicodeSet> set(getTailoredSet(errorCode));
281 if(U_FAILURE(errorCode)) { return 0; }
282 UnicodeSetIterator iter(*set);
283 while(iter.next() && !iter.isString()) {
284 h ^= data->getCE32(iter.getCodepoint());
285 }
286 return h;
287}
288
289void
290RuleBasedCollator::setLocales(const Locale &requested, const Locale &valid,
291 const Locale &actual) {
292 if(actual == tailoring->actualLocale) {
Frank Tang1f164ee2022-11-08 12:31:27 -0800293 actualLocaleIsSameAsValid = false;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800294 } else {
295 U_ASSERT(actual == valid);
Frank Tang1f164ee2022-11-08 12:31:27 -0800296 actualLocaleIsSameAsValid = true;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800297 }
298 // Do not modify tailoring.actualLocale:
299 // We cannot be sure that that would be thread-safe.
300 validLocale = valid;
301 (void)requested; // Ignore, see also ticket #10477.
302}
303
304Locale
305RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode& errorCode) const {
306 if(U_FAILURE(errorCode)) {
307 return Locale::getRoot();
308 }
309 switch(type) {
310 case ULOC_ACTUAL_LOCALE:
311 return actualLocaleIsSameAsValid ? validLocale : tailoring->actualLocale;
312 case ULOC_VALID_LOCALE:
313 return validLocale;
314 case ULOC_REQUESTED_LOCALE:
315 default:
316 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
317 return Locale::getRoot();
318 }
319}
320
321const char *
322RuleBasedCollator::internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const {
323 if(U_FAILURE(errorCode)) {
324 return NULL;
325 }
326 const Locale *result;
327 switch(type) {
328 case ULOC_ACTUAL_LOCALE:
329 result = actualLocaleIsSameAsValid ? &validLocale : &tailoring->actualLocale;
330 break;
331 case ULOC_VALID_LOCALE:
332 result = &validLocale;
333 break;
334 case ULOC_REQUESTED_LOCALE:
335 default:
336 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
337 return NULL;
338 }
339 if(result->isBogus()) { return NULL; }
340 const char *id = result->getName();
341 return id[0] == 0 ? "root" : id;
342}
343
344const UnicodeString&
345RuleBasedCollator::getRules() const {
346 return tailoring->rules;
347}
348
349void
350RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) const {
351 if(delta == UCOL_TAILORING_ONLY) {
352 buffer = tailoring->rules;
353 return;
354 }
355 // UCOL_FULL_RULES
356 buffer.remove();
357 CollationLoader::appendRootRules(buffer);
358 buffer.append(tailoring->rules).getTerminatedBuffer();
359}
360
361void
362RuleBasedCollator::getVersion(UVersionInfo version) const {
363 uprv_memcpy(version, tailoring->version, U_MAX_VERSION_LENGTH);
364 version[0] += (UCOL_RUNTIME_VERSION << 4) + (UCOL_RUNTIME_VERSION >> 4);
365}
366
367UnicodeSet *
368RuleBasedCollator::getTailoredSet(UErrorCode &errorCode) const {
369 if(U_FAILURE(errorCode)) { return NULL; }
370 UnicodeSet *tailored = new UnicodeSet();
371 if(tailored == NULL) {
372 errorCode = U_MEMORY_ALLOCATION_ERROR;
373 return NULL;
374 }
375 if(data->base != NULL) {
376 TailoredSet(tailored).forData(data, errorCode);
377 if(U_FAILURE(errorCode)) {
378 delete tailored;
379 return NULL;
380 }
381 }
382 return tailored;
383}
384
385void
386RuleBasedCollator::internalGetContractionsAndExpansions(
387 UnicodeSet *contractions, UnicodeSet *expansions,
388 UBool addPrefixes, UErrorCode &errorCode) const {
389 if(U_FAILURE(errorCode)) { return; }
390 if(contractions != NULL) {
391 contractions->clear();
392 }
393 if(expansions != NULL) {
394 expansions->clear();
395 }
396 ContractionsAndExpansions(contractions, expansions, NULL, addPrefixes).forData(data, errorCode);
397}
398
399void
400RuleBasedCollator::internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const {
401 if(U_FAILURE(errorCode)) { return; }
Frank Tang1f164ee2022-11-08 12:31:27 -0800402 ContractionsAndExpansions(&set, NULL, NULL, false).forCodePoint(data, c, errorCode);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800403}
404
405const CollationSettings &
406RuleBasedCollator::getDefaultSettings() const {
407 return *tailoring->settings;
408}
409
410UColAttributeValue
411RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCode &errorCode) const {
412 if(U_FAILURE(errorCode)) { return UCOL_DEFAULT; }
413 int32_t option;
414 switch(attr) {
415 case UCOL_FRENCH_COLLATION:
416 option = CollationSettings::BACKWARD_SECONDARY;
417 break;
418 case UCOL_ALTERNATE_HANDLING:
419 return settings->getAlternateHandling();
420 case UCOL_CASE_FIRST:
421 return settings->getCaseFirst();
422 case UCOL_CASE_LEVEL:
423 option = CollationSettings::CASE_LEVEL;
424 break;
425 case UCOL_NORMALIZATION_MODE:
426 option = CollationSettings::CHECK_FCD;
427 break;
428 case UCOL_STRENGTH:
429 return (UColAttributeValue)settings->getStrength();
430 case UCOL_HIRAGANA_QUATERNARY_MODE:
431 // Deprecated attribute, unsettable.
432 return UCOL_OFF;
433 case UCOL_NUMERIC_COLLATION:
434 option = CollationSettings::NUMERIC;
435 break;
436 default:
437 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
438 return UCOL_DEFAULT;
439 }
440 return ((settings->options & option) == 0) ? UCOL_OFF : UCOL_ON;
441}
442
443void
444RuleBasedCollator::setAttribute(UColAttribute attr, UColAttributeValue value,
445 UErrorCode &errorCode) {
446 UColAttributeValue oldValue = getAttribute(attr, errorCode);
447 if(U_FAILURE(errorCode)) { return; }
448 if(value == oldValue) {
449 setAttributeExplicitly(attr);
450 return;
451 }
452 const CollationSettings &defaultSettings = getDefaultSettings();
453 if(settings == &defaultSettings) {
454 if(value == UCOL_DEFAULT) {
455 setAttributeDefault(attr);
456 return;
457 }
458 }
459 CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
460 if(ownedSettings == NULL) {
461 errorCode = U_MEMORY_ALLOCATION_ERROR;
462 return;
463 }
464
465 switch(attr) {
466 case UCOL_FRENCH_COLLATION:
467 ownedSettings->setFlag(CollationSettings::BACKWARD_SECONDARY, value,
468 defaultSettings.options, errorCode);
469 break;
470 case UCOL_ALTERNATE_HANDLING:
471 ownedSettings->setAlternateHandling(value, defaultSettings.options, errorCode);
472 break;
473 case UCOL_CASE_FIRST:
474 ownedSettings->setCaseFirst(value, defaultSettings.options, errorCode);
475 break;
476 case UCOL_CASE_LEVEL:
477 ownedSettings->setFlag(CollationSettings::CASE_LEVEL, value,
478 defaultSettings.options, errorCode);
479 break;
480 case UCOL_NORMALIZATION_MODE:
481 ownedSettings->setFlag(CollationSettings::CHECK_FCD, value,
482 defaultSettings.options, errorCode);
483 break;
484 case UCOL_STRENGTH:
485 ownedSettings->setStrength(value, defaultSettings.options, errorCode);
486 break;
487 case UCOL_HIRAGANA_QUATERNARY_MODE:
488 // Deprecated attribute. Check for valid values but do not change anything.
489 if(value != UCOL_OFF && value != UCOL_ON && value != UCOL_DEFAULT) {
490 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
491 }
492 break;
493 case UCOL_NUMERIC_COLLATION:
494 ownedSettings->setFlag(CollationSettings::NUMERIC, value, defaultSettings.options, errorCode);
495 break;
496 default:
497 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
498 break;
499 }
500 if(U_FAILURE(errorCode)) { return; }
501 setFastLatinOptions(*ownedSettings);
502 if(value == UCOL_DEFAULT) {
503 setAttributeDefault(attr);
504 } else {
505 setAttributeExplicitly(attr);
506 }
507}
508
509Collator &
510RuleBasedCollator::setMaxVariable(UColReorderCode group, UErrorCode &errorCode) {
511 if(U_FAILURE(errorCode)) { return *this; }
512 // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1.
513 int32_t value;
514 if(group == UCOL_REORDER_CODE_DEFAULT) {
515 value = UCOL_DEFAULT;
516 } else if(UCOL_REORDER_CODE_FIRST <= group && group <= UCOL_REORDER_CODE_CURRENCY) {
517 value = group - UCOL_REORDER_CODE_FIRST;
518 } else {
519 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
520 return *this;
521 }
522 CollationSettings::MaxVariable oldValue = settings->getMaxVariable();
523 if(value == oldValue) {
524 setAttributeExplicitly(ATTR_VARIABLE_TOP);
525 return *this;
526 }
527 const CollationSettings &defaultSettings = getDefaultSettings();
528 if(settings == &defaultSettings) {
529 if(value == UCOL_DEFAULT) {
530 setAttributeDefault(ATTR_VARIABLE_TOP);
531 return *this;
532 }
533 }
534 CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
535 if(ownedSettings == NULL) {
536 errorCode = U_MEMORY_ALLOCATION_ERROR;
537 return *this;
538 }
539
540 if(group == UCOL_REORDER_CODE_DEFAULT) {
Frank Tang585942f2022-05-09 15:40:30 -0700541 group = (UColReorderCode)(
542 UCOL_REORDER_CODE_FIRST + int32_t{defaultSettings.getMaxVariable()});
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800543 }
544 uint32_t varTop = data->getLastPrimaryForGroup(group);
545 U_ASSERT(varTop != 0);
546 ownedSettings->setMaxVariable(value, defaultSettings.options, errorCode);
547 if(U_FAILURE(errorCode)) { return *this; }
548 ownedSettings->variableTop = varTop;
549 setFastLatinOptions(*ownedSettings);
550 if(value == UCOL_DEFAULT) {
551 setAttributeDefault(ATTR_VARIABLE_TOP);
552 } else {
553 setAttributeExplicitly(ATTR_VARIABLE_TOP);
554 }
555 return *this;
556}
557
558UColReorderCode
559RuleBasedCollator::getMaxVariable() const {
Frank Tang585942f2022-05-09 15:40:30 -0700560 return (UColReorderCode)(UCOL_REORDER_CODE_FIRST + int32_t{settings->getMaxVariable()});
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800561}
562
563uint32_t
564RuleBasedCollator::getVariableTop(UErrorCode & /*errorCode*/) const {
565 return settings->variableTop;
566}
567
568uint32_t
569RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &errorCode) {
570 if(U_FAILURE(errorCode)) { return 0; }
571 if(varTop == NULL && len !=0) {
572 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
573 return 0;
574 }
575 if(len < 0) { len = u_strlen(varTop); }
576 if(len == 0) {
577 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
578 return 0;
579 }
580 UBool numeric = settings->isNumeric();
581 int64_t ce1, ce2;
582 if(settings->dontCheckFCD()) {
583 UTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
584 ce1 = ci.nextCE(errorCode);
585 ce2 = ci.nextCE(errorCode);
586 } else {
587 FCDUTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
588 ce1 = ci.nextCE(errorCode);
589 ce2 = ci.nextCE(errorCode);
590 }
591 if(ce1 == Collation::NO_CE || ce2 != Collation::NO_CE) {
592 errorCode = U_CE_NOT_FOUND_ERROR;
593 return 0;
594 }
595 setVariableTop((uint32_t)(ce1 >> 32), errorCode);
596 return settings->variableTop;
597}
598
599uint32_t
600RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &errorCode) {
601 return setVariableTop(varTop.getBuffer(), varTop.length(), errorCode);
602}
603
604void
605RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &errorCode) {
606 if(U_FAILURE(errorCode)) { return; }
607 if(varTop != settings->variableTop) {
608 // Pin the variable top to the end of the reordering group which contains it.
609 // Only a few special groups are supported.
610 int32_t group = data->getGroupForPrimary(varTop);
611 if(group < UCOL_REORDER_CODE_FIRST || UCOL_REORDER_CODE_CURRENCY < group) {
612 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
613 return;
614 }
615 uint32_t v = data->getLastPrimaryForGroup(group);
616 U_ASSERT(v != 0 && v >= varTop);
617 varTop = v;
618 if(varTop != settings->variableTop) {
619 CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
620 if(ownedSettings == NULL) {
621 errorCode = U_MEMORY_ALLOCATION_ERROR;
622 return;
623 }
624 ownedSettings->setMaxVariable(group - UCOL_REORDER_CODE_FIRST,
625 getDefaultSettings().options, errorCode);
626 if(U_FAILURE(errorCode)) { return; }
627 ownedSettings->variableTop = varTop;
628 setFastLatinOptions(*ownedSettings);
629 }
630 }
631 if(varTop == getDefaultSettings().variableTop) {
632 setAttributeDefault(ATTR_VARIABLE_TOP);
633 } else {
634 setAttributeExplicitly(ATTR_VARIABLE_TOP);
635 }
636}
637
638int32_t
639RuleBasedCollator::getReorderCodes(int32_t *dest, int32_t capacity,
640 UErrorCode &errorCode) const {
641 if(U_FAILURE(errorCode)) { return 0; }
642 if(capacity < 0 || (dest == NULL && capacity > 0)) {
643 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
644 return 0;
645 }
646 int32_t length = settings->reorderCodesLength;
647 if(length == 0) { return 0; }
648 if(length > capacity) {
649 errorCode = U_BUFFER_OVERFLOW_ERROR;
650 return length;
651 }
652 uprv_memcpy(dest, settings->reorderCodes, length * 4);
653 return length;
654}
655
656void
657RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, int32_t length,
658 UErrorCode &errorCode) {
659 if(U_FAILURE(errorCode)) { return; }
660 if(length < 0 || (reorderCodes == NULL && length > 0)) {
661 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
662 return;
663 }
664 if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_NONE) {
665 length = 0;
666 }
667 if(length == settings->reorderCodesLength &&
668 uprv_memcmp(reorderCodes, settings->reorderCodes, length * 4) == 0) {
669 return;
670 }
671 const CollationSettings &defaultSettings = getDefaultSettings();
672 if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_DEFAULT) {
673 if(settings != &defaultSettings) {
674 CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
675 if(ownedSettings == NULL) {
676 errorCode = U_MEMORY_ALLOCATION_ERROR;
677 return;
678 }
Jungshik Shin70f82502016-01-29 00:32:36 -0800679 ownedSettings->copyReorderingFrom(defaultSettings, errorCode);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800680 setFastLatinOptions(*ownedSettings);
681 }
682 return;
683 }
684 CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
685 if(ownedSettings == NULL) {
686 errorCode = U_MEMORY_ALLOCATION_ERROR;
687 return;
688 }
Jungshik Shin70f82502016-01-29 00:32:36 -0800689 ownedSettings->setReordering(*data, reorderCodes, length, errorCode);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800690 setFastLatinOptions(*ownedSettings);
691}
692
693void
694RuleBasedCollator::setFastLatinOptions(CollationSettings &ownedSettings) const {
695 ownedSettings.fastLatinOptions = CollationFastLatin::getOptions(
696 data, ownedSettings,
697 ownedSettings.fastLatinPrimaries, UPRV_LENGTHOF(ownedSettings.fastLatinPrimaries));
698}
699
700UCollationResult
701RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
702 UErrorCode &errorCode) const {
703 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
704 return doCompare(left.getBuffer(), left.length(),
705 right.getBuffer(), right.length(), errorCode);
706}
707
708UCollationResult
709RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
710 int32_t length, UErrorCode &errorCode) const {
711 if(U_FAILURE(errorCode) || length == 0) { return UCOL_EQUAL; }
712 if(length < 0) {
713 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
714 return UCOL_EQUAL;
715 }
716 int32_t leftLength = left.length();
717 int32_t rightLength = right.length();
718 if(leftLength > length) { leftLength = length; }
719 if(rightLength > length) { rightLength = length; }
720 return doCompare(left.getBuffer(), leftLength,
721 right.getBuffer(), rightLength, errorCode);
722}
723
724UCollationResult
725RuleBasedCollator::compare(const UChar *left, int32_t leftLength,
726 const UChar *right, int32_t rightLength,
727 UErrorCode &errorCode) const {
728 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
729 if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
730 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
731 return UCOL_EQUAL;
732 }
733 // Make sure both or neither strings have a known length.
734 // We do not optimize for mixed length/termination.
735 if(leftLength >= 0) {
736 if(rightLength < 0) { rightLength = u_strlen(right); }
737 } else {
738 if(rightLength >= 0) { leftLength = u_strlen(left); }
739 }
740 return doCompare(left, leftLength, right, rightLength, errorCode);
741}
742
743UCollationResult
744RuleBasedCollator::compareUTF8(const StringPiece &left, const StringPiece &right,
745 UErrorCode &errorCode) const {
746 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
747 const uint8_t *leftBytes = reinterpret_cast<const uint8_t *>(left.data());
748 const uint8_t *rightBytes = reinterpret_cast<const uint8_t *>(right.data());
749 if((leftBytes == NULL && !left.empty()) || (rightBytes == NULL && !right.empty())) {
750 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
751 return UCOL_EQUAL;
752 }
753 return doCompare(leftBytes, left.length(), rightBytes, right.length(), errorCode);
754}
755
756UCollationResult
757RuleBasedCollator::internalCompareUTF8(const char *left, int32_t leftLength,
758 const char *right, int32_t rightLength,
759 UErrorCode &errorCode) const {
760 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
761 if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
762 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
763 return UCOL_EQUAL;
764 }
765 // Make sure both or neither strings have a known length.
766 // We do not optimize for mixed length/termination.
767 if(leftLength >= 0) {
Jungshik Shin42d50272018-10-24 01:22:09 -0700768 if(rightLength < 0) { rightLength = static_cast<int32_t>(uprv_strlen(right)); }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800769 } else {
Jungshik Shin42d50272018-10-24 01:22:09 -0700770 if(rightLength >= 0) { leftLength = static_cast<int32_t>(uprv_strlen(left)); }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800771 }
772 return doCompare(reinterpret_cast<const uint8_t *>(left), leftLength,
773 reinterpret_cast<const uint8_t *>(right), rightLength, errorCode);
774}
775
776namespace {
777
778/**
779 * Abstract iterator for identical-level string comparisons.
780 * Returns FCD code points and handles temporary switching to NFD.
781 */
782class NFDIterator : public UObject {
783public:
784 NFDIterator() : index(-1), length(0) {}
785 virtual ~NFDIterator() {}
786 /**
787 * Returns the next code point from the internal normalization buffer,
788 * or else the next text code point.
789 * Returns -1 at the end of the text.
790 */
791 UChar32 nextCodePoint() {
792 if(index >= 0) {
793 if(index == length) {
794 index = -1;
795 } else {
796 UChar32 c;
797 U16_NEXT_UNSAFE(decomp, index, c);
798 return c;
799 }
800 }
801 return nextRawCodePoint();
802 }
803 /**
804 * @param nfcImpl
805 * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint()
806 * @return the first code point in c's decomposition,
807 * or c itself if it was decomposed already or if it does not decompose
808 */
809 UChar32 nextDecomposedCodePoint(const Normalizer2Impl &nfcImpl, UChar32 c) {
810 if(index >= 0) { return c; }
811 decomp = nfcImpl.getDecomposition(c, buffer, length);
812 if(decomp == NULL) { return c; }
813 index = 0;
814 U16_NEXT_UNSAFE(decomp, index, c);
815 return c;
816 }
817protected:
818 /**
819 * Returns the next text code point in FCD order.
820 * Returns -1 at the end of the text.
821 */
822 virtual UChar32 nextRawCodePoint() = 0;
823private:
824 const UChar *decomp;
825 UChar buffer[4];
826 int32_t index;
827 int32_t length;
828};
829
830class UTF16NFDIterator : public NFDIterator {
831public:
832 UTF16NFDIterator(const UChar *text, const UChar *textLimit) : s(text), limit(textLimit) {}
833protected:
Frank Tang3e05d9d2021-11-08 14:04:04 -0800834 virtual UChar32 nextRawCodePoint() override {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800835 if(s == limit) { return U_SENTINEL; }
836 UChar32 c = *s++;
837 if(limit == NULL && c == 0) {
838 s = NULL;
839 return U_SENTINEL;
840 }
841 UChar trail;
842 if(U16_IS_LEAD(c) && s != limit && U16_IS_TRAIL(trail = *s)) {
843 ++s;
844 c = U16_GET_SUPPLEMENTARY(c, trail);
845 }
846 return c;
847 }
848
849 const UChar *s;
850 const UChar *limit;
851};
852
853class FCDUTF16NFDIterator : public UTF16NFDIterator {
854public:
855 FCDUTF16NFDIterator(const Normalizer2Impl &nfcImpl, const UChar *text, const UChar *textLimit)
856 : UTF16NFDIterator(NULL, NULL) {
857 UErrorCode errorCode = U_ZERO_ERROR;
858 const UChar *spanLimit = nfcImpl.makeFCD(text, textLimit, NULL, errorCode);
859 if(U_FAILURE(errorCode)) { return; }
860 if(spanLimit == textLimit || (textLimit == NULL && *spanLimit == 0)) {
861 s = text;
862 limit = spanLimit;
863 } else {
864 str.setTo(text, (int32_t)(spanLimit - text));
865 {
Jungshik Shin42d50272018-10-24 01:22:09 -0700866 ReorderingBuffer r_buffer(nfcImpl, str);
867 if(r_buffer.init(str.length(), errorCode)) {
868 nfcImpl.makeFCD(spanLimit, textLimit, &r_buffer, errorCode);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800869 }
870 }
871 if(U_SUCCESS(errorCode)) {
872 s = str.getBuffer();
873 limit = s + str.length();
874 }
875 }
876 }
877private:
878 UnicodeString str;
879};
880
881class UTF8NFDIterator : public NFDIterator {
882public:
883 UTF8NFDIterator(const uint8_t *text, int32_t textLength)
884 : s(text), pos(0), length(textLength) {}
885protected:
Frank Tang3e05d9d2021-11-08 14:04:04 -0800886 virtual UChar32 nextRawCodePoint() override {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800887 if(pos == length || (s[pos] == 0 && length < 0)) { return U_SENTINEL; }
888 UChar32 c;
889 U8_NEXT_OR_FFFD(s, pos, length, c);
890 return c;
891 }
892
893 const uint8_t *s;
894 int32_t pos;
895 int32_t length;
896};
897
898class FCDUTF8NFDIterator : public NFDIterator {
899public:
900 FCDUTF8NFDIterator(const CollationData *data, const uint8_t *text, int32_t textLength)
Frank Tang1f164ee2022-11-08 12:31:27 -0800901 : u8ci(data, false, text, 0, textLength) {}
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800902protected:
Frank Tang3e05d9d2021-11-08 14:04:04 -0800903 virtual UChar32 nextRawCodePoint() override {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800904 UErrorCode errorCode = U_ZERO_ERROR;
905 return u8ci.nextCodePoint(errorCode);
906 }
907private:
908 FCDUTF8CollationIterator u8ci;
909};
910
911class UIterNFDIterator : public NFDIterator {
912public:
913 UIterNFDIterator(UCharIterator &it) : iter(it) {}
914protected:
Frank Tang3e05d9d2021-11-08 14:04:04 -0800915 virtual UChar32 nextRawCodePoint() override {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800916 return uiter_next32(&iter);
917 }
918private:
919 UCharIterator &iter;
920};
921
922class FCDUIterNFDIterator : public NFDIterator {
923public:
924 FCDUIterNFDIterator(const CollationData *data, UCharIterator &it, int32_t startIndex)
Frank Tang1f164ee2022-11-08 12:31:27 -0800925 : uici(data, false, it, startIndex) {}
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800926protected:
Frank Tang3e05d9d2021-11-08 14:04:04 -0800927 virtual UChar32 nextRawCodePoint() override {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800928 UErrorCode errorCode = U_ZERO_ERROR;
929 return uici.nextCodePoint(errorCode);
930 }
931private:
932 FCDUIterCollationIterator uici;
933};
934
935UCollationResult compareNFDIter(const Normalizer2Impl &nfcImpl,
936 NFDIterator &left, NFDIterator &right) {
937 for(;;) {
938 // Fetch the next FCD code point from each string.
939 UChar32 leftCp = left.nextCodePoint();
940 UChar32 rightCp = right.nextCodePoint();
941 if(leftCp == rightCp) {
942 if(leftCp < 0) { break; }
943 continue;
944 }
945 // If they are different, then decompose each and compare again.
946 if(leftCp < 0) {
947 leftCp = -2; // end of string
948 } else if(leftCp == 0xfffe) {
949 leftCp = -1; // U+FFFE: merge separator
950 } else {
951 leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp);
952 }
953 if(rightCp < 0) {
954 rightCp = -2; // end of string
955 } else if(rightCp == 0xfffe) {
956 rightCp = -1; // U+FFFE: merge separator
957 } else {
958 rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp);
959 }
960 if(leftCp < rightCp) { return UCOL_LESS; }
961 if(leftCp > rightCp) { return UCOL_GREATER; }
962 }
963 return UCOL_EQUAL;
964}
965
966} // namespace
967
968UCollationResult
969RuleBasedCollator::doCompare(const UChar *left, int32_t leftLength,
970 const UChar *right, int32_t rightLength,
971 UErrorCode &errorCode) const {
972 // U_FAILURE(errorCode) checked by caller.
973 if(left == right && leftLength == rightLength) {
974 return UCOL_EQUAL;
975 }
976
977 // Identical-prefix test.
978 const UChar *leftLimit;
979 const UChar *rightLimit;
980 int32_t equalPrefixLength = 0;
981 if(leftLength < 0) {
982 leftLimit = NULL;
983 rightLimit = NULL;
984 UChar c;
985 while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
986 if(c == 0) { return UCOL_EQUAL; }
987 ++equalPrefixLength;
988 }
989 } else {
990 leftLimit = left + leftLength;
991 rightLimit = right + rightLength;
992 for(;;) {
993 if(equalPrefixLength == leftLength) {
994 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
995 break;
996 } else if(equalPrefixLength == rightLength ||
997 left[equalPrefixLength] != right[equalPrefixLength]) {
998 break;
999 }
1000 ++equalPrefixLength;
1001 }
1002 }
1003
1004 UBool numeric = settings->isNumeric();
1005 if(equalPrefixLength > 0) {
1006 if((equalPrefixLength != leftLength &&
1007 data->isUnsafeBackward(left[equalPrefixLength], numeric)) ||
1008 (equalPrefixLength != rightLength &&
1009 data->isUnsafeBackward(right[equalPrefixLength], numeric))) {
1010 // Identical prefix: Back up to the start of a contraction or reordering sequence.
1011 while(--equalPrefixLength > 0 &&
1012 data->isUnsafeBackward(left[equalPrefixLength], numeric)) {}
1013 }
1014 // Notes:
1015 // - A longer string can compare equal to a prefix of it if only ignorables follow.
1016 // - With a backward level, a longer string can compare less-than a prefix of it.
1017
1018 // Pass the actual start of each string into the CollationIterators,
1019 // plus the equalPrefixLength position,
1020 // so that prefix matches back into the equal prefix work.
1021 }
1022
1023 int32_t result;
1024 int32_t fastLatinOptions = settings->fastLatinOptions;
1025 if(fastLatinOptions >= 0 &&
1026 (equalPrefixLength == leftLength ||
1027 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX) &&
1028 (equalPrefixLength == rightLength ||
1029 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX)) {
1030 if(leftLength >= 0) {
1031 result = CollationFastLatin::compareUTF16(data->fastLatinTable,
1032 settings->fastLatinPrimaries,
1033 fastLatinOptions,
1034 left + equalPrefixLength,
1035 leftLength - equalPrefixLength,
1036 right + equalPrefixLength,
1037 rightLength - equalPrefixLength);
1038 } else {
1039 result = CollationFastLatin::compareUTF16(data->fastLatinTable,
1040 settings->fastLatinPrimaries,
1041 fastLatinOptions,
1042 left + equalPrefixLength, -1,
1043 right + equalPrefixLength, -1);
1044 }
1045 } else {
1046 result = CollationFastLatin::BAIL_OUT_RESULT;
1047 }
1048
1049 if(result == CollationFastLatin::BAIL_OUT_RESULT) {
1050 if(settings->dontCheckFCD()) {
1051 UTF16CollationIterator leftIter(data, numeric,
1052 left, left + equalPrefixLength, leftLimit);
1053 UTF16CollationIterator rightIter(data, numeric,
1054 right, right + equalPrefixLength, rightLimit);
1055 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1056 } else {
1057 FCDUTF16CollationIterator leftIter(data, numeric,
1058 left, left + equalPrefixLength, leftLimit);
1059 FCDUTF16CollationIterator rightIter(data, numeric,
1060 right, right + equalPrefixLength, rightLimit);
1061 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1062 }
1063 }
1064 if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
1065 return (UCollationResult)result;
1066 }
1067
1068 // Note: If NUL-terminated, we could get the actual limits from the iterators now.
1069 // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
1070 // and the benefit seems unlikely to be measurable.
1071
1072 // Compare identical level.
1073 const Normalizer2Impl &nfcImpl = data->nfcImpl;
1074 left += equalPrefixLength;
1075 right += equalPrefixLength;
1076 if(settings->dontCheckFCD()) {
1077 UTF16NFDIterator leftIter(left, leftLimit);
1078 UTF16NFDIterator rightIter(right, rightLimit);
1079 return compareNFDIter(nfcImpl, leftIter, rightIter);
1080 } else {
1081 FCDUTF16NFDIterator leftIter(nfcImpl, left, leftLimit);
1082 FCDUTF16NFDIterator rightIter(nfcImpl, right, rightLimit);
1083 return compareNFDIter(nfcImpl, leftIter, rightIter);
1084 }
1085}
1086
1087UCollationResult
1088RuleBasedCollator::doCompare(const uint8_t *left, int32_t leftLength,
1089 const uint8_t *right, int32_t rightLength,
1090 UErrorCode &errorCode) const {
1091 // U_FAILURE(errorCode) checked by caller.
1092 if(left == right && leftLength == rightLength) {
1093 return UCOL_EQUAL;
1094 }
1095
1096 // Identical-prefix test.
1097 int32_t equalPrefixLength = 0;
1098 if(leftLength < 0) {
1099 uint8_t c;
1100 while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
1101 if(c == 0) { return UCOL_EQUAL; }
1102 ++equalPrefixLength;
1103 }
1104 } else {
1105 for(;;) {
1106 if(equalPrefixLength == leftLength) {
1107 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
1108 break;
1109 } else if(equalPrefixLength == rightLength ||
1110 left[equalPrefixLength] != right[equalPrefixLength]) {
1111 break;
1112 }
1113 ++equalPrefixLength;
1114 }
1115 }
1116 // Back up to the start of a partially-equal code point.
1117 if(equalPrefixLength > 0 &&
1118 ((equalPrefixLength != leftLength && U8_IS_TRAIL(left[equalPrefixLength])) ||
1119 (equalPrefixLength != rightLength && U8_IS_TRAIL(right[equalPrefixLength])))) {
1120 while(--equalPrefixLength > 0 && U8_IS_TRAIL(left[equalPrefixLength])) {}
1121 }
1122
1123 UBool numeric = settings->isNumeric();
1124 if(equalPrefixLength > 0) {
Frank Tang1f164ee2022-11-08 12:31:27 -08001125 UBool unsafe = false;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001126 if(equalPrefixLength != leftLength) {
1127 int32_t i = equalPrefixLength;
1128 UChar32 c;
1129 U8_NEXT_OR_FFFD(left, i, leftLength, c);
1130 unsafe = data->isUnsafeBackward(c, numeric);
1131 }
1132 if(!unsafe && equalPrefixLength != rightLength) {
1133 int32_t i = equalPrefixLength;
1134 UChar32 c;
1135 U8_NEXT_OR_FFFD(right, i, rightLength, c);
1136 unsafe = data->isUnsafeBackward(c, numeric);
1137 }
1138 if(unsafe) {
1139 // Identical prefix: Back up to the start of a contraction or reordering sequence.
1140 UChar32 c;
1141 do {
1142 U8_PREV_OR_FFFD(left, 0, equalPrefixLength, c);
1143 } while(equalPrefixLength > 0 && data->isUnsafeBackward(c, numeric));
1144 }
1145 // See the notes in the UTF-16 version.
1146
1147 // Pass the actual start of each string into the CollationIterators,
1148 // plus the equalPrefixLength position,
1149 // so that prefix matches back into the equal prefix work.
1150 }
1151
1152 int32_t result;
1153 int32_t fastLatinOptions = settings->fastLatinOptions;
1154 if(fastLatinOptions >= 0 &&
1155 (equalPrefixLength == leftLength ||
1156 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD) &&
1157 (equalPrefixLength == rightLength ||
1158 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD)) {
1159 if(leftLength >= 0) {
1160 result = CollationFastLatin::compareUTF8(data->fastLatinTable,
1161 settings->fastLatinPrimaries,
1162 fastLatinOptions,
1163 left + equalPrefixLength,
1164 leftLength - equalPrefixLength,
1165 right + equalPrefixLength,
1166 rightLength - equalPrefixLength);
1167 } else {
1168 result = CollationFastLatin::compareUTF8(data->fastLatinTable,
1169 settings->fastLatinPrimaries,
1170 fastLatinOptions,
1171 left + equalPrefixLength, -1,
1172 right + equalPrefixLength, -1);
1173 }
1174 } else {
1175 result = CollationFastLatin::BAIL_OUT_RESULT;
1176 }
1177
1178 if(result == CollationFastLatin::BAIL_OUT_RESULT) {
1179 if(settings->dontCheckFCD()) {
1180 UTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
1181 UTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
1182 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1183 } else {
1184 FCDUTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
1185 FCDUTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
1186 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1187 }
1188 }
1189 if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
1190 return (UCollationResult)result;
1191 }
1192
1193 // Note: If NUL-terminated, we could get the actual limits from the iterators now.
1194 // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
1195 // and the benefit seems unlikely to be measurable.
1196
1197 // Compare identical level.
1198 const Normalizer2Impl &nfcImpl = data->nfcImpl;
1199 left += equalPrefixLength;
1200 right += equalPrefixLength;
1201 if(leftLength > 0) {
1202 leftLength -= equalPrefixLength;
1203 rightLength -= equalPrefixLength;
1204 }
1205 if(settings->dontCheckFCD()) {
1206 UTF8NFDIterator leftIter(left, leftLength);
1207 UTF8NFDIterator rightIter(right, rightLength);
1208 return compareNFDIter(nfcImpl, leftIter, rightIter);
1209 } else {
1210 FCDUTF8NFDIterator leftIter(data, left, leftLength);
1211 FCDUTF8NFDIterator rightIter(data, right, rightLength);
1212 return compareNFDIter(nfcImpl, leftIter, rightIter);
1213 }
1214}
1215
1216UCollationResult
1217RuleBasedCollator::compare(UCharIterator &left, UCharIterator &right,
1218 UErrorCode &errorCode) const {
1219 if(U_FAILURE(errorCode) || &left == &right) { return UCOL_EQUAL; }
1220 UBool numeric = settings->isNumeric();
1221
1222 // Identical-prefix test.
1223 int32_t equalPrefixLength = 0;
1224 {
1225 UChar32 leftUnit;
1226 UChar32 rightUnit;
1227 while((leftUnit = left.next(&left)) == (rightUnit = right.next(&right))) {
1228 if(leftUnit < 0) { return UCOL_EQUAL; }
1229 ++equalPrefixLength;
1230 }
1231
1232 // Back out the code units that differed, for the real collation comparison.
1233 if(leftUnit >= 0) { left.previous(&left); }
1234 if(rightUnit >= 0) { right.previous(&right); }
1235
1236 if(equalPrefixLength > 0) {
1237 if((leftUnit >= 0 && data->isUnsafeBackward(leftUnit, numeric)) ||
1238 (rightUnit >= 0 && data->isUnsafeBackward(rightUnit, numeric))) {
1239 // Identical prefix: Back up to the start of a contraction or reordering sequence.
1240 do {
1241 --equalPrefixLength;
1242 leftUnit = left.previous(&left);
1243 right.previous(&right);
1244 } while(equalPrefixLength > 0 && data->isUnsafeBackward(leftUnit, numeric));
1245 }
1246 // See the notes in the UTF-16 version.
1247 }
1248 }
1249
1250 UCollationResult result;
1251 if(settings->dontCheckFCD()) {
1252 UIterCollationIterator leftIter(data, numeric, left);
1253 UIterCollationIterator rightIter(data, numeric, right);
1254 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1255 } else {
1256 FCDUIterCollationIterator leftIter(data, numeric, left, equalPrefixLength);
1257 FCDUIterCollationIterator rightIter(data, numeric, right, equalPrefixLength);
1258 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1259 }
1260 if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
1261 return result;
1262 }
1263
1264 // Compare identical level.
1265 left.move(&left, equalPrefixLength, UITER_ZERO);
1266 right.move(&right, equalPrefixLength, UITER_ZERO);
1267 const Normalizer2Impl &nfcImpl = data->nfcImpl;
1268 if(settings->dontCheckFCD()) {
1269 UIterNFDIterator leftIter(left);
1270 UIterNFDIterator rightIter(right);
1271 return compareNFDIter(nfcImpl, leftIter, rightIter);
1272 } else {
1273 FCDUIterNFDIterator leftIter(data, left, equalPrefixLength);
1274 FCDUIterNFDIterator rightIter(data, right, equalPrefixLength);
1275 return compareNFDIter(nfcImpl, leftIter, rightIter);
1276 }
1277}
1278
1279CollationKey &
1280RuleBasedCollator::getCollationKey(const UnicodeString &s, CollationKey &key,
1281 UErrorCode &errorCode) const {
1282 return getCollationKey(s.getBuffer(), s.length(), key, errorCode);
1283}
1284
1285CollationKey &
1286RuleBasedCollator::getCollationKey(const UChar *s, int32_t length, CollationKey& key,
1287 UErrorCode &errorCode) const {
1288 if(U_FAILURE(errorCode)) {
1289 return key.setToBogus();
1290 }
1291 if(s == NULL && length != 0) {
1292 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1293 return key.setToBogus();
1294 }
1295 key.reset(); // resets the "bogus" state
1296 CollationKeyByteSink sink(key);
1297 writeSortKey(s, length, sink, errorCode);
1298 if(U_FAILURE(errorCode)) {
1299 key.setToBogus();
1300 } else if(key.isBogus()) {
1301 errorCode = U_MEMORY_ALLOCATION_ERROR;
1302 } else {
1303 key.setLength(sink.NumberOfBytesAppended());
1304 }
1305 return key;
1306}
1307
1308int32_t
1309RuleBasedCollator::getSortKey(const UnicodeString &s,
1310 uint8_t *dest, int32_t capacity) const {
1311 return getSortKey(s.getBuffer(), s.length(), dest, capacity);
1312}
1313
1314int32_t
1315RuleBasedCollator::getSortKey(const UChar *s, int32_t length,
1316 uint8_t *dest, int32_t capacity) const {
1317 if((s == NULL && length != 0) || capacity < 0 || (dest == NULL && capacity > 0)) {
1318 return 0;
1319 }
1320 uint8_t noDest[1] = { 0 };
1321 if(dest == NULL) {
1322 // Distinguish pure preflighting from an allocation error.
1323 dest = noDest;
1324 capacity = 0;
1325 }
1326 FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), capacity);
1327 UErrorCode errorCode = U_ZERO_ERROR;
1328 writeSortKey(s, length, sink, errorCode);
1329 return U_SUCCESS(errorCode) ? sink.NumberOfBytesAppended() : 0;
1330}
1331
1332void
1333RuleBasedCollator::writeSortKey(const UChar *s, int32_t length,
1334 SortKeyByteSink &sink, UErrorCode &errorCode) const {
1335 if(U_FAILURE(errorCode)) { return; }
1336 const UChar *limit = (length >= 0) ? s + length : NULL;
1337 UBool numeric = settings->isNumeric();
1338 CollationKeys::LevelCallback callback;
1339 if(settings->dontCheckFCD()) {
1340 UTF16CollationIterator iter(data, numeric, s, s, limit);
1341 CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
1342 sink, Collation::PRIMARY_LEVEL,
Frank Tang1f164ee2022-11-08 12:31:27 -08001343 callback, true, errorCode);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001344 } else {
1345 FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
1346 CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
1347 sink, Collation::PRIMARY_LEVEL,
Frank Tang1f164ee2022-11-08 12:31:27 -08001348 callback, true, errorCode);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001349 }
1350 if(settings->getStrength() == UCOL_IDENTICAL) {
1351 writeIdenticalLevel(s, limit, sink, errorCode);
1352 }
1353 static const char terminator = 0; // TERMINATOR_BYTE
1354 sink.Append(&terminator, 1);
1355}
1356
1357void
1358RuleBasedCollator::writeIdenticalLevel(const UChar *s, const UChar *limit,
1359 SortKeyByteSink &sink, UErrorCode &errorCode) const {
1360 // NFD quick check
1361 const UChar *nfdQCYesLimit = data->nfcImpl.decompose(s, limit, NULL, errorCode);
1362 if(U_FAILURE(errorCode)) { return; }
1363 sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
1364 UChar32 prev = 0;
1365 if(nfdQCYesLimit != s) {
1366 prev = u_writeIdenticalLevelRun(prev, s, (int32_t)(nfdQCYesLimit - s), sink);
1367 }
1368 // Is there non-NFD text?
1369 int32_t destLengthEstimate;
1370 if(limit != NULL) {
1371 if(nfdQCYesLimit == limit) { return; }
1372 destLengthEstimate = (int32_t)(limit - nfdQCYesLimit);
1373 } else {
1374 // s is NUL-terminated
1375 if(*nfdQCYesLimit == 0) { return; }
1376 destLengthEstimate = -1;
1377 }
1378 UnicodeString nfd;
1379 data->nfcImpl.decompose(nfdQCYesLimit, limit, nfd, destLengthEstimate, errorCode);
1380 u_writeIdenticalLevelRun(prev, nfd.getBuffer(), nfd.length(), sink);
1381}
1382
1383namespace {
1384
1385/**
1386 * internalNextSortKeyPart() calls CollationKeys::writeSortKeyUpToQuaternary()
1387 * with an instance of this callback class.
1388 * When another level is about to be written, the callback
1389 * records the level and the number of bytes that will be written until
1390 * the sink (which is actually a FixedSortKeyByteSink) fills up.
1391 *
1392 * When internalNextSortKeyPart() is called again, it restarts with the last level
1393 * and ignores as many bytes as were written previously for that level.
1394 */
1395class PartLevelCallback : public CollationKeys::LevelCallback {
1396public:
1397 PartLevelCallback(const SortKeyByteSink &s)
1398 : sink(s), level(Collation::PRIMARY_LEVEL) {
1399 levelCapacity = sink.GetRemainingCapacity();
1400 }
1401 virtual ~PartLevelCallback() {}
Frank Tang3e05d9d2021-11-08 14:04:04 -08001402 virtual UBool needToWrite(Collation::Level l) override {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001403 if(!sink.Overflowed()) {
1404 // Remember a level that will be at least partially written.
1405 level = l;
1406 levelCapacity = sink.GetRemainingCapacity();
Frank Tang1f164ee2022-11-08 12:31:27 -08001407 return true;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001408 } else {
Frank Tang1f164ee2022-11-08 12:31:27 -08001409 return false;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001410 }
1411 }
1412 Collation::Level getLevel() const { return level; }
1413 int32_t getLevelCapacity() const { return levelCapacity; }
1414
1415private:
1416 const SortKeyByteSink &sink;
1417 Collation::Level level;
1418 int32_t levelCapacity;
1419};
1420
1421} // namespace
1422
1423int32_t
1424RuleBasedCollator::internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2],
1425 uint8_t *dest, int32_t count, UErrorCode &errorCode) const {
1426 if(U_FAILURE(errorCode)) { return 0; }
1427 if(iter == NULL || state == NULL || count < 0 || (count > 0 && dest == NULL)) {
1428 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1429 return 0;
1430 }
1431 if(count == 0) { return 0; }
1432
1433 FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), count);
1434 sink.IgnoreBytes((int32_t)state[1]);
1435 iter->move(iter, 0, UITER_START);
1436
1437 Collation::Level level = (Collation::Level)state[0];
1438 if(level <= Collation::QUATERNARY_LEVEL) {
1439 UBool numeric = settings->isNumeric();
1440 PartLevelCallback callback(sink);
1441 if(settings->dontCheckFCD()) {
1442 UIterCollationIterator ci(data, numeric, *iter);
1443 CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
Frank Tang1f164ee2022-11-08 12:31:27 -08001444 sink, level, callback, false, errorCode);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001445 } else {
1446 FCDUIterCollationIterator ci(data, numeric, *iter, 0);
1447 CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
Frank Tang1f164ee2022-11-08 12:31:27 -08001448 sink, level, callback, false, errorCode);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001449 }
1450 if(U_FAILURE(errorCode)) { return 0; }
1451 if(sink.NumberOfBytesAppended() > count) {
1452 state[0] = (uint32_t)callback.getLevel();
1453 state[1] = (uint32_t)callback.getLevelCapacity();
1454 return count;
1455 }
1456 // All of the normal levels are done.
1457 if(settings->getStrength() == UCOL_IDENTICAL) {
1458 level = Collation::IDENTICAL_LEVEL;
1459 iter->move(iter, 0, UITER_START);
1460 }
1461 // else fall through to setting ZERO_LEVEL
1462 }
1463
1464 if(level == Collation::IDENTICAL_LEVEL) {
1465 int32_t levelCapacity = sink.GetRemainingCapacity();
1466 UnicodeString s;
1467 for(;;) {
1468 UChar32 c = iter->next(iter);
1469 if(c < 0) { break; }
1470 s.append((UChar)c);
1471 }
1472 const UChar *sArray = s.getBuffer();
1473 writeIdenticalLevel(sArray, sArray + s.length(), sink, errorCode);
1474 if(U_FAILURE(errorCode)) { return 0; }
1475 if(sink.NumberOfBytesAppended() > count) {
1476 state[0] = (uint32_t)level;
1477 state[1] = (uint32_t)levelCapacity;
1478 return count;
1479 }
1480 }
1481
1482 // ZERO_LEVEL: Fill the remainder of dest with 00 bytes.
1483 state[0] = (uint32_t)Collation::ZERO_LEVEL;
1484 state[1] = 0;
1485 int32_t length = sink.NumberOfBytesAppended();
1486 int32_t i = length;
1487 while(i < count) { dest[i++] = 0; }
1488 return length;
1489}
1490
1491void
1492RuleBasedCollator::internalGetCEs(const UnicodeString &str, UVector64 &ces,
1493 UErrorCode &errorCode) const {
1494 if(U_FAILURE(errorCode)) { return; }
1495 const UChar *s = str.getBuffer();
1496 const UChar *limit = s + str.length();
1497 UBool numeric = settings->isNumeric();
1498 if(settings->dontCheckFCD()) {
1499 UTF16CollationIterator iter(data, numeric, s, s, limit);
1500 int64_t ce;
1501 while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
1502 ces.addElement(ce, errorCode);
1503 }
1504 } else {
1505 FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
1506 int64_t ce;
1507 while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
1508 ces.addElement(ce, errorCode);
1509 }
1510 }
1511}
1512
1513namespace {
1514
1515void appendSubtag(CharString &s, char letter, const char *subtag, int32_t length,
1516 UErrorCode &errorCode) {
1517 if(U_FAILURE(errorCode) || length == 0) { return; }
1518 if(!s.isEmpty()) {
1519 s.append('_', errorCode);
1520 }
1521 s.append(letter, errorCode);
1522 for(int32_t i = 0; i < length; ++i) {
1523 s.append(uprv_toupper(subtag[i]), errorCode);
1524 }
1525}
1526
1527void appendAttribute(CharString &s, char letter, UColAttributeValue value,
1528 UErrorCode &errorCode) {
1529 if(U_FAILURE(errorCode)) { return; }
1530 if(!s.isEmpty()) {
1531 s.append('_', errorCode);
1532 }
1533 static const char *valueChars = "1234...........IXO..SN..LU......";
1534 s.append(letter, errorCode);
1535 s.append(valueChars[value], errorCode);
1536}
1537
1538} // namespace
1539
1540int32_t
1541RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
1542 char *buffer, int32_t capacity,
1543 UErrorCode &errorCode) const {
1544 if(U_FAILURE(errorCode)) { return 0; }
1545 if(buffer == NULL ? capacity != 0 : capacity < 0) {
1546 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1547 return 0;
1548 }
1549 if(locale == NULL) {
1550 locale = internalGetLocaleID(ULOC_VALID_LOCALE, errorCode);
1551 }
1552
1553 char resultLocale[ULOC_FULLNAME_CAPACITY + 1];
1554 int32_t length = ucol_getFunctionalEquivalent(resultLocale, ULOC_FULLNAME_CAPACITY,
1555 "collation", locale,
1556 NULL, &errorCode);
1557 if(U_FAILURE(errorCode)) { return 0; }
Frank Tang69c72a62019-04-03 21:41:21 -07001558 resultLocale[length] = 0;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001559
1560 // Append items in alphabetic order of their short definition letters.
1561 CharString result;
1562 char subtag[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1563
1564 if(attributeHasBeenSetExplicitly(UCOL_ALTERNATE_HANDLING)) {
1565 appendAttribute(result, 'A', getAttribute(UCOL_ALTERNATE_HANDLING, errorCode), errorCode);
1566 }
1567 // ATTR_VARIABLE_TOP not supported because 'B' was broken.
1568 // See ICU tickets #10372 and #10386.
1569 if(attributeHasBeenSetExplicitly(UCOL_CASE_FIRST)) {
1570 appendAttribute(result, 'C', getAttribute(UCOL_CASE_FIRST, errorCode), errorCode);
1571 }
1572 if(attributeHasBeenSetExplicitly(UCOL_NUMERIC_COLLATION)) {
1573 appendAttribute(result, 'D', getAttribute(UCOL_NUMERIC_COLLATION, errorCode), errorCode);
1574 }
1575 if(attributeHasBeenSetExplicitly(UCOL_CASE_LEVEL)) {
1576 appendAttribute(result, 'E', getAttribute(UCOL_CASE_LEVEL, errorCode), errorCode);
1577 }
1578 if(attributeHasBeenSetExplicitly(UCOL_FRENCH_COLLATION)) {
1579 appendAttribute(result, 'F', getAttribute(UCOL_FRENCH_COLLATION, errorCode), errorCode);
1580 }
1581 // Note: UCOL_HIRAGANA_QUATERNARY_MODE is deprecated and never changes away from default.
1582 length = uloc_getKeywordValue(resultLocale, "collation", subtag, UPRV_LENGTHOF(subtag), &errorCode);
1583 appendSubtag(result, 'K', subtag, length, errorCode);
1584 length = uloc_getLanguage(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
Frank Tang69c72a62019-04-03 21:41:21 -07001585 if (length == 0) {
1586 appendSubtag(result, 'L', "root", 4, errorCode);
1587 } else {
1588 appendSubtag(result, 'L', subtag, length, errorCode);
1589 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001590 if(attributeHasBeenSetExplicitly(UCOL_NORMALIZATION_MODE)) {
1591 appendAttribute(result, 'N', getAttribute(UCOL_NORMALIZATION_MODE, errorCode), errorCode);
1592 }
1593 length = uloc_getCountry(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
1594 appendSubtag(result, 'R', subtag, length, errorCode);
1595 if(attributeHasBeenSetExplicitly(UCOL_STRENGTH)) {
1596 appendAttribute(result, 'S', getAttribute(UCOL_STRENGTH, errorCode), errorCode);
1597 }
1598 length = uloc_getVariant(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
1599 appendSubtag(result, 'V', subtag, length, errorCode);
1600 length = uloc_getScript(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
1601 appendSubtag(result, 'Z', subtag, length, errorCode);
1602
1603 if(U_FAILURE(errorCode)) { return 0; }
Frank Tangf90543d2020-10-30 19:02:04 -07001604 return result.extract(buffer, capacity, errorCode);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001605}
1606
1607UBool
1608RuleBasedCollator::isUnsafe(UChar32 c) const {
1609 return data->isUnsafeBackward(c, settings->isNumeric());
1610}
1611
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001612void U_CALLCONV
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001613RuleBasedCollator::computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode) {
1614 t->maxExpansions = CollationElementIterator::computeMaxExpansions(t->data, errorCode);
1615}
1616
1617UBool
1618RuleBasedCollator::initMaxExpansions(UErrorCode &errorCode) const {
1619 umtx_initOnce(tailoring->maxExpansionsInitOnce, computeMaxExpansions, tailoring, errorCode);
1620 return U_SUCCESS(errorCode);
1621}
1622
1623CollationElementIterator *
1624RuleBasedCollator::createCollationElementIterator(const UnicodeString& source) const {
1625 UErrorCode errorCode = U_ZERO_ERROR;
1626 if(!initMaxExpansions(errorCode)) { return NULL; }
1627 CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
1628 if(U_FAILURE(errorCode)) {
1629 delete cei;
1630 return NULL;
1631 }
1632 return cei;
1633}
1634
1635CollationElementIterator *
1636RuleBasedCollator::createCollationElementIterator(const CharacterIterator& source) const {
1637 UErrorCode errorCode = U_ZERO_ERROR;
1638 if(!initMaxExpansions(errorCode)) { return NULL; }
1639 CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
1640 if(U_FAILURE(errorCode)) {
1641 delete cei;
1642 return NULL;
1643 }
1644 return cei;
1645}
1646
1647int32_t
1648RuleBasedCollator::getMaxExpansion(int32_t order) const {
1649 UErrorCode errorCode = U_ZERO_ERROR;
1650 (void)initMaxExpansions(errorCode);
1651 return CollationElementIterator::getMaxExpansion(tailoring->maxExpansions, order);
1652}
1653
1654U_NAMESPACE_END
1655
1656#endif // !UCONFIG_NO_COLLATION