Frank Tang | 952ccb9 | 2019-08-22 12:09:17 -0700 | [diff] [blame] | 1 | // © 2019 and later: Unicode, Inc. and others. |
Frank Tang | f90543d | 2020-10-30 19:02:04 -0700 | [diff] [blame] | 2 | // License & terms of use: http://www.unicode.org/copyright.html |
Frank Tang | 952ccb9 | 2019-08-22 12:09:17 -0700 | [diff] [blame] | 3 | |
| 4 | // locdistance.cpp |
| 5 | // created: 2019may08 Markus W. Scherer |
| 6 | |
| 7 | #include "unicode/utypes.h" |
| 8 | #include "unicode/bytestrie.h" |
| 9 | #include "unicode/localematcher.h" |
| 10 | #include "unicode/locid.h" |
| 11 | #include "unicode/uobject.h" |
| 12 | #include "unicode/ures.h" |
| 13 | #include "cstring.h" |
| 14 | #include "locdistance.h" |
| 15 | #include "loclikelysubtags.h" |
| 16 | #include "uassert.h" |
| 17 | #include "ucln_cmn.h" |
| 18 | #include "uinvchar.h" |
| 19 | #include "umutex.h" |
| 20 | |
| 21 | U_NAMESPACE_BEGIN |
| 22 | |
| 23 | namespace { |
| 24 | |
| 25 | /** |
| 26 | * Bit flag used on the last character of a subtag in the trie. |
| 27 | * Must be set consistently by the builder and the lookup code. |
| 28 | */ |
| 29 | constexpr int32_t END_OF_SUBTAG = 0x80; |
| 30 | /** Distance value bit flag, set by the builder. */ |
| 31 | constexpr int32_t DISTANCE_SKIP_SCRIPT = 0x80; |
| 32 | /** Distance value bit flag, set by trieNext(). */ |
| 33 | constexpr int32_t DISTANCE_IS_FINAL = 0x100; |
| 34 | constexpr int32_t DISTANCE_IS_FINAL_OR_SKIP_SCRIPT = DISTANCE_IS_FINAL | DISTANCE_SKIP_SCRIPT; |
| 35 | |
| 36 | constexpr int32_t ABOVE_THRESHOLD = 100; |
| 37 | |
| 38 | // Indexes into array of distances. |
| 39 | enum { |
| 40 | IX_DEF_LANG_DISTANCE, |
| 41 | IX_DEF_SCRIPT_DISTANCE, |
| 42 | IX_DEF_REGION_DISTANCE, |
| 43 | IX_MIN_REGION_DISTANCE, |
| 44 | IX_LIMIT |
| 45 | }; |
| 46 | |
| 47 | LocaleDistance *gLocaleDistance = nullptr; |
Frank Tang | 1c67b4e | 2022-05-18 10:13:51 -0700 | [diff] [blame] | 48 | UInitOnce gInitOnce {}; |
Frank Tang | 952ccb9 | 2019-08-22 12:09:17 -0700 | [diff] [blame] | 49 | |
| 50 | UBool U_CALLCONV cleanup() { |
| 51 | delete gLocaleDistance; |
| 52 | gLocaleDistance = nullptr; |
| 53 | gInitOnce.reset(); |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 54 | return true; |
Frank Tang | 952ccb9 | 2019-08-22 12:09:17 -0700 | [diff] [blame] | 55 | } |
| 56 | |
| 57 | } // namespace |
| 58 | |
| 59 | void U_CALLCONV LocaleDistance::initLocaleDistance(UErrorCode &errorCode) { |
| 60 | // This function is invoked only via umtx_initOnce(). |
| 61 | U_ASSERT(gLocaleDistance == nullptr); |
| 62 | const XLikelySubtags &likely = *XLikelySubtags::getSingleton(errorCode); |
| 63 | if (U_FAILURE(errorCode)) { return; } |
| 64 | const LocaleDistanceData &data = likely.getDistanceData(); |
| 65 | if (data.distanceTrieBytes == nullptr || |
| 66 | data.regionToPartitions == nullptr || data.partitions == nullptr || |
| 67 | // ok if no paradigms |
| 68 | data.distances == nullptr) { |
| 69 | errorCode = U_MISSING_RESOURCE_ERROR; |
| 70 | return; |
| 71 | } |
Frank Tang | f222396 | 2020-04-27 18:25:29 -0700 | [diff] [blame] | 72 | gLocaleDistance = new LocaleDistance(data, likely); |
Frank Tang | 952ccb9 | 2019-08-22 12:09:17 -0700 | [diff] [blame] | 73 | if (gLocaleDistance == nullptr) { |
| 74 | errorCode = U_MEMORY_ALLOCATION_ERROR; |
| 75 | return; |
| 76 | } |
| 77 | ucln_common_registerCleanup(UCLN_COMMON_LOCALE_DISTANCE, cleanup); |
| 78 | } |
| 79 | |
| 80 | const LocaleDistance *LocaleDistance::getSingleton(UErrorCode &errorCode) { |
| 81 | if (U_FAILURE(errorCode)) { return nullptr; } |
| 82 | umtx_initOnce(gInitOnce, &LocaleDistance::initLocaleDistance, errorCode); |
| 83 | return gLocaleDistance; |
| 84 | } |
| 85 | |
Frank Tang | f222396 | 2020-04-27 18:25:29 -0700 | [diff] [blame] | 86 | LocaleDistance::LocaleDistance(const LocaleDistanceData &data, const XLikelySubtags &likely) : |
| 87 | likelySubtags(likely), |
Frank Tang | 952ccb9 | 2019-08-22 12:09:17 -0700 | [diff] [blame] | 88 | trie(data.distanceTrieBytes), |
| 89 | regionToPartitionsIndex(data.regionToPartitions), partitionArrays(data.partitions), |
| 90 | paradigmLSRs(data.paradigms), paradigmLSRsLength(data.paradigmsLength), |
| 91 | defaultLanguageDistance(data.distances[IX_DEF_LANG_DISTANCE]), |
| 92 | defaultScriptDistance(data.distances[IX_DEF_SCRIPT_DISTANCE]), |
| 93 | defaultRegionDistance(data.distances[IX_DEF_REGION_DISTANCE]), |
| 94 | minRegionDistance(data.distances[IX_MIN_REGION_DISTANCE]) { |
| 95 | // For the default demotion value, use the |
| 96 | // default region distance between unrelated Englishes. |
| 97 | // Thus, unless demotion is turned off, |
| 98 | // a mere region difference for one desired locale |
| 99 | // is as good as a perfect match for the next following desired locale. |
| 100 | // As of CLDR 36, we have <languageMatch desired="en_*_*" supported="en_*_*" distance="5"/>. |
Frank Tang | f222396 | 2020-04-27 18:25:29 -0700 | [diff] [blame] | 101 | LSR en("en", "Latn", "US", LSR::EXPLICIT_LSR); |
| 102 | LSR enGB("en", "Latn", "GB", LSR::EXPLICIT_LSR); |
Frank Tang | 952ccb9 | 2019-08-22 12:09:17 -0700 | [diff] [blame] | 103 | const LSR *p_enGB = &enGB; |
Frank Tang | f222396 | 2020-04-27 18:25:29 -0700 | [diff] [blame] | 104 | int32_t indexAndDistance = getBestIndexAndDistance(en, &p_enGB, 1, |
| 105 | shiftDistance(50), ULOCMATCH_FAVOR_LANGUAGE, ULOCMATCH_DIRECTION_WITH_ONE_WAY); |
| 106 | defaultDemotionPerDesiredLocale = getDistanceFloor(indexAndDistance); |
Frank Tang | 952ccb9 | 2019-08-22 12:09:17 -0700 | [diff] [blame] | 107 | } |
| 108 | |
| 109 | int32_t LocaleDistance::getBestIndexAndDistance( |
| 110 | const LSR &desired, |
| 111 | const LSR **supportedLSRs, int32_t supportedLSRsLength, |
Frank Tang | f222396 | 2020-04-27 18:25:29 -0700 | [diff] [blame] | 112 | int32_t shiftedThreshold, |
| 113 | ULocMatchFavorSubtag favorSubtag, ULocMatchDirection direction) const { |
Frank Tang | 952ccb9 | 2019-08-22 12:09:17 -0700 | [diff] [blame] | 114 | BytesTrie iter(trie); |
| 115 | // Look up the desired language only once for all supported LSRs. |
| 116 | // Its "distance" is either a match point value of 0, or a non-match negative value. |
| 117 | // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules. |
| 118 | int32_t desLangDistance = trieNext(iter, desired.language, false); |
| 119 | uint64_t desLangState = desLangDistance >= 0 && supportedLSRsLength > 1 ? iter.getState64() : 0; |
| 120 | // Index of the supported LSR with the lowest distance. |
| 121 | int32_t bestIndex = -1; |
Frank Tang | f222396 | 2020-04-27 18:25:29 -0700 | [diff] [blame] | 122 | // Cached lookup info from XLikelySubtags.compareLikely(). |
| 123 | int32_t bestLikelyInfo = -1; |
Frank Tang | 952ccb9 | 2019-08-22 12:09:17 -0700 | [diff] [blame] | 124 | for (int32_t slIndex = 0; slIndex < supportedLSRsLength; ++slIndex) { |
| 125 | const LSR &supported = *supportedLSRs[slIndex]; |
| 126 | bool star = false; |
| 127 | int32_t distance = desLangDistance; |
| 128 | if (distance >= 0) { |
| 129 | U_ASSERT((distance & DISTANCE_IS_FINAL) == 0); |
| 130 | if (slIndex != 0) { |
| 131 | iter.resetToState64(desLangState); |
| 132 | } |
| 133 | distance = trieNext(iter, supported.language, true); |
| 134 | } |
| 135 | // Note: The data builder verifies that there are no rules with "any" (*) language and |
| 136 | // real (non *) script or region subtags. |
| 137 | // This means that if the lookup for either language fails we can use |
| 138 | // the default distances without further lookups. |
| 139 | int32_t flags; |
| 140 | if (distance >= 0) { |
| 141 | flags = distance & DISTANCE_IS_FINAL_OR_SKIP_SCRIPT; |
| 142 | distance &= ~DISTANCE_IS_FINAL_OR_SKIP_SCRIPT; |
| 143 | } else { // <*, *> |
| 144 | if (uprv_strcmp(desired.language, supported.language) == 0) { |
| 145 | distance = 0; |
| 146 | } else { |
| 147 | distance = defaultLanguageDistance; |
| 148 | } |
| 149 | flags = 0; |
| 150 | star = true; |
| 151 | } |
| 152 | U_ASSERT(0 <= distance && distance <= 100); |
Frank Tang | f222396 | 2020-04-27 18:25:29 -0700 | [diff] [blame] | 153 | // Round up the shifted threshold (if fraction bits are not 0) |
| 154 | // for comparison with un-shifted distances until we need fraction bits. |
| 155 | // (If we simply shifted non-zero fraction bits away, then we might ignore a language |
| 156 | // when it's really still a micro distance below the threshold.) |
| 157 | int32_t roundedThreshold = (shiftedThreshold + DISTANCE_FRACTION_MASK) >> DISTANCE_SHIFT; |
Frank Tang | 952ccb9 | 2019-08-22 12:09:17 -0700 | [diff] [blame] | 158 | // We implement "favor subtag" by reducing the language subtag distance |
| 159 | // (unscientifically reducing it to a quarter of the normal value), |
| 160 | // so that the script distance is relatively more important. |
| 161 | // For example, given a default language distance of 80, we reduce it to 20, |
| 162 | // which is below the default threshold of 50, which is the default script distance. |
| 163 | if (favorSubtag == ULOCMATCH_FAVOR_SCRIPT) { |
| 164 | distance >>= 2; |
| 165 | } |
Frank Tang | f222396 | 2020-04-27 18:25:29 -0700 | [diff] [blame] | 166 | // Let distance == roundedThreshold pass until the tie-breaker logic |
| 167 | // at the end of the loop. |
| 168 | if (distance > roundedThreshold) { |
Frank Tang | 952ccb9 | 2019-08-22 12:09:17 -0700 | [diff] [blame] | 169 | continue; |
| 170 | } |
| 171 | |
| 172 | int32_t scriptDistance; |
| 173 | if (star || flags != 0) { |
| 174 | if (uprv_strcmp(desired.script, supported.script) == 0) { |
| 175 | scriptDistance = 0; |
| 176 | } else { |
| 177 | scriptDistance = defaultScriptDistance; |
| 178 | } |
| 179 | } else { |
| 180 | scriptDistance = getDesSuppScriptDistance(iter, iter.getState64(), |
| 181 | desired.script, supported.script); |
| 182 | flags = scriptDistance & DISTANCE_IS_FINAL; |
| 183 | scriptDistance &= ~DISTANCE_IS_FINAL; |
| 184 | } |
| 185 | distance += scriptDistance; |
Frank Tang | f222396 | 2020-04-27 18:25:29 -0700 | [diff] [blame] | 186 | if (distance > roundedThreshold) { |
Frank Tang | 952ccb9 | 2019-08-22 12:09:17 -0700 | [diff] [blame] | 187 | continue; |
| 188 | } |
| 189 | |
| 190 | if (uprv_strcmp(desired.region, supported.region) == 0) { |
| 191 | // regionDistance = 0 |
| 192 | } else if (star || (flags & DISTANCE_IS_FINAL) != 0) { |
| 193 | distance += defaultRegionDistance; |
| 194 | } else { |
Frank Tang | f222396 | 2020-04-27 18:25:29 -0700 | [diff] [blame] | 195 | int32_t remainingThreshold = roundedThreshold - distance; |
| 196 | if (minRegionDistance > remainingThreshold) { |
Frank Tang | 952ccb9 | 2019-08-22 12:09:17 -0700 | [diff] [blame] | 197 | continue; |
| 198 | } |
| 199 | |
| 200 | // From here on we know the regions are not equal. |
| 201 | // Map each region to zero or more partitions. (zero = one non-matching string) |
| 202 | // (Each array of single-character partition strings is encoded as one string.) |
| 203 | // If either side has more than one, then we find the maximum distance. |
| 204 | // This could be optimized by adding some more structure, but probably not worth it. |
| 205 | distance += getRegionPartitionsDistance( |
| 206 | iter, iter.getState64(), |
| 207 | partitionsForRegion(desired), |
| 208 | partitionsForRegion(supported), |
| 209 | remainingThreshold); |
| 210 | } |
Frank Tang | f222396 | 2020-04-27 18:25:29 -0700 | [diff] [blame] | 211 | int32_t shiftedDistance = shiftDistance(distance); |
| 212 | if (shiftedDistance == 0) { |
| 213 | // Distinguish between equivalent but originally unequal locales via an |
| 214 | // additional micro distance. |
| 215 | shiftedDistance |= (desired.flags ^ supported.flags); |
| 216 | if (shiftedDistance < shiftedThreshold) { |
| 217 | if (direction != ULOCMATCH_DIRECTION_ONLY_TWO_WAY || |
| 218 | // Is there also a match when we swap desired/supported? |
| 219 | isMatch(supported, desired, shiftedThreshold, favorSubtag)) { |
| 220 | if (shiftedDistance == 0) { |
| 221 | return slIndex << INDEX_SHIFT; |
| 222 | } |
| 223 | bestIndex = slIndex; |
| 224 | shiftedThreshold = shiftedDistance; |
| 225 | bestLikelyInfo = -1; |
| 226 | } |
Frank Tang | 952ccb9 | 2019-08-22 12:09:17 -0700 | [diff] [blame] | 227 | } |
Frank Tang | f222396 | 2020-04-27 18:25:29 -0700 | [diff] [blame] | 228 | } else { |
| 229 | if (shiftedDistance < shiftedThreshold) { |
| 230 | if (direction != ULOCMATCH_DIRECTION_ONLY_TWO_WAY || |
| 231 | // Is there also a match when we swap desired/supported? |
| 232 | isMatch(supported, desired, shiftedThreshold, favorSubtag)) { |
| 233 | bestIndex = slIndex; |
| 234 | shiftedThreshold = shiftedDistance; |
| 235 | bestLikelyInfo = -1; |
| 236 | } |
| 237 | } else if (shiftedDistance == shiftedThreshold && bestIndex >= 0) { |
| 238 | if (direction != ULOCMATCH_DIRECTION_ONLY_TWO_WAY || |
| 239 | // Is there also a match when we swap desired/supported? |
| 240 | isMatch(supported, desired, shiftedThreshold, favorSubtag)) { |
| 241 | bestLikelyInfo = likelySubtags.compareLikely( |
| 242 | supported, *supportedLSRs[bestIndex], bestLikelyInfo); |
| 243 | if ((bestLikelyInfo & 1) != 0) { |
| 244 | // This supported locale matches as well as the previous best match, |
| 245 | // and neither matches perfectly, |
| 246 | // but this one is "more likely" (has more-default subtags). |
| 247 | bestIndex = slIndex; |
| 248 | } |
| 249 | } |
| 250 | } |
Frank Tang | 952ccb9 | 2019-08-22 12:09:17 -0700 | [diff] [blame] | 251 | } |
| 252 | } |
Frank Tang | f222396 | 2020-04-27 18:25:29 -0700 | [diff] [blame] | 253 | return bestIndex >= 0 ? |
| 254 | (bestIndex << INDEX_SHIFT) | shiftedThreshold : |
| 255 | INDEX_NEG_1 | shiftDistance(ABOVE_THRESHOLD); |
Frank Tang | 952ccb9 | 2019-08-22 12:09:17 -0700 | [diff] [blame] | 256 | } |
| 257 | |
| 258 | int32_t LocaleDistance::getDesSuppScriptDistance( |
| 259 | BytesTrie &iter, uint64_t startState, const char *desired, const char *supported) { |
| 260 | // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules. |
| 261 | int32_t distance = trieNext(iter, desired, false); |
| 262 | if (distance >= 0) { |
| 263 | distance = trieNext(iter, supported, true); |
| 264 | } |
| 265 | if (distance < 0) { |
| 266 | UStringTrieResult result = iter.resetToState64(startState).next(u'*'); // <*, *> |
| 267 | U_ASSERT(USTRINGTRIE_HAS_VALUE(result)); |
| 268 | if (uprv_strcmp(desired, supported) == 0) { |
| 269 | distance = 0; // same script |
| 270 | } else { |
| 271 | distance = iter.getValue(); |
| 272 | U_ASSERT(distance >= 0); |
| 273 | } |
| 274 | if (result == USTRINGTRIE_FINAL_VALUE) { |
| 275 | distance |= DISTANCE_IS_FINAL; |
| 276 | } |
| 277 | } |
| 278 | return distance; |
| 279 | } |
| 280 | |
| 281 | int32_t LocaleDistance::getRegionPartitionsDistance( |
| 282 | BytesTrie &iter, uint64_t startState, |
| 283 | const char *desiredPartitions, const char *supportedPartitions, int32_t threshold) { |
| 284 | char desired = *desiredPartitions++; |
| 285 | char supported = *supportedPartitions++; |
| 286 | U_ASSERT(desired != 0 && supported != 0); |
| 287 | // See if we have single desired/supported partitions, from NUL-terminated |
| 288 | // partition strings without explicit length. |
| 289 | bool suppLengthGt1 = *supportedPartitions != 0; // gt1: more than 1 character |
| 290 | // equivalent to: if (desLength == 1 && suppLength == 1) |
| 291 | if (*desiredPartitions == 0 && !suppLengthGt1) { |
| 292 | // Fastpath for single desired/supported partitions. |
| 293 | UStringTrieResult result = iter.next(uprv_invCharToAscii(desired) | END_OF_SUBTAG); |
| 294 | if (USTRINGTRIE_HAS_NEXT(result)) { |
| 295 | result = iter.next(uprv_invCharToAscii(supported) | END_OF_SUBTAG); |
| 296 | if (USTRINGTRIE_HAS_VALUE(result)) { |
| 297 | return iter.getValue(); |
| 298 | } |
| 299 | } |
| 300 | return getFallbackRegionDistance(iter, startState); |
| 301 | } |
| 302 | |
| 303 | const char *supportedStart = supportedPartitions - 1; // for restart of inner loop |
| 304 | int32_t regionDistance = 0; |
| 305 | // Fall back to * only once, not for each pair of partition strings. |
| 306 | bool star = false; |
| 307 | for (;;) { |
| 308 | // Look up each desired-partition string only once, |
| 309 | // not for each (desired, supported) pair. |
| 310 | UStringTrieResult result = iter.next(uprv_invCharToAscii(desired) | END_OF_SUBTAG); |
| 311 | if (USTRINGTRIE_HAS_NEXT(result)) { |
| 312 | uint64_t desState = suppLengthGt1 ? iter.getState64() : 0; |
| 313 | for (;;) { |
| 314 | result = iter.next(uprv_invCharToAscii(supported) | END_OF_SUBTAG); |
| 315 | int32_t d; |
| 316 | if (USTRINGTRIE_HAS_VALUE(result)) { |
| 317 | d = iter.getValue(); |
| 318 | } else if (star) { |
| 319 | d = 0; |
| 320 | } else { |
| 321 | d = getFallbackRegionDistance(iter, startState); |
| 322 | star = true; |
| 323 | } |
Frank Tang | f222396 | 2020-04-27 18:25:29 -0700 | [diff] [blame] | 324 | if (d > threshold) { |
Frank Tang | 952ccb9 | 2019-08-22 12:09:17 -0700 | [diff] [blame] | 325 | return d; |
| 326 | } else if (regionDistance < d) { |
| 327 | regionDistance = d; |
| 328 | } |
| 329 | if ((supported = *supportedPartitions++) != 0) { |
| 330 | iter.resetToState64(desState); |
| 331 | } else { |
| 332 | break; |
| 333 | } |
| 334 | } |
| 335 | } else if (!star) { |
| 336 | int32_t d = getFallbackRegionDistance(iter, startState); |
Frank Tang | f222396 | 2020-04-27 18:25:29 -0700 | [diff] [blame] | 337 | if (d > threshold) { |
Frank Tang | 952ccb9 | 2019-08-22 12:09:17 -0700 | [diff] [blame] | 338 | return d; |
| 339 | } else if (regionDistance < d) { |
| 340 | regionDistance = d; |
| 341 | } |
| 342 | star = true; |
| 343 | } |
| 344 | if ((desired = *desiredPartitions++) != 0) { |
| 345 | iter.resetToState64(startState); |
| 346 | supportedPartitions = supportedStart; |
| 347 | supported = *supportedPartitions++; |
| 348 | } else { |
| 349 | break; |
| 350 | } |
| 351 | } |
| 352 | return regionDistance; |
| 353 | } |
| 354 | |
| 355 | int32_t LocaleDistance::getFallbackRegionDistance(BytesTrie &iter, uint64_t startState) { |
| 356 | #if U_DEBUG |
| 357 | UStringTrieResult result = |
| 358 | #endif |
| 359 | iter.resetToState64(startState).next(u'*'); // <*, *> |
| 360 | U_ASSERT(USTRINGTRIE_HAS_VALUE(result)); |
| 361 | int32_t distance = iter.getValue(); |
| 362 | U_ASSERT(distance >= 0); |
| 363 | return distance; |
| 364 | } |
| 365 | |
| 366 | int32_t LocaleDistance::trieNext(BytesTrie &iter, const char *s, bool wantValue) { |
| 367 | uint8_t c; |
| 368 | if ((c = *s) == 0) { |
| 369 | return -1; // no empty subtags in the distance data |
| 370 | } |
| 371 | for (;;) { |
| 372 | c = uprv_invCharToAscii(c); |
| 373 | // EBCDIC: If *s is not an invariant character, |
| 374 | // then c is now 0 and will simply not match anything, which is harmless. |
| 375 | uint8_t next = *++s; |
| 376 | if (next != 0) { |
| 377 | if (!USTRINGTRIE_HAS_NEXT(iter.next(c))) { |
| 378 | return -1; |
| 379 | } |
| 380 | } else { |
| 381 | // last character of this subtag |
| 382 | UStringTrieResult result = iter.next(c | END_OF_SUBTAG); |
| 383 | if (wantValue) { |
| 384 | if (USTRINGTRIE_HAS_VALUE(result)) { |
| 385 | int32_t value = iter.getValue(); |
| 386 | if (result == USTRINGTRIE_FINAL_VALUE) { |
| 387 | value |= DISTANCE_IS_FINAL; |
| 388 | } |
| 389 | return value; |
| 390 | } |
| 391 | } else { |
| 392 | if (USTRINGTRIE_HAS_NEXT(result)) { |
| 393 | return 0; |
| 394 | } |
| 395 | } |
| 396 | return -1; |
| 397 | } |
| 398 | c = next; |
| 399 | } |
| 400 | } |
| 401 | |
| 402 | UBool LocaleDistance::isParadigmLSR(const LSR &lsr) const { |
Frank Tang | f222396 | 2020-04-27 18:25:29 -0700 | [diff] [blame] | 403 | // Linear search for a very short list (length 6 as of 2019), |
| 404 | // because we look for equivalence not equality, and |
| 405 | // because it's easy. |
| 406 | // If there are many paradigm LSRs we should use a hash set |
| 407 | // with custom comparator and hasher. |
Frank Tang | 952ccb9 | 2019-08-22 12:09:17 -0700 | [diff] [blame] | 408 | U_ASSERT(paradigmLSRsLength <= 15); |
| 409 | for (int32_t i = 0; i < paradigmLSRsLength; ++i) { |
Frank Tang | f222396 | 2020-04-27 18:25:29 -0700 | [diff] [blame] | 410 | if (lsr.isEquivalentTo(paradigmLSRs[i])) { return true; } |
Frank Tang | 952ccb9 | 2019-08-22 12:09:17 -0700 | [diff] [blame] | 411 | } |
| 412 | return false; |
| 413 | } |
| 414 | |
| 415 | U_NAMESPACE_END |