blob: 5795cbf87e633aba1c06bc986bb91134e58d1771 [file] [log] [blame]
Frank Tang952ccb92019-08-22 12:09:17 -07001// © 2019 and later: Unicode, Inc. and others.
Frank Tangf90543d2020-10-30 19:02:04 -07002// License & terms of use: http://www.unicode.org/copyright.html
Frank Tang952ccb92019-08-22 12:09:17 -07003
4// localematcher.cpp
5// created: 2019may08 Markus W. Scherer
6
Frank Tang952ccb92019-08-22 12:09:17 -07007#include "unicode/utypes.h"
8#include "unicode/localebuilder.h"
9#include "unicode/localematcher.h"
10#include "unicode/locid.h"
11#include "unicode/stringpiece.h"
Frank Tangf2223962020-04-27 18:25:29 -070012#include "unicode/uloc.h"
Frank Tang952ccb92019-08-22 12:09:17 -070013#include "unicode/uobject.h"
14#include "cstring.h"
15#include "localeprioritylist.h"
16#include "loclikelysubtags.h"
17#include "locdistance.h"
18#include "lsr.h"
19#include "uassert.h"
20#include "uhash.h"
Frank Tangf2223962020-04-27 18:25:29 -070021#include "ustr_imp.h"
Frank Tang952ccb92019-08-22 12:09:17 -070022#include "uvector.h"
23
Frank Tangf2223962020-04-27 18:25:29 -070024#define UND_LSR LSR("und", "", "", LSR::EXPLICIT_LSR)
Frank Tang952ccb92019-08-22 12:09:17 -070025
/**
 * Tells the LocaleMatcher how long the caller's desired-locale objects stay valid.
 *
 * @draft ICU 65
 */
enum ULocMatchLifetime {
    /**
     * The desired Locale objects are short-lived.
     * Whenever the matcher needs one of them after the current function call,
     * it makes its own copy.
     *
     * @draft ICU 65
     */
    ULOCMATCH_TEMPORARY_LOCALES = 0,
    /**
     * The desired Locale objects outlive the use of the matcher.
     * The matcher only keeps a pointer to a locale it needs after the current
     * function call, which saves a copy.
     *
     * @draft ICU 65
     */
    ULOCMATCH_STORED_LOCALES = 1  // TODO: permanent? cached? clone?
};
#ifndef U_IN_DOXYGEN
typedef enum ULocMatchLifetime ULocMatchLifetime;
#endif
51
52U_NAMESPACE_BEGIN
53
54LocaleMatcher::Result::Result(LocaleMatcher::Result &&src) U_NOEXCEPT :
55 desiredLocale(src.desiredLocale),
56 supportedLocale(src.supportedLocale),
57 desiredIndex(src.desiredIndex),
58 supportedIndex(src.supportedIndex),
59 desiredIsOwned(src.desiredIsOwned) {
60 if (desiredIsOwned) {
61 src.desiredLocale = nullptr;
62 src.desiredIndex = -1;
63 src.desiredIsOwned = FALSE;
64 }
65}
66
67LocaleMatcher::Result::~Result() {
68 if (desiredIsOwned) {
69 delete desiredLocale;
70 }
71}
72
73LocaleMatcher::Result &LocaleMatcher::Result::operator=(LocaleMatcher::Result &&src) U_NOEXCEPT {
74 this->~Result();
75
76 desiredLocale = src.desiredLocale;
77 supportedLocale = src.supportedLocale;
78 desiredIndex = src.desiredIndex;
79 supportedIndex = src.supportedIndex;
80 desiredIsOwned = src.desiredIsOwned;
81
82 if (desiredIsOwned) {
83 src.desiredLocale = nullptr;
84 src.desiredIndex = -1;
85 src.desiredIsOwned = FALSE;
86 }
87 return *this;
88}
89
90Locale LocaleMatcher::Result::makeResolvedLocale(UErrorCode &errorCode) const {
91 if (U_FAILURE(errorCode) || supportedLocale == nullptr) {
92 return Locale::getRoot();
93 }
94 const Locale *bestDesired = getDesiredLocale();
95 if (bestDesired == nullptr || *supportedLocale == *bestDesired) {
96 return *supportedLocale;
97 }
98 LocaleBuilder b;
99 b.setLocale(*supportedLocale);
100
101 // Copy the region from bestDesired, if there is one.
102 const char *region = bestDesired->getCountry();
103 if (*region != 0) {
104 b.setRegion(region);
105 }
106
107 // Copy the variants from bestDesired, if there are any.
108 // Note that this will override any supportedLocale variants.
109 // For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster).
110 const char *variants = bestDesired->getVariant();
111 if (*variants != 0) {
112 b.setVariant(variants);
113 }
114
115 // Copy the extensions from bestDesired, if there are any.
116 // C++ note: The following note, copied from Java, may not be true,
117 // as long as C++ copies by legacy ICU keyword, not by extension singleton.
118 // Note that this will override any supportedLocale extensions.
119 // For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
120 // (replacing calendar).
121 b.copyExtensionsFrom(*bestDesired, errorCode);
122 return b.build(errorCode);
123}
124
125LocaleMatcher::Builder::Builder(LocaleMatcher::Builder &&src) U_NOEXCEPT :
126 errorCode_(src.errorCode_),
127 supportedLocales_(src.supportedLocales_),
128 thresholdDistance_(src.thresholdDistance_),
129 demotion_(src.demotion_),
130 defaultLocale_(src.defaultLocale_),
Frank Tangf90543d2020-10-30 19:02:04 -0700131 withDefault_(src.withDefault_),
Frank Tangf2223962020-04-27 18:25:29 -0700132 favor_(src.favor_),
133 direction_(src.direction_) {
Frank Tang952ccb92019-08-22 12:09:17 -0700134 src.supportedLocales_ = nullptr;
135 src.defaultLocale_ = nullptr;
136}
137
138LocaleMatcher::Builder::~Builder() {
139 delete supportedLocales_;
140 delete defaultLocale_;
Frank Tangf90543d2020-10-30 19:02:04 -0700141 delete maxDistanceDesired_;
142 delete maxDistanceSupported_;
Frank Tang952ccb92019-08-22 12:09:17 -0700143}
144
145LocaleMatcher::Builder &LocaleMatcher::Builder::operator=(LocaleMatcher::Builder &&src) U_NOEXCEPT {
146 this->~Builder();
147
148 errorCode_ = src.errorCode_;
149 supportedLocales_ = src.supportedLocales_;
150 thresholdDistance_ = src.thresholdDistance_;
151 demotion_ = src.demotion_;
152 defaultLocale_ = src.defaultLocale_;
Frank Tangf90543d2020-10-30 19:02:04 -0700153 withDefault_ = src.withDefault_,
Frank Tang952ccb92019-08-22 12:09:17 -0700154 favor_ = src.favor_;
Frank Tangf2223962020-04-27 18:25:29 -0700155 direction_ = src.direction_;
Frank Tang952ccb92019-08-22 12:09:17 -0700156
157 src.supportedLocales_ = nullptr;
158 src.defaultLocale_ = nullptr;
159 return *this;
160}
161
162void LocaleMatcher::Builder::clearSupportedLocales() {
163 if (supportedLocales_ != nullptr) {
164 supportedLocales_->removeAllElements();
165 }
166}
167
168bool LocaleMatcher::Builder::ensureSupportedLocaleVector() {
169 if (U_FAILURE(errorCode_)) { return false; }
170 if (supportedLocales_ != nullptr) { return true; }
171 supportedLocales_ = new UVector(uprv_deleteUObject, nullptr, errorCode_);
172 if (U_FAILURE(errorCode_)) { return false; }
173 if (supportedLocales_ == nullptr) {
174 errorCode_ = U_MEMORY_ALLOCATION_ERROR;
175 return false;
176 }
177 return true;
178}
179
180LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListString(
181 StringPiece locales) {
182 LocalePriorityList list(locales, errorCode_);
183 if (U_FAILURE(errorCode_)) { return *this; }
184 clearSupportedLocales();
185 if (!ensureSupportedLocaleVector()) { return *this; }
186 int32_t length = list.getLengthIncludingRemoved();
187 for (int32_t i = 0; i < length; ++i) {
188 Locale *locale = list.orphanLocaleAt(i);
189 if (locale == nullptr) { continue; }
190 supportedLocales_->addElement(locale, errorCode_);
191 if (U_FAILURE(errorCode_)) {
192 delete locale;
193 break;
194 }
195 }
196 return *this;
197}
198
199LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocales(Locale::Iterator &locales) {
200 if (U_FAILURE(errorCode_)) { return *this; }
201 clearSupportedLocales();
202 if (!ensureSupportedLocaleVector()) { return *this; }
203 while (locales.hasNext()) {
204 const Locale &locale = locales.next();
205 Locale *clone = locale.clone();
206 if (clone == nullptr) {
207 errorCode_ = U_MEMORY_ALLOCATION_ERROR;
208 break;
209 }
210 supportedLocales_->addElement(clone, errorCode_);
211 if (U_FAILURE(errorCode_)) {
212 delete clone;
213 break;
214 }
215 }
216 return *this;
217}
218
219LocaleMatcher::Builder &LocaleMatcher::Builder::addSupportedLocale(const Locale &locale) {
220 if (!ensureSupportedLocaleVector()) { return *this; }
221 Locale *clone = locale.clone();
222 if (clone == nullptr) {
223 errorCode_ = U_MEMORY_ALLOCATION_ERROR;
224 return *this;
225 }
226 supportedLocales_->addElement(clone, errorCode_);
227 if (U_FAILURE(errorCode_)) {
228 delete clone;
229 }
230 return *this;
231}
232
Frank Tangf90543d2020-10-30 19:02:04 -0700233LocaleMatcher::Builder &LocaleMatcher::Builder::setNoDefaultLocale() {
234 if (U_FAILURE(errorCode_)) { return *this; }
235 delete defaultLocale_;
236 defaultLocale_ = nullptr;
237 withDefault_ = false;
238 return *this;
239}
240
Frank Tang952ccb92019-08-22 12:09:17 -0700241LocaleMatcher::Builder &LocaleMatcher::Builder::setDefaultLocale(const Locale *defaultLocale) {
242 if (U_FAILURE(errorCode_)) { return *this; }
243 Locale *clone = nullptr;
244 if (defaultLocale != nullptr) {
245 clone = defaultLocale->clone();
246 if (clone == nullptr) {
247 errorCode_ = U_MEMORY_ALLOCATION_ERROR;
248 return *this;
249 }
250 }
251 delete defaultLocale_;
252 defaultLocale_ = clone;
Frank Tangf90543d2020-10-30 19:02:04 -0700253 withDefault_ = true;
Frank Tang952ccb92019-08-22 12:09:17 -0700254 return *this;
255}
256
257LocaleMatcher::Builder &LocaleMatcher::Builder::setFavorSubtag(ULocMatchFavorSubtag subtag) {
258 if (U_FAILURE(errorCode_)) { return *this; }
259 favor_ = subtag;
260 return *this;
261}
262
263LocaleMatcher::Builder &LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULocMatchDemotion demotion) {
264 if (U_FAILURE(errorCode_)) { return *this; }
265 demotion_ = demotion;
266 return *this;
267}
268
Frank Tangf90543d2020-10-30 19:02:04 -0700269LocaleMatcher::Builder &LocaleMatcher::Builder::setMaxDistance(const Locale &desired,
270 const Locale &supported) {
271 if (U_FAILURE(errorCode_)) { return *this; }
272 Locale *desiredClone = desired.clone();
273 Locale *supportedClone = supported.clone();
274 if (desiredClone == nullptr || supportedClone == nullptr) {
275 delete desiredClone; // in case only one could not be allocated
276 delete supportedClone;
277 errorCode_ = U_MEMORY_ALLOCATION_ERROR;
278 return *this;
279 }
280 delete maxDistanceDesired_;
281 delete maxDistanceSupported_;
282 maxDistanceDesired_ = desiredClone;
283 maxDistanceSupported_ = supportedClone;
284 return *this;
285}
286
#if 0
/**
 * <i>Internal only!</i>
 * Disabled code: everything between #if 0 and #endif is not compiled.
 *
 * @param thresholdDistance the thresholdDistance to set, with -1 = default;
 *                          values above 100 are clamped to 100
 * @return this Builder object
 * @internal
 * @deprecated This API is ICU internal only.
 */
@Deprecated
LocaleMatcher::Builder &LocaleMatcher::Builder::internalSetThresholdDistance(int32_t thresholdDistance) {
    if (U_SUCCESS(errorCode_)) {
        thresholdDistance_ = thresholdDistance > 100 ? 100 : thresholdDistance;
    }
    return *this;
}
#endif
306
307UBool LocaleMatcher::Builder::copyErrorTo(UErrorCode &outErrorCode) const {
308 if (U_FAILURE(outErrorCode)) { return TRUE; }
309 if (U_SUCCESS(errorCode_)) { return FALSE; }
310 outErrorCode = errorCode_;
311 return TRUE;
312}
313
314LocaleMatcher LocaleMatcher::Builder::build(UErrorCode &errorCode) const {
315 if (U_SUCCESS(errorCode) && U_FAILURE(errorCode_)) {
316 errorCode = errorCode_;
317 }
318 return LocaleMatcher(*this, errorCode);
319}
320
321namespace {
322
323LSR getMaximalLsrOrUnd(const XLikelySubtags &likelySubtags, const Locale &locale,
324 UErrorCode &errorCode) {
325 if (U_FAILURE(errorCode) || locale.isBogus() || *locale.getName() == 0 /* "und" */) {
326 return UND_LSR;
327 } else {
328 return likelySubtags.makeMaximizedLsrFrom(locale, errorCode);
329 }
330}
331
332int32_t hashLSR(const UHashTok token) {
333 const LSR *lsr = static_cast<const LSR *>(token.pointer);
334 return lsr->hashCode;
335}
336
337UBool compareLSRs(const UHashTok t1, const UHashTok t2) {
338 const LSR *lsr1 = static_cast<const LSR *>(t1.pointer);
339 const LSR *lsr2 = static_cast<const LSR *>(t2.pointer);
340 return *lsr1 == *lsr2;
341}
342
Frank Tang952ccb92019-08-22 12:09:17 -0700343} // namespace
344
Frank Tangf2223962020-04-27 18:25:29 -0700345int32_t LocaleMatcher::putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength,
346 UErrorCode &errorCode) {
347 if (U_FAILURE(errorCode)) { return suppLength; }
348 int32_t index = uhash_geti(supportedLsrToIndex, &lsr);
349 if (index == 0) {
350 uhash_puti(supportedLsrToIndex, const_cast<LSR *>(&lsr), i + 1, &errorCode);
351 if (U_SUCCESS(errorCode)) {
352 supportedLSRs[suppLength] = &lsr;
353 supportedIndexes[suppLength++] = i;
354 }
355 }
356 return suppLength;
357}
358
Frank Tang952ccb92019-08-22 12:09:17 -0700359LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
360 likelySubtags(*XLikelySubtags::getSingleton(errorCode)),
361 localeDistance(*LocaleDistance::getSingleton(errorCode)),
362 thresholdDistance(builder.thresholdDistance_),
363 demotionPerDesiredLocale(0),
364 favorSubtag(builder.favor_),
Frank Tangf2223962020-04-27 18:25:29 -0700365 direction(builder.direction_),
Frank Tang952ccb92019-08-22 12:09:17 -0700366 supportedLocales(nullptr), lsrs(nullptr), supportedLocalesLength(0),
367 supportedLsrToIndex(nullptr),
368 supportedLSRs(nullptr), supportedIndexes(nullptr), supportedLSRsLength(0),
Frank Tangf2223962020-04-27 18:25:29 -0700369 ownedDefaultLocale(nullptr), defaultLocale(nullptr) {
Frank Tang952ccb92019-08-22 12:09:17 -0700370 if (U_FAILURE(errorCode)) { return; }
Frank Tangf2223962020-04-27 18:25:29 -0700371 const Locale *def = builder.defaultLocale_;
372 LSR builderDefaultLSR;
373 const LSR *defLSR = nullptr;
374 if (def != nullptr) {
375 ownedDefaultLocale = def->clone();
376 if (ownedDefaultLocale == nullptr) {
377 errorCode = U_MEMORY_ALLOCATION_ERROR;
378 return;
379 }
380 def = ownedDefaultLocale;
381 builderDefaultLSR = getMaximalLsrOrUnd(likelySubtags, *def, errorCode);
382 if (U_FAILURE(errorCode)) { return; }
383 defLSR = &builderDefaultLSR;
384 }
Frank Tang952ccb92019-08-22 12:09:17 -0700385 supportedLocalesLength = builder.supportedLocales_ != nullptr ?
386 builder.supportedLocales_->size() : 0;
Frank Tang952ccb92019-08-22 12:09:17 -0700387 if (supportedLocalesLength > 0) {
388 // Store the supported locales in input order,
389 // so that when different types are used (e.g., language tag strings)
390 // we can return those by parallel index.
391 supportedLocales = static_cast<const Locale **>(
392 uprv_malloc(supportedLocalesLength * sizeof(const Locale *)));
393 // Supported LRSs in input order.
394 // In C++, we store these permanently to simplify ownership management
395 // in the hash tables. Duplicate LSRs (if any) are unused overhead.
396 lsrs = new LSR[supportedLocalesLength];
397 if (supportedLocales == nullptr || lsrs == nullptr) {
398 errorCode = U_MEMORY_ALLOCATION_ERROR;
399 return;
400 }
401 // If the constructor fails partway, we need null pointers for destructibility.
402 uprv_memset(supportedLocales, 0, supportedLocalesLength * sizeof(const Locale *));
Frank Tang952ccb92019-08-22 12:09:17 -0700403 for (int32_t i = 0; i < supportedLocalesLength; ++i) {
404 const Locale &locale = *static_cast<Locale *>(builder.supportedLocales_->elementAt(i));
405 supportedLocales[i] = locale.clone();
406 if (supportedLocales[i] == nullptr) {
407 errorCode = U_MEMORY_ALLOCATION_ERROR;
408 return;
409 }
410 const Locale &supportedLocale = *supportedLocales[i];
411 LSR &lsr = lsrs[i] = getMaximalLsrOrUnd(likelySubtags, supportedLocale, errorCode);
412 lsr.setHashCode();
413 if (U_FAILURE(errorCode)) { return; }
Frank Tang952ccb92019-08-22 12:09:17 -0700414 }
415
416 // We need an unordered map from LSR to first supported locale with that LSR,
Frank Tangf2223962020-04-27 18:25:29 -0700417 // and an ordered list of (LSR, supported index) for
418 // the supported locales in the following order:
Frank Tang952ccb92019-08-22 12:09:17 -0700419 // 1. Default locale, if it is supported.
420 // 2. Priority locales (aka "paradigm locales") in builder order.
421 // 3. Remaining locales in builder order.
Frank Tang952ccb92019-08-22 12:09:17 -0700422 supportedLsrToIndex = uhash_openSize(hashLSR, compareLSRs, uhash_compareLong,
423 supportedLocalesLength, &errorCode);
424 if (U_FAILURE(errorCode)) { return; }
Frank Tang952ccb92019-08-22 12:09:17 -0700425 supportedLSRs = static_cast<const LSR **>(
Frank Tangf2223962020-04-27 18:25:29 -0700426 uprv_malloc(supportedLocalesLength * sizeof(const LSR *)));
Frank Tang952ccb92019-08-22 12:09:17 -0700427 supportedIndexes = static_cast<int32_t *>(
Frank Tangf2223962020-04-27 18:25:29 -0700428 uprv_malloc(supportedLocalesLength * sizeof(int32_t)));
Frank Tang952ccb92019-08-22 12:09:17 -0700429 if (supportedLSRs == nullptr || supportedIndexes == nullptr) {
430 errorCode = U_MEMORY_ALLOCATION_ERROR;
431 return;
432 }
Frank Tangf2223962020-04-27 18:25:29 -0700433 int32_t suppLength = 0;
434 // Determine insertion order.
435 // Add locales immediately that are equivalent to the default.
Frank Tangf90543d2020-10-30 19:02:04 -0700436 MaybeStackArray<int8_t, 100> order(supportedLocalesLength, errorCode);
437 if (U_FAILURE(errorCode)) { return; }
Frank Tangf2223962020-04-27 18:25:29 -0700438 int32_t numParadigms = 0;
Frank Tang952ccb92019-08-22 12:09:17 -0700439 for (int32_t i = 0; i < supportedLocalesLength; ++i) {
Frank Tang952ccb92019-08-22 12:09:17 -0700440 const Locale &locale = *supportedLocales[i];
441 const LSR &lsr = lsrs[i];
Frank Tangf90543d2020-10-30 19:02:04 -0700442 if (defLSR == nullptr && builder.withDefault_) {
443 // Implicit default locale = first supported locale, if not turned off.
Frank Tang952ccb92019-08-22 12:09:17 -0700444 U_ASSERT(i == 0);
445 def = &locale;
446 defLSR = &lsr;
Frank Tangf2223962020-04-27 18:25:29 -0700447 order[i] = 1;
448 suppLength = putIfAbsent(lsr, 0, suppLength, errorCode);
Frank Tangf90543d2020-10-30 19:02:04 -0700449 } else if (defLSR != nullptr && lsr.isEquivalentTo(*defLSR)) {
Frank Tangf2223962020-04-27 18:25:29 -0700450 order[i] = 1;
451 suppLength = putIfAbsent(lsr, i, suppLength, errorCode);
452 } else if (localeDistance.isParadigmLSR(lsr)) {
453 order[i] = 2;
454 ++numParadigms;
Frank Tang952ccb92019-08-22 12:09:17 -0700455 } else {
Frank Tangf2223962020-04-27 18:25:29 -0700456 order[i] = 3;
Frank Tang952ccb92019-08-22 12:09:17 -0700457 }
458 if (U_FAILURE(errorCode)) { return; }
459 }
Frank Tangf2223962020-04-27 18:25:29 -0700460 // Add supported paradigm locales.
461 int32_t paradigmLimit = suppLength + numParadigms;
462 for (int32_t i = 0; i < supportedLocalesLength && suppLength < paradigmLimit; ++i) {
463 if (order[i] == 2) {
464 suppLength = putIfAbsent(lsrs[i], i, suppLength, errorCode);
465 }
Frank Tang952ccb92019-08-22 12:09:17 -0700466 }
Frank Tangf2223962020-04-27 18:25:29 -0700467 // Add remaining supported locales.
468 for (int32_t i = 0; i < supportedLocalesLength; ++i) {
469 if (order[i] == 3) {
470 suppLength = putIfAbsent(lsrs[i], i, suppLength, errorCode);
471 }
472 }
473 supportedLSRsLength = suppLength;
474 // If supportedLSRsLength < supportedLocalesLength then
475 // we waste as many array slots as there are duplicate supported LSRs,
476 // but the amount of wasted space is small as long as there are few duplicates.
Frank Tang952ccb92019-08-22 12:09:17 -0700477 }
478
Frank Tang952ccb92019-08-22 12:09:17 -0700479 defaultLocale = def;
Frank Tang952ccb92019-08-22 12:09:17 -0700480
481 if (builder.demotion_ == ULOCMATCH_DEMOTION_REGION) {
482 demotionPerDesiredLocale = localeDistance.getDefaultDemotionPerDesiredLocale();
483 }
Frank Tangf90543d2020-10-30 19:02:04 -0700484
485 if (thresholdDistance >= 0) {
486 // already copied
487 } else if (builder.maxDistanceDesired_ != nullptr) {
488 LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, *builder.maxDistanceSupported_, errorCode);
489 const LSR *pSuppLSR = &suppLSR;
490 int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
491 getMaximalLsrOrUnd(likelySubtags, *builder.maxDistanceDesired_, errorCode),
492 &pSuppLSR, 1,
493 LocaleDistance::shiftDistance(100), favorSubtag, direction);
494 if (U_SUCCESS(errorCode)) {
495 // +1 for an exclusive threshold from an inclusive max.
496 thresholdDistance = LocaleDistance::getDistanceFloor(indexAndDistance) + 1;
497 } else {
498 thresholdDistance = 0;
499 }
500 } else {
501 thresholdDistance = localeDistance.getDefaultScriptDistance();
502 }
Frank Tang952ccb92019-08-22 12:09:17 -0700503}
504
505LocaleMatcher::LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT :
506 likelySubtags(src.likelySubtags),
507 localeDistance(src.localeDistance),
508 thresholdDistance(src.thresholdDistance),
509 demotionPerDesiredLocale(src.demotionPerDesiredLocale),
510 favorSubtag(src.favorSubtag),
Frank Tangf2223962020-04-27 18:25:29 -0700511 direction(src.direction),
Frank Tang952ccb92019-08-22 12:09:17 -0700512 supportedLocales(src.supportedLocales), lsrs(src.lsrs),
513 supportedLocalesLength(src.supportedLocalesLength),
514 supportedLsrToIndex(src.supportedLsrToIndex),
515 supportedLSRs(src.supportedLSRs),
516 supportedIndexes(src.supportedIndexes),
517 supportedLSRsLength(src.supportedLSRsLength),
Frank Tangf2223962020-04-27 18:25:29 -0700518 ownedDefaultLocale(src.ownedDefaultLocale), defaultLocale(src.defaultLocale) {
Frank Tang952ccb92019-08-22 12:09:17 -0700519 src.supportedLocales = nullptr;
520 src.lsrs = nullptr;
521 src.supportedLocalesLength = 0;
522 src.supportedLsrToIndex = nullptr;
523 src.supportedLSRs = nullptr;
524 src.supportedIndexes = nullptr;
525 src.supportedLSRsLength = 0;
526 src.ownedDefaultLocale = nullptr;
527 src.defaultLocale = nullptr;
Frank Tang952ccb92019-08-22 12:09:17 -0700528}
529
530LocaleMatcher::~LocaleMatcher() {
531 for (int32_t i = 0; i < supportedLocalesLength; ++i) {
532 delete supportedLocales[i];
533 }
534 uprv_free(supportedLocales);
535 delete[] lsrs;
536 uhash_close(supportedLsrToIndex);
537 uprv_free(supportedLSRs);
538 uprv_free(supportedIndexes);
539 delete ownedDefaultLocale;
540}
541
542LocaleMatcher &LocaleMatcher::operator=(LocaleMatcher &&src) U_NOEXCEPT {
543 this->~LocaleMatcher();
544
545 thresholdDistance = src.thresholdDistance;
546 demotionPerDesiredLocale = src.demotionPerDesiredLocale;
547 favorSubtag = src.favorSubtag;
Frank Tangf2223962020-04-27 18:25:29 -0700548 direction = src.direction;
Frank Tang952ccb92019-08-22 12:09:17 -0700549 supportedLocales = src.supportedLocales;
550 lsrs = src.lsrs;
551 supportedLocalesLength = src.supportedLocalesLength;
552 supportedLsrToIndex = src.supportedLsrToIndex;
553 supportedLSRs = src.supportedLSRs;
554 supportedIndexes = src.supportedIndexes;
555 supportedLSRsLength = src.supportedLSRsLength;
556 ownedDefaultLocale = src.ownedDefaultLocale;
557 defaultLocale = src.defaultLocale;
Frank Tang952ccb92019-08-22 12:09:17 -0700558
559 src.supportedLocales = nullptr;
560 src.lsrs = nullptr;
561 src.supportedLocalesLength = 0;
562 src.supportedLsrToIndex = nullptr;
563 src.supportedLSRs = nullptr;
564 src.supportedIndexes = nullptr;
565 src.supportedLSRsLength = 0;
566 src.ownedDefaultLocale = nullptr;
567 src.defaultLocale = nullptr;
Frank Tang952ccb92019-08-22 12:09:17 -0700568 return *this;
569}
570
571class LocaleLsrIterator {
572public:
573 LocaleLsrIterator(const XLikelySubtags &likelySubtags, Locale::Iterator &locales,
574 ULocMatchLifetime lifetime) :
575 likelySubtags(likelySubtags), locales(locales), lifetime(lifetime) {}
576
577 ~LocaleLsrIterator() {
578 if (lifetime == ULOCMATCH_TEMPORARY_LOCALES) {
579 delete remembered;
580 }
581 }
582
583 bool hasNext() const {
584 return locales.hasNext();
585 }
586
587 LSR next(UErrorCode &errorCode) {
588 current = &locales.next();
589 return getMaximalLsrOrUnd(likelySubtags, *current, errorCode);
590 }
591
592 void rememberCurrent(int32_t desiredIndex, UErrorCode &errorCode) {
593 if (U_FAILURE(errorCode)) { return; }
594 bestDesiredIndex = desiredIndex;
595 if (lifetime == ULOCMATCH_STORED_LOCALES) {
596 remembered = current;
597 } else {
598 // ULOCMATCH_TEMPORARY_LOCALES
599 delete remembered;
600 remembered = new Locale(*current);
601 if (remembered == nullptr) {
602 errorCode = U_MEMORY_ALLOCATION_ERROR;
603 }
604 }
605 }
606
607 const Locale *orphanRemembered() {
608 const Locale *rem = remembered;
609 remembered = nullptr;
610 return rem;
611 }
612
613 int32_t getBestDesiredIndex() const {
614 return bestDesiredIndex;
615 }
616
617private:
618 const XLikelySubtags &likelySubtags;
619 Locale::Iterator &locales;
620 ULocMatchLifetime lifetime;
621 const Locale *current = nullptr, *remembered = nullptr;
622 int32_t bestDesiredIndex = -1;
623};
624
625const Locale *LocaleMatcher::getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const {
626 if (U_FAILURE(errorCode)) { return nullptr; }
627 int32_t suppIndex = getBestSuppIndex(
628 getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
629 nullptr, errorCode);
630 return U_SUCCESS(errorCode) && suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale;
631}
632
633const Locale *LocaleMatcher::getBestMatch(Locale::Iterator &desiredLocales,
634 UErrorCode &errorCode) const {
635 if (U_FAILURE(errorCode)) { return nullptr; }
636 if (!desiredLocales.hasNext()) {
637 return defaultLocale;
638 }
639 LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
640 int32_t suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
641 return U_SUCCESS(errorCode) && suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale;
642}
643
644const Locale *LocaleMatcher::getBestMatchForListString(
645 StringPiece desiredLocaleList, UErrorCode &errorCode) const {
646 LocalePriorityList list(desiredLocaleList, errorCode);
647 LocalePriorityList::Iterator iter = list.iterator();
648 return getBestMatch(iter, errorCode);
649}
650
651LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
652 const Locale &desiredLocale, UErrorCode &errorCode) const {
653 if (U_FAILURE(errorCode)) {
Frank Tangf2223962020-04-27 18:25:29 -0700654 return Result(nullptr, defaultLocale, -1, -1, FALSE);
Frank Tang952ccb92019-08-22 12:09:17 -0700655 }
656 int32_t suppIndex = getBestSuppIndex(
657 getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
658 nullptr, errorCode);
659 if (U_FAILURE(errorCode) || suppIndex < 0) {
Frank Tangf2223962020-04-27 18:25:29 -0700660 return Result(nullptr, defaultLocale, -1, -1, FALSE);
Frank Tang952ccb92019-08-22 12:09:17 -0700661 } else {
662 return Result(&desiredLocale, supportedLocales[suppIndex], 0, suppIndex, FALSE);
663 }
664}
665
666LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
667 Locale::Iterator &desiredLocales, UErrorCode &errorCode) const {
668 if (U_FAILURE(errorCode) || !desiredLocales.hasNext()) {
Frank Tangf2223962020-04-27 18:25:29 -0700669 return Result(nullptr, defaultLocale, -1, -1, FALSE);
Frank Tang952ccb92019-08-22 12:09:17 -0700670 }
671 LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
672 int32_t suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
673 if (U_FAILURE(errorCode) || suppIndex < 0) {
Frank Tangf2223962020-04-27 18:25:29 -0700674 return Result(nullptr, defaultLocale, -1, -1, FALSE);
Frank Tang952ccb92019-08-22 12:09:17 -0700675 } else {
676 return Result(lsrIter.orphanRemembered(), supportedLocales[suppIndex],
677 lsrIter.getBestDesiredIndex(), suppIndex, TRUE);
678 }
679}
680
681int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter,
682 UErrorCode &errorCode) const {
683 if (U_FAILURE(errorCode)) { return -1; }
684 int32_t desiredIndex = 0;
685 int32_t bestSupportedLsrIndex = -1;
Frank Tangf2223962020-04-27 18:25:29 -0700686 for (int32_t bestShiftedDistance = LocaleDistance::shiftDistance(thresholdDistance);;) {
Frank Tang952ccb92019-08-22 12:09:17 -0700687 // Quick check for exact maximized LSR.
688 // Returns suppIndex+1 where 0 means not found.
689 if (supportedLsrToIndex != nullptr) {
690 desiredLSR.setHashCode();
691 int32_t index = uhash_geti(supportedLsrToIndex, &desiredLSR);
692 if (index != 0) {
693 int32_t suppIndex = index - 1;
694 if (remainingIter != nullptr) {
695 remainingIter->rememberCurrent(desiredIndex, errorCode);
696 }
697 return suppIndex;
698 }
699 }
700 int32_t bestIndexAndDistance = localeDistance.getBestIndexAndDistance(
Frank Tangf2223962020-04-27 18:25:29 -0700701 desiredLSR, supportedLSRs, supportedLSRsLength,
702 bestShiftedDistance, favorSubtag, direction);
Frank Tang952ccb92019-08-22 12:09:17 -0700703 if (bestIndexAndDistance >= 0) {
Frank Tangf2223962020-04-27 18:25:29 -0700704 bestShiftedDistance = LocaleDistance::getShiftedDistance(bestIndexAndDistance);
Frank Tang952ccb92019-08-22 12:09:17 -0700705 if (remainingIter != nullptr) {
706 remainingIter->rememberCurrent(desiredIndex, errorCode);
707 if (U_FAILURE(errorCode)) { return -1; }
708 }
Frank Tangf2223962020-04-27 18:25:29 -0700709 bestSupportedLsrIndex = LocaleDistance::getIndex(bestIndexAndDistance);
Frank Tang952ccb92019-08-22 12:09:17 -0700710 }
Frank Tangf2223962020-04-27 18:25:29 -0700711 if ((bestShiftedDistance -= LocaleDistance::shiftDistance(demotionPerDesiredLocale)) <= 0) {
Frank Tang952ccb92019-08-22 12:09:17 -0700712 break;
713 }
714 if (remainingIter == nullptr || !remainingIter->hasNext()) {
715 break;
716 }
717 desiredLSR = remainingIter->next(errorCode);
718 if (U_FAILURE(errorCode)) { return -1; }
719 ++desiredIndex;
720 }
721 if (bestSupportedLsrIndex < 0) {
722 // no good match
723 return -1;
724 }
725 return supportedIndexes[bestSupportedLsrIndex];
726}
727
Frank Tangf90543d2020-10-30 19:02:04 -0700728UBool LocaleMatcher::isMatch(const Locale &desired, const Locale &supported,
729 UErrorCode &errorCode) const {
730 LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
731 if (U_FAILURE(errorCode)) { return 0; }
732 const LSR *pSuppLSR = &suppLSR;
733 int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
734 getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
735 &pSuppLSR, 1,
736 LocaleDistance::shiftDistance(thresholdDistance), favorSubtag, direction);
737 return indexAndDistance >= 0;
738}
739
Frank Tang952ccb92019-08-22 12:09:17 -0700740double LocaleMatcher::internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const {
741 // Returns the inverse of the distance: That is, 1-distance(desired, supported).
742 LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
743 if (U_FAILURE(errorCode)) { return 0; }
744 const LSR *pSuppLSR = &suppLSR;
Frank Tangf2223962020-04-27 18:25:29 -0700745 int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
Frank Tang952ccb92019-08-22 12:09:17 -0700746 getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
747 &pSuppLSR, 1,
Frank Tangf2223962020-04-27 18:25:29 -0700748 LocaleDistance::shiftDistance(thresholdDistance), favorSubtag, direction);
749 double distance = LocaleDistance::getDistanceDouble(indexAndDistance);
750 return (100.0 - distance) / 100.0;
Frank Tang952ccb92019-08-22 12:09:17 -0700751}
752
753U_NAMESPACE_END
754
Frank Tangf2223962020-04-27 18:25:29 -0700755// uloc_acceptLanguage() --------------------------------------------------- ***
756
757U_NAMESPACE_USE
758
759namespace {
760
761class LocaleFromTag {
762public:
763 LocaleFromTag() : locale(Locale::getRoot()) {}
764 const Locale &operator()(const char *tag) { return locale = Locale(tag); }
765
766private:
767 // Store the locale in the converter, rather than return a reference to a temporary,
768 // or a value which could go out of scope with the caller's reference to it.
769 Locale locale;
770};
771
772int32_t acceptLanguage(UEnumeration &supportedLocales, Locale::Iterator &desiredLocales,
773 char *dest, int32_t capacity, UAcceptResult *acceptResult,
774 UErrorCode &errorCode) {
775 if (U_FAILURE(errorCode)) { return 0; }
776 LocaleMatcher::Builder builder;
777 const char *locString;
778 while ((locString = uenum_next(&supportedLocales, nullptr, &errorCode)) != nullptr) {
779 Locale loc(locString);
780 if (loc.isBogus()) {
781 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
782 return 0;
783 }
784 builder.addSupportedLocale(loc);
785 }
786 LocaleMatcher matcher = builder.build(errorCode);
787 LocaleMatcher::Result result = matcher.getBestMatchResult(desiredLocales, errorCode);
788 if (U_FAILURE(errorCode)) { return 0; }
789 if (result.getDesiredIndex() >= 0) {
790 if (acceptResult != nullptr) {
791 *acceptResult = *result.getDesiredLocale() == *result.getSupportedLocale() ?
792 ULOC_ACCEPT_VALID : ULOC_ACCEPT_FALLBACK;
793 }
794 const char *bestStr = result.getSupportedLocale()->getName();
795 int32_t bestLength = (int32_t)uprv_strlen(bestStr);
796 if (bestLength <= capacity) {
797 uprv_memcpy(dest, bestStr, bestLength);
798 }
799 return u_terminateChars(dest, capacity, bestLength, &errorCode);
800 } else {
801 if (acceptResult != nullptr) {
802 *acceptResult = ULOC_ACCEPT_FAILED;
803 }
804 return u_terminateChars(dest, capacity, 0, &errorCode);
805 }
806}
807
808} // namespace
809
810U_CAPI int32_t U_EXPORT2
811uloc_acceptLanguage(char *result, int32_t resultAvailable,
812 UAcceptResult *outResult,
813 const char **acceptList, int32_t acceptListCount,
814 UEnumeration *availableLocales,
815 UErrorCode *status) {
816 if (U_FAILURE(*status)) { return 0; }
817 if ((result == nullptr ? resultAvailable != 0 : resultAvailable < 0) ||
818 (acceptList == nullptr ? acceptListCount != 0 : acceptListCount < 0) ||
819 availableLocales == nullptr) {
820 *status = U_ILLEGAL_ARGUMENT_ERROR;
821 return 0;
822 }
823 LocaleFromTag converter;
824 Locale::ConvertingIterator<const char **, LocaleFromTag> desiredLocales(
825 acceptList, acceptList + acceptListCount, converter);
826 return acceptLanguage(*availableLocales, desiredLocales,
827 result, resultAvailable, outResult, *status);
828}
829
830U_CAPI int32_t U_EXPORT2
831uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable,
832 UAcceptResult *outResult,
833 const char *httpAcceptLanguage,
834 UEnumeration *availableLocales,
835 UErrorCode *status) {
836 if (U_FAILURE(*status)) { return 0; }
837 if ((result == nullptr ? resultAvailable != 0 : resultAvailable < 0) ||
838 httpAcceptLanguage == nullptr || availableLocales == nullptr) {
839 *status = U_ILLEGAL_ARGUMENT_ERROR;
840 return 0;
841 }
842 LocalePriorityList list(httpAcceptLanguage, *status);
843 LocalePriorityList::Iterator desiredLocales = list.iterator();
844 return acceptLanguage(*availableLocales, desiredLocales,
845 result, resultAvailable, outResult, *status);
846}