blob: ec0dca28a45a69be7f12efebdc7503029563dc34 [file] [log] [blame]
Jungshik Shin87232d82017-05-13 21:10:13 -07001// © 2016 and later: Unicode, Inc. and others.
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002// License & terms of use: http://www.unicode.org/copyright.html
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00003/*
4*******************************************************************************
5*
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07006* Copyright (C) 1997-2016, International Business Machines
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00007* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: loclikely.cpp
Jungshik Shin87232d82017-05-13 21:10:13 -070011* encoding: UTF-8
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000012* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2010feb25
16* created by: Markus W. Scherer
17*
18* Code for likely and minimized locale subtags, separated out from other .cpp files
19* that then do not depend on resource bundle code and likely-subtags data.
20*/
21
Frank Tang69c72a62019-04-03 21:41:21 -070022#include "unicode/bytestream.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000023#include "unicode/utypes.h"
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -080024#include "unicode/locid.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000025#include "unicode/putil.h"
Jungshik Shin87232d82017-05-13 21:10:13 -070026#include "unicode/uchar.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000027#include "unicode/uloc.h"
28#include "unicode/ures.h"
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -080029#include "unicode/uscript.h"
Frank Tang69c72a62019-04-03 21:41:21 -070030#include "bytesinkutil.h"
31#include "charstr.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000032#include "cmemory.h"
33#include "cstring.h"
34#include "ulocimp.h"
35#include "ustr_imp.h"
36
/**
 * Canonical spellings of the "unknown" subtags. They are compared
 * case-insensitively against parsed subtags elsewhere in this file.
 **/
/* Subtag standing for an unknown language. */
static const char* const unknownLanguage = "und";
/* Subtag standing for an unknown script. */
static const char* const unknownScript = "Zzzz";
/* Subtag standing for an unknown region. */
static const char* const unknownRegion = "ZZ";
43
44/**
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000045 * This function looks for the localeID in the likelySubtags resource.
46 *
47 * @param localeID The tag to find.
48 * @param buffer A buffer to hold the matching entry
49 * @param bufferLength The length of the output buffer
50 * @return A pointer to "buffer" if found, or a null pointer if not.
51 */
52static const char* U_CALLCONV
53findLikelySubtags(const char* localeID,
54 char* buffer,
55 int32_t bufferLength,
56 UErrorCode* err) {
57 const char* result = NULL;
58
59 if (!U_FAILURE(*err)) {
60 int32_t resLen = 0;
61 const UChar* s = NULL;
62 UErrorCode tmpErr = U_ZERO_ERROR;
Frank Tang69c72a62019-04-03 21:41:21 -070063 icu::LocalUResourceBundlePointer subtags(ures_openDirect(NULL, "likelySubtags", &tmpErr));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000064 if (U_SUCCESS(tmpErr)) {
Frank Tang69c72a62019-04-03 21:41:21 -070065 icu::CharString und;
66 if (localeID != NULL) {
67 if (*localeID == '\0') {
68 localeID = unknownLanguage;
69 } else if (*localeID == '_') {
70 und.append(unknownLanguage, *err);
71 und.append(localeID, *err);
72 if (U_FAILURE(*err)) {
73 return NULL;
74 }
75 localeID = und.data();
76 }
77 }
78 s = ures_getStringByKey(subtags.getAlias(), localeID, &resLen, &tmpErr);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000079
80 if (U_FAILURE(tmpErr)) {
81 /*
82 * If a resource is missing, it's not really an error, it's
83 * just that we don't have any data for that particular locale ID.
84 */
85 if (tmpErr != U_MISSING_RESOURCE_ERROR) {
86 *err = tmpErr;
87 }
88 }
89 else if (resLen >= bufferLength) {
90 /* The buffer should never overflow. */
91 *err = U_INTERNAL_PROGRAM_ERROR;
92 }
93 else {
94 u_UCharsToChars(s, buffer, resLen + 1);
Frank Tang69c72a62019-04-03 21:41:21 -070095 if (resLen >= 3 &&
96 uprv_strnicmp(buffer, unknownLanguage, 3) == 0 &&
97 (resLen == 3 || buffer[3] == '_')) {
98 uprv_memmove(buffer, buffer + 3, resLen - 3 + 1);
99 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000100 result = buffer;
101 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000102 } else {
103 *err = tmpErr;
104 }
105 }
106
107 return result;
108}
109
110/**
111 * Append a tag to a buffer, adding the separator if necessary. The buffer
112 * must be large enough to contain the resulting tag plus any separator
113 * necessary. The tag must not be a zero-length string.
114 *
115 * @param tag The tag to add.
116 * @param tagLength The length of the tag.
117 * @param buffer The output buffer.
Frank Tang3e05d9d2021-11-08 14:04:04 -0800118 * @param bufferLength The length of the output buffer. This is an input/output parameter.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000119 **/
120static void U_CALLCONV
121appendTag(
122 const char* tag,
123 int32_t tagLength,
124 char* buffer,
Frank Tang69c72a62019-04-03 21:41:21 -0700125 int32_t* bufferLength,
126 UBool withSeparator) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000127
Frank Tang69c72a62019-04-03 21:41:21 -0700128 if (withSeparator) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000129 buffer[*bufferLength] = '_';
130 ++(*bufferLength);
131 }
132
133 uprv_memmove(
134 &buffer[*bufferLength],
135 tag,
136 tagLength);
137
138 *bufferLength += tagLength;
139}
140
141/**
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000142 * Create a tag string from the supplied parameters. The lang, script and region
143 * parameters may be NULL pointers. If they are, their corresponding length parameters
144 * must be less than or equal to 0.
145 *
146 * If any of the language, script or region parameters are empty, and the alternateTags
147 * parameter is not NULL, it will be parsed for potential language, script and region tags
148 * to be used when constructing the new tag. If the alternateTags parameter is NULL, or
149 * it contains no language tag, the default tag for the unknown language is used.
150 *
151 * If the length of the new string exceeds the capacity of the output buffer,
152 * the function copies as many bytes to the output buffer as it can, and returns
153 * the error U_BUFFER_OVERFLOW_ERROR.
154 *
155 * If an illegal argument is provided, the function returns the error
156 * U_ILLEGAL_ARGUMENT_ERROR.
157 *
158 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
159 * the tag string fits in the output buffer, but the null terminator doesn't.
160 *
161 * @param lang The language tag to use.
162 * @param langLength The length of the language tag.
163 * @param script The script tag to use.
164 * @param scriptLength The length of the script tag.
165 * @param region The region tag to use.
166 * @param regionLength The length of the region tag.
167 * @param trailing Any trailing data to append to the new tag.
168 * @param trailingLength The length of the trailing data.
169 * @param alternateTags A string containing any alternate tags.
Frank Tang69c72a62019-04-03 21:41:21 -0700170 * @param sink The output sink receiving the tag string.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000171 * @param err A pointer to a UErrorCode for error reporting.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000172 **/
Frank Tang69c72a62019-04-03 21:41:21 -0700173static void U_CALLCONV
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000174createTagStringWithAlternates(
175 const char* lang,
176 int32_t langLength,
177 const char* script,
178 int32_t scriptLength,
179 const char* region,
180 int32_t regionLength,
181 const char* trailing,
182 int32_t trailingLength,
183 const char* alternateTags,
Frank Tang69c72a62019-04-03 21:41:21 -0700184 icu::ByteSink& sink,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000185 UErrorCode* err) {
186
187 if (U_FAILURE(*err)) {
188 goto error;
189 }
Frank Tang69c72a62019-04-03 21:41:21 -0700190 else if (langLength >= ULOC_LANG_CAPACITY ||
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000191 scriptLength >= ULOC_SCRIPT_CAPACITY ||
192 regionLength >= ULOC_COUNTRY_CAPACITY) {
193 goto error;
194 }
195 else {
196 /**
197 * ULOC_FULLNAME_CAPACITY will provide enough capacity
198 * that we can build a string that contains the language,
199 * script and region code without worrying about overrunning
200 * the user-supplied buffer.
201 **/
202 char tagBuffer[ULOC_FULLNAME_CAPACITY];
203 int32_t tagLength = 0;
Frank Tang1f164ee2022-11-08 12:31:27 -0800204 UBool regionAppended = false;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000205
206 if (langLength > 0) {
207 appendTag(
208 lang,
209 langLength,
210 tagBuffer,
Frank Tang69c72a62019-04-03 21:41:21 -0700211 &tagLength,
Frank Tang1f164ee2022-11-08 12:31:27 -0800212 /*withSeparator=*/false);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000213 }
214 else if (alternateTags == NULL) {
215 /*
Frank Tang69c72a62019-04-03 21:41:21 -0700216 * Use the empty string for an unknown language, if
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000217 * we found no language.
218 */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000219 }
220 else {
221 /*
222 * Parse the alternateTags string for the language.
223 */
224 char alternateLang[ULOC_LANG_CAPACITY];
225 int32_t alternateLangLength = sizeof(alternateLang);
226
227 alternateLangLength =
228 uloc_getLanguage(
229 alternateTags,
230 alternateLang,
231 alternateLangLength,
232 err);
233 if(U_FAILURE(*err) ||
234 alternateLangLength >= ULOC_LANG_CAPACITY) {
235 goto error;
236 }
237 else if (alternateLangLength == 0) {
238 /*
Frank Tang69c72a62019-04-03 21:41:21 -0700239 * Use the empty string for an unknown language, if
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000240 * we found no language.
241 */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000242 }
243 else {
244 appendTag(
245 alternateLang,
246 alternateLangLength,
247 tagBuffer,
Frank Tang69c72a62019-04-03 21:41:21 -0700248 &tagLength,
Frank Tang1f164ee2022-11-08 12:31:27 -0800249 /*withSeparator=*/false);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000250 }
251 }
252
253 if (scriptLength > 0) {
254 appendTag(
255 script,
256 scriptLength,
257 tagBuffer,
Frank Tang69c72a62019-04-03 21:41:21 -0700258 &tagLength,
Frank Tang1f164ee2022-11-08 12:31:27 -0800259 /*withSeparator=*/true);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000260 }
261 else if (alternateTags != NULL) {
262 /*
263 * Parse the alternateTags string for the script.
264 */
265 char alternateScript[ULOC_SCRIPT_CAPACITY];
266
267 const int32_t alternateScriptLength =
268 uloc_getScript(
269 alternateTags,
270 alternateScript,
271 sizeof(alternateScript),
272 err);
273
274 if (U_FAILURE(*err) ||
275 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
276 goto error;
277 }
278 else if (alternateScriptLength > 0) {
279 appendTag(
280 alternateScript,
281 alternateScriptLength,
282 tagBuffer,
Frank Tang69c72a62019-04-03 21:41:21 -0700283 &tagLength,
Frank Tang1f164ee2022-11-08 12:31:27 -0800284 /*withSeparator=*/true);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000285 }
286 }
287
288 if (regionLength > 0) {
289 appendTag(
290 region,
291 regionLength,
292 tagBuffer,
Frank Tang69c72a62019-04-03 21:41:21 -0700293 &tagLength,
Frank Tang1f164ee2022-11-08 12:31:27 -0800294 /*withSeparator=*/true);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000295
Frank Tang1f164ee2022-11-08 12:31:27 -0800296 regionAppended = true;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000297 }
298 else if (alternateTags != NULL) {
299 /*
300 * Parse the alternateTags string for the region.
301 */
302 char alternateRegion[ULOC_COUNTRY_CAPACITY];
303
304 const int32_t alternateRegionLength =
305 uloc_getCountry(
306 alternateTags,
307 alternateRegion,
308 sizeof(alternateRegion),
309 err);
310 if (U_FAILURE(*err) ||
311 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
312 goto error;
313 }
314 else if (alternateRegionLength > 0) {
315 appendTag(
316 alternateRegion,
317 alternateRegionLength,
318 tagBuffer,
Frank Tang69c72a62019-04-03 21:41:21 -0700319 &tagLength,
Frank Tang1f164ee2022-11-08 12:31:27 -0800320 /*withSeparator=*/true);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000321
Frank Tang1f164ee2022-11-08 12:31:27 -0800322 regionAppended = true;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000323 }
324 }
325
Frank Tang69c72a62019-04-03 21:41:21 -0700326 /**
327 * Copy the partial tag from our internal buffer to the supplied
328 * target.
329 **/
330 sink.Append(tagBuffer, tagLength);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000331
332 if (trailingLength > 0) {
Frank Tang69c72a62019-04-03 21:41:21 -0700333 if (*trailing != '@') {
334 sink.Append("_", 1);
335 if (!regionAppended) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000336 /* extra separator is required */
Frank Tang69c72a62019-04-03 21:41:21 -0700337 sink.Append("_", 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000338 }
339 }
340
Frank Tang69c72a62019-04-03 21:41:21 -0700341 /*
342 * Copy the trailing data into the supplied buffer.
343 */
344 sink.Append(trailing, trailingLength);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000345 }
346
Frank Tang69c72a62019-04-03 21:41:21 -0700347 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000348 }
349
350error:
351
352 /**
353 * An overflow indicates the locale ID passed in
354 * is ill-formed. If we got here, and there was
355 * no previous error, it's an implicit overflow.
356 **/
357 if (*err == U_BUFFER_OVERFLOW_ERROR ||
358 U_SUCCESS(*err)) {
359 *err = U_ILLEGAL_ARGUMENT_ERROR;
360 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000361}
362
363/**
364 * Create a tag string from the supplied parameters. The lang, script and region
365 * parameters may be NULL pointers. If they are, their corresponding length parameters
366 * must be less than or equal to 0. If the lang parameter is an empty string, the
367 * default value for an unknown language is written to the output buffer.
368 *
369 * If the length of the new string exceeds the capacity of the output buffer,
370 * the function copies as many bytes to the output buffer as it can, and returns
371 * the error U_BUFFER_OVERFLOW_ERROR.
372 *
373 * If an illegal argument is provided, the function returns the error
374 * U_ILLEGAL_ARGUMENT_ERROR.
375 *
376 * @param lang The language tag to use.
377 * @param langLength The length of the language tag.
378 * @param script The script tag to use.
379 * @param scriptLength The length of the script tag.
380 * @param region The region tag to use.
381 * @param regionLength The length of the region tag.
382 * @param trailing Any trailing data to append to the new tag.
383 * @param trailingLength The length of the trailing data.
Frank Tang69c72a62019-04-03 21:41:21 -0700384 * @param sink The output sink receiving the tag string.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000385 * @param err A pointer to a UErrorCode for error reporting.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000386 **/
Frank Tang69c72a62019-04-03 21:41:21 -0700387static void U_CALLCONV
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000388createTagString(
389 const char* lang,
390 int32_t langLength,
391 const char* script,
392 int32_t scriptLength,
393 const char* region,
394 int32_t regionLength,
395 const char* trailing,
396 int32_t trailingLength,
Frank Tang69c72a62019-04-03 21:41:21 -0700397 icu::ByteSink& sink,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000398 UErrorCode* err)
399{
Frank Tang69c72a62019-04-03 21:41:21 -0700400 createTagStringWithAlternates(
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000401 lang,
402 langLength,
403 script,
404 scriptLength,
405 region,
406 regionLength,
407 trailing,
408 trailingLength,
409 NULL,
Frank Tang69c72a62019-04-03 21:41:21 -0700410 sink,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000411 err);
412}
413
414/**
415 * Parse the language, script, and region subtags from a tag string, and copy the
416 * results into the corresponding output parameters. The buffers are null-terminated,
417 * unless overflow occurs.
418 *
419 * The langLength, scriptLength, and regionLength parameters are input/output
420 * parameters, and must contain the capacity of their corresponding buffers on
421 * input. On output, they will contain the actual length of the buffers, not
422 * including the null terminator.
423 *
424 * If the length of any of the output subtags exceeds the capacity of the corresponding
425 * buffer, the function copies as many bytes to the output buffer as it can, and returns
426 * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow
427 * occurs.
428 *
429 * If an illegal argument is provided, the function returns the error
430 * U_ILLEGAL_ARGUMENT_ERROR.
431 *
432 * @param localeID The locale ID to parse.
433 * @param lang The language tag buffer.
434 * @param langLength The length of the language tag.
435 * @param script The script tag buffer.
436 * @param scriptLength The length of the script tag.
437 * @param region The region tag buffer.
438 * @param regionLength The length of the region tag.
439 * @param err A pointer to a UErrorCode for error reporting.
440 * @return The number of chars of the localeID parameter consumed.
441 **/
442static int32_t U_CALLCONV
443parseTagString(
444 const char* localeID,
445 char* lang,
446 int32_t* langLength,
447 char* script,
448 int32_t* scriptLength,
449 char* region,
450 int32_t* regionLength,
451 UErrorCode* err)
452{
453 const char* position = localeID;
454 int32_t subtagLength = 0;
455
456 if(U_FAILURE(*err) ||
457 localeID == NULL ||
458 lang == NULL ||
459 langLength == NULL ||
460 script == NULL ||
461 scriptLength == NULL ||
462 region == NULL ||
463 regionLength == NULL) {
464 goto error;
465 }
466
Frank Tangf90543d2020-10-30 19:02:04 -0700467 subtagLength = ulocimp_getLanguage(position, &position, *err).extract(lang, *langLength, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000468
469 /*
470 * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
471 * to be an error, because it indicates the user-supplied tag is
472 * not well-formed.
473 */
474 if(U_FAILURE(*err)) {
475 goto error;
476 }
477
478 *langLength = subtagLength;
479
480 /*
Frank Tang69c72a62019-04-03 21:41:21 -0700481 * If no language was present, use the empty string instead.
482 * Otherwise, move past any separator.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000483 */
Jungshik Shinb3189662017-11-07 11:18:34 -0800484 if (_isIDSeparator(*position)) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000485 ++position;
486 }
487
Frank Tangf90543d2020-10-30 19:02:04 -0700488 subtagLength = ulocimp_getScript(position, &position, *err).extract(script, *scriptLength, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000489
490 if(U_FAILURE(*err)) {
491 goto error;
492 }
493
494 *scriptLength = subtagLength;
495
496 if (*scriptLength > 0) {
497 if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
498 /**
499 * If the script part is the "unknown" script, then don't return it.
500 **/
501 *scriptLength = 0;
502 }
503
504 /*
505 * Move past any separator.
506 */
507 if (_isIDSeparator(*position)) {
508 ++position;
509 }
510 }
511
Frank Tangf90543d2020-10-30 19:02:04 -0700512 subtagLength = ulocimp_getCountry(position, &position, *err).extract(region, *regionLength, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000513
514 if(U_FAILURE(*err)) {
515 goto error;
516 }
517
518 *regionLength = subtagLength;
519
520 if (*regionLength > 0) {
521 if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
522 /**
523 * If the region part is the "unknown" region, then don't return it.
524 **/
525 *regionLength = 0;
526 }
527 } else if (*position != 0 && *position != '@') {
528 /* back up over consumed trailing separator */
529 --position;
530 }
531
532exit:
533
534 return (int32_t)(position - localeID);
535
536error:
537
538 /**
539 * If we get here, we have no explicit error, it's the result of an
540 * illegal argument.
541 **/
542 if (!U_FAILURE(*err)) {
543 *err = U_ILLEGAL_ARGUMENT_ERROR;
544 }
545
546 goto exit;
547}
548
Frank Tang69c72a62019-04-03 21:41:21 -0700549static UBool U_CALLCONV
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000550createLikelySubtagsString(
551 const char* lang,
552 int32_t langLength,
553 const char* script,
554 int32_t scriptLength,
555 const char* region,
556 int32_t regionLength,
557 const char* variants,
558 int32_t variantsLength,
Frank Tang69c72a62019-04-03 21:41:21 -0700559 icu::ByteSink& sink,
560 UErrorCode* err) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000561 /**
562 * ULOC_FULLNAME_CAPACITY will provide enough capacity
563 * that we can build a string that contains the language,
564 * script and region code without worrying about overrunning
565 * the user-supplied buffer.
566 **/
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000567 char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
568
569 if(U_FAILURE(*err)) {
570 goto error;
571 }
572
573 /**
574 * Try the language with the script and region first.
575 **/
576 if (scriptLength > 0 && regionLength > 0) {
577
578 const char* likelySubtags = NULL;
579
Frank Tang69c72a62019-04-03 21:41:21 -0700580 icu::CharString tagBuffer;
581 {
582 icu::CharStringByteSink sink(&tagBuffer);
583 createTagString(
584 lang,
585 langLength,
586 script,
587 scriptLength,
588 region,
589 regionLength,
590 NULL,
591 0,
592 sink,
593 err);
594 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000595 if(U_FAILURE(*err)) {
596 goto error;
597 }
598
599 likelySubtags =
600 findLikelySubtags(
Frank Tang69c72a62019-04-03 21:41:21 -0700601 tagBuffer.data(),
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000602 likelySubtagsBuffer,
603 sizeof(likelySubtagsBuffer),
604 err);
605 if(U_FAILURE(*err)) {
606 goto error;
607 }
608
609 if (likelySubtags != NULL) {
610 /* Always use the language tag from the
611 maximal string, since it may be more
612 specific than the one provided. */
Frank Tang69c72a62019-04-03 21:41:21 -0700613 createTagStringWithAlternates(
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000614 NULL,
615 0,
616 NULL,
617 0,
618 NULL,
619 0,
620 variants,
621 variantsLength,
622 likelySubtags,
Frank Tang69c72a62019-04-03 21:41:21 -0700623 sink,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000624 err);
Frank Tang1f164ee2022-11-08 12:31:27 -0800625 return true;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000626 }
627 }
628
629 /**
630 * Try the language with just the script.
631 **/
632 if (scriptLength > 0) {
633
634 const char* likelySubtags = NULL;
635
Frank Tang69c72a62019-04-03 21:41:21 -0700636 icu::CharString tagBuffer;
637 {
638 icu::CharStringByteSink sink(&tagBuffer);
639 createTagString(
640 lang,
641 langLength,
642 script,
643 scriptLength,
644 NULL,
645 0,
646 NULL,
647 0,
648 sink,
649 err);
650 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000651 if(U_FAILURE(*err)) {
652 goto error;
653 }
654
655 likelySubtags =
656 findLikelySubtags(
Frank Tang69c72a62019-04-03 21:41:21 -0700657 tagBuffer.data(),
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000658 likelySubtagsBuffer,
659 sizeof(likelySubtagsBuffer),
660 err);
661 if(U_FAILURE(*err)) {
662 goto error;
663 }
664
665 if (likelySubtags != NULL) {
666 /* Always use the language tag from the
667 maximal string, since it may be more
668 specific than the one provided. */
Frank Tang69c72a62019-04-03 21:41:21 -0700669 createTagStringWithAlternates(
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000670 NULL,
671 0,
672 NULL,
673 0,
674 region,
675 regionLength,
676 variants,
677 variantsLength,
678 likelySubtags,
Frank Tang69c72a62019-04-03 21:41:21 -0700679 sink,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000680 err);
Frank Tang1f164ee2022-11-08 12:31:27 -0800681 return true;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000682 }
683 }
684
685 /**
686 * Try the language with just the region.
687 **/
688 if (regionLength > 0) {
689
690 const char* likelySubtags = NULL;
691
Frank Tang69c72a62019-04-03 21:41:21 -0700692 icu::CharString tagBuffer;
693 {
694 icu::CharStringByteSink sink(&tagBuffer);
695 createTagString(
696 lang,
697 langLength,
698 NULL,
699 0,
700 region,
701 regionLength,
702 NULL,
703 0,
704 sink,
705 err);
706 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000707 if(U_FAILURE(*err)) {
708 goto error;
709 }
710
711 likelySubtags =
712 findLikelySubtags(
Frank Tang69c72a62019-04-03 21:41:21 -0700713 tagBuffer.data(),
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000714 likelySubtagsBuffer,
715 sizeof(likelySubtagsBuffer),
716 err);
717 if(U_FAILURE(*err)) {
718 goto error;
719 }
720
721 if (likelySubtags != NULL) {
722 /* Always use the language tag from the
723 maximal string, since it may be more
724 specific than the one provided. */
Frank Tang69c72a62019-04-03 21:41:21 -0700725 createTagStringWithAlternates(
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000726 NULL,
727 0,
728 script,
729 scriptLength,
730 NULL,
731 0,
732 variants,
733 variantsLength,
734 likelySubtags,
Frank Tang69c72a62019-04-03 21:41:21 -0700735 sink,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000736 err);
Frank Tang1f164ee2022-11-08 12:31:27 -0800737 return true;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000738 }
739 }
740
741 /**
742 * Finally, try just the language.
743 **/
744 {
745 const char* likelySubtags = NULL;
746
Frank Tang69c72a62019-04-03 21:41:21 -0700747 icu::CharString tagBuffer;
748 {
749 icu::CharStringByteSink sink(&tagBuffer);
750 createTagString(
751 lang,
752 langLength,
753 NULL,
754 0,
755 NULL,
756 0,
757 NULL,
758 0,
759 sink,
760 err);
761 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000762 if(U_FAILURE(*err)) {
763 goto error;
764 }
765
766 likelySubtags =
767 findLikelySubtags(
Frank Tang69c72a62019-04-03 21:41:21 -0700768 tagBuffer.data(),
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000769 likelySubtagsBuffer,
770 sizeof(likelySubtagsBuffer),
771 err);
772 if(U_FAILURE(*err)) {
773 goto error;
774 }
775
776 if (likelySubtags != NULL) {
777 /* Always use the language tag from the
778 maximal string, since it may be more
779 specific than the one provided. */
Frank Tang69c72a62019-04-03 21:41:21 -0700780 createTagStringWithAlternates(
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000781 NULL,
782 0,
783 script,
784 scriptLength,
785 region,
786 regionLength,
787 variants,
788 variantsLength,
789 likelySubtags,
Frank Tang69c72a62019-04-03 21:41:21 -0700790 sink,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000791 err);
Frank Tang1f164ee2022-11-08 12:31:27 -0800792 return true;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000793 }
794 }
795
Frank Tang1f164ee2022-11-08 12:31:27 -0800796 return false;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000797
798error:
799
800 if (!U_FAILURE(*err)) {
801 *err = U_ILLEGAL_ARGUMENT_ERROR;
802 }
803
Frank Tang1f164ee2022-11-08 12:31:27 -0800804 return false;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000805}
806
/*
 * Scan the first trailingLength chars of trailing and jump to the enclosing
 * function's "error" label if any subtag is longer than 8 characters.
 * Subtags are delimited by '-' or '_'; scanning stops at '@', so whatever
 * follows it is not checked.
 *
 * The enclosing function must define a label named "error". The macro
 * declares locals named "count" and "i", so its arguments must not use
 * those names.
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t count = 0; \
    int32_t i; \
    for (i = 0; i < (trailingLength); i++) { \
        if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
            /* A separator starts a new subtag. */ \
            count = 0; \
        } else if ((trailing)[i] == '@') { \
            break; \
        } else if (count >= 8) { \
            /* This would be the ninth character of the subtag. */ \
            goto error; \
        } else { \
            count++; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000825
Frank Tang53eca4c2020-06-04 17:40:55 -0700826static UBool
Frank Tang69c72a62019-04-03 21:41:21 -0700827_uloc_addLikelySubtags(const char* localeID,
828 icu::ByteSink& sink,
829 UErrorCode* err) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000830 char lang[ULOC_LANG_CAPACITY];
831 int32_t langLength = sizeof(lang);
832 char script[ULOC_SCRIPT_CAPACITY];
833 int32_t scriptLength = sizeof(script);
834 char region[ULOC_COUNTRY_CAPACITY];
835 int32_t regionLength = sizeof(region);
836 const char* trailing = "";
837 int32_t trailingLength = 0;
838 int32_t trailingIndex = 0;
Frank Tang1f164ee2022-11-08 12:31:27 -0800839 UBool success = false;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000840
841 if(U_FAILURE(*err)) {
842 goto error;
843 }
Frank Tang69c72a62019-04-03 21:41:21 -0700844 if (localeID == NULL) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000845 goto error;
846 }
847
848 trailingIndex = parseTagString(
849 localeID,
850 lang,
851 &langLength,
852 script,
853 &scriptLength,
854 region,
855 &regionLength,
856 err);
857 if(U_FAILURE(*err)) {
858 /* Overflow indicates an illegal argument error */
859 if (*err == U_BUFFER_OVERFLOW_ERROR) {
860 *err = U_ILLEGAL_ARGUMENT_ERROR;
861 }
862
863 goto error;
864 }
865
866 /* Find the length of the trailing portion. */
867 while (_isIDSeparator(localeID[trailingIndex])) {
868 trailingIndex++;
869 }
870 trailing = &localeID[trailingIndex];
871 trailingLength = (int32_t)uprv_strlen(trailing);
872
873 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
874
Frank Tang69c72a62019-04-03 21:41:21 -0700875 success =
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000876 createLikelySubtagsString(
877 lang,
878 langLength,
879 script,
880 scriptLength,
881 region,
882 regionLength,
883 trailing,
884 trailingLength,
Frank Tang69c72a62019-04-03 21:41:21 -0700885 sink,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000886 err);
887
Frank Tang69c72a62019-04-03 21:41:21 -0700888 if (!success) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000889 const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
890
891 /*
892 * If we get here, we need to return localeID.
893 */
Frank Tang69c72a62019-04-03 21:41:21 -0700894 sink.Append(localeID, localIDLength);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000895 }
896
Frank Tang53eca4c2020-06-04 17:40:55 -0700897 return success;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000898
899error:
900
901 if (!U_FAILURE(*err)) {
902 *err = U_ILLEGAL_ARGUMENT_ERROR;
903 }
Frank Tang1f164ee2022-11-08 12:31:27 -0800904 return false;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000905}
906
// Add likely subtags to the sink.
// Returns true if the value in the sink was produced by a match during the lookup.
// Returns false if the value in the sink is the same as the input because the
// lookup found no match.
911static UBool _ulocimp_addLikelySubtags(const char*, icu::ByteSink&, UErrorCode*);
912
Frank Tang69c72a62019-04-03 21:41:21 -0700913static void
914_uloc_minimizeSubtags(const char* localeID,
915 icu::ByteSink& sink,
916 UErrorCode* err) {
917 icu::CharString maximizedTagBuffer;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000918
919 char lang[ULOC_LANG_CAPACITY];
920 int32_t langLength = sizeof(lang);
921 char script[ULOC_SCRIPT_CAPACITY];
922 int32_t scriptLength = sizeof(script);
923 char region[ULOC_COUNTRY_CAPACITY];
924 int32_t regionLength = sizeof(region);
925 const char* trailing = "";
926 int32_t trailingLength = 0;
927 int32_t trailingIndex = 0;
Frank Tang1f164ee2022-11-08 12:31:27 -0800928 UBool successGetMax = false;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000929
930 if(U_FAILURE(*err)) {
931 goto error;
932 }
Frank Tang69c72a62019-04-03 21:41:21 -0700933 else if (localeID == NULL) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000934 goto error;
935 }
936
937 trailingIndex =
938 parseTagString(
939 localeID,
940 lang,
941 &langLength,
942 script,
943 &scriptLength,
944 region,
945 &regionLength,
946 err);
947 if(U_FAILURE(*err)) {
948
949 /* Overflow indicates an illegal argument error */
950 if (*err == U_BUFFER_OVERFLOW_ERROR) {
951 *err = U_ILLEGAL_ARGUMENT_ERROR;
952 }
953
954 goto error;
955 }
956
957 /* Find the spot where the variants or the keywords begin, if any. */
958 while (_isIDSeparator(localeID[trailingIndex])) {
959 trailingIndex++;
960 }
961 trailing = &localeID[trailingIndex];
962 trailingLength = (int32_t)uprv_strlen(trailing);
963
964 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
965
Frank Tang69c72a62019-04-03 21:41:21 -0700966 {
967 icu::CharString base;
968 {
Frank Tang53eca4c2020-06-04 17:40:55 -0700969 icu::CharStringByteSink baseSink(&base);
Frank Tang69c72a62019-04-03 21:41:21 -0700970 createTagString(
971 lang,
972 langLength,
973 script,
974 scriptLength,
975 region,
976 regionLength,
977 NULL,
978 0,
Frank Tang53eca4c2020-06-04 17:40:55 -0700979 baseSink,
Frank Tang69c72a62019-04-03 21:41:21 -0700980 err);
981 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000982
Frank Tang69c72a62019-04-03 21:41:21 -0700983 /**
984 * First, we need to first get the maximization
985 * from AddLikelySubtags.
986 **/
987 {
Frank Tang53eca4c2020-06-04 17:40:55 -0700988 icu::CharStringByteSink maxSink(&maximizedTagBuffer);
989 successGetMax = _ulocimp_addLikelySubtags(base.data(), maxSink, err);
Frank Tang69c72a62019-04-03 21:41:21 -0700990 }
991 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000992
993 if(U_FAILURE(*err)) {
994 goto error;
995 }
996
Frank Tang53eca4c2020-06-04 17:40:55 -0700997 if (!successGetMax) {
998 /**
999 * If we got here, return the locale ID parameter unchanged.
1000 **/
1001 const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
1002 sink.Append(localeID, localeIDLength);
1003 return;
1004 }
1005
1006 // In the following, the lang, script, region are referring to those in
1007 // the maximizedTagBuffer, not the one in the localeID.
1008 langLength = sizeof(lang);
1009 scriptLength = sizeof(script);
1010 regionLength = sizeof(region);
1011 parseTagString(
1012 maximizedTagBuffer.data(),
1013 lang,
1014 &langLength,
1015 script,
1016 &scriptLength,
1017 region,
1018 &regionLength,
1019 err);
1020 if(U_FAILURE(*err)) {
1021 goto error;
1022 }
1023
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001024 /**
1025 * Start first with just the language.
1026 **/
1027 {
Frank Tang69c72a62019-04-03 21:41:21 -07001028 icu::CharString tagBuffer;
1029 {
Frank Tang53eca4c2020-06-04 17:40:55 -07001030 icu::CharStringByteSink tagSink(&tagBuffer);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001031 createLikelySubtagsString(
1032 lang,
1033 langLength,
1034 NULL,
1035 0,
1036 NULL,
1037 0,
1038 NULL,
1039 0,
Frank Tang53eca4c2020-06-04 17:40:55 -07001040 tagSink,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001041 err);
Frank Tang69c72a62019-04-03 21:41:21 -07001042 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001043
1044 if(U_FAILURE(*err)) {
1045 goto error;
1046 }
Frank Tang53eca4c2020-06-04 17:40:55 -07001047 else if (!tagBuffer.isEmpty() &&
1048 uprv_strnicmp(
Frank Tang69c72a62019-04-03 21:41:21 -07001049 maximizedTagBuffer.data(),
1050 tagBuffer.data(),
1051 tagBuffer.length()) == 0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001052
Frank Tang69c72a62019-04-03 21:41:21 -07001053 createTagString(
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001054 lang,
1055 langLength,
1056 NULL,
1057 0,
1058 NULL,
1059 0,
1060 trailing,
1061 trailingLength,
Frank Tang69c72a62019-04-03 21:41:21 -07001062 sink,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001063 err);
Frank Tang69c72a62019-04-03 21:41:21 -07001064 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001065 }
1066 }
1067
1068 /**
1069 * Next, try the language and region.
1070 **/
1071 if (regionLength > 0) {
1072
Frank Tang69c72a62019-04-03 21:41:21 -07001073 icu::CharString tagBuffer;
1074 {
Frank Tang53eca4c2020-06-04 17:40:55 -07001075 icu::CharStringByteSink tagSink(&tagBuffer);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001076 createLikelySubtagsString(
1077 lang,
1078 langLength,
1079 NULL,
1080 0,
1081 region,
1082 regionLength,
1083 NULL,
1084 0,
Frank Tang53eca4c2020-06-04 17:40:55 -07001085 tagSink,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001086 err);
Frank Tang69c72a62019-04-03 21:41:21 -07001087 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001088
1089 if(U_FAILURE(*err)) {
1090 goto error;
1091 }
Frank Tang53eca4c2020-06-04 17:40:55 -07001092 else if (!tagBuffer.isEmpty() &&
1093 uprv_strnicmp(
Frank Tang69c72a62019-04-03 21:41:21 -07001094 maximizedTagBuffer.data(),
1095 tagBuffer.data(),
1096 tagBuffer.length()) == 0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001097
Frank Tang69c72a62019-04-03 21:41:21 -07001098 createTagString(
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001099 lang,
1100 langLength,
1101 NULL,
1102 0,
1103 region,
1104 regionLength,
1105 trailing,
1106 trailingLength,
Frank Tang69c72a62019-04-03 21:41:21 -07001107 sink,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001108 err);
Frank Tang69c72a62019-04-03 21:41:21 -07001109 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001110 }
1111 }
1112
1113 /**
1114 * Finally, try the language and script. This is our last chance,
1115 * since trying with all three subtags would only yield the
1116 * maximal version that we already have.
1117 **/
Frank Tang53eca4c2020-06-04 17:40:55 -07001118 if (scriptLength > 0) {
Frank Tang69c72a62019-04-03 21:41:21 -07001119 icu::CharString tagBuffer;
1120 {
Frank Tang53eca4c2020-06-04 17:40:55 -07001121 icu::CharStringByteSink tagSink(&tagBuffer);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001122 createLikelySubtagsString(
1123 lang,
1124 langLength,
1125 script,
1126 scriptLength,
1127 NULL,
1128 0,
1129 NULL,
1130 0,
Frank Tang53eca4c2020-06-04 17:40:55 -07001131 tagSink,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001132 err);
Frank Tang69c72a62019-04-03 21:41:21 -07001133 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001134
1135 if(U_FAILURE(*err)) {
1136 goto error;
1137 }
Frank Tang53eca4c2020-06-04 17:40:55 -07001138 else if (!tagBuffer.isEmpty() &&
1139 uprv_strnicmp(
Frank Tang69c72a62019-04-03 21:41:21 -07001140 maximizedTagBuffer.data(),
1141 tagBuffer.data(),
1142 tagBuffer.length()) == 0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001143
Frank Tang69c72a62019-04-03 21:41:21 -07001144 createTagString(
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001145 lang,
1146 langLength,
1147 script,
1148 scriptLength,
1149 NULL,
1150 0,
1151 trailing,
1152 trailingLength,
Frank Tang69c72a62019-04-03 21:41:21 -07001153 sink,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001154 err);
Frank Tang69c72a62019-04-03 21:41:21 -07001155 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001156 }
1157 }
1158
1159 {
1160 /**
Frank Tang53eca4c2020-06-04 17:40:55 -07001161 * If we got here, return the max + trail.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001162 **/
Frank Tang53eca4c2020-06-04 17:40:55 -07001163 createTagString(
1164 lang,
1165 langLength,
1166 script,
1167 scriptLength,
1168 region,
1169 regionLength,
1170 trailing,
1171 trailingLength,
1172 sink,
1173 err);
Frank Tang69c72a62019-04-03 21:41:21 -07001174 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001175 }
1176
1177error:
1178
1179 if (!U_FAILURE(*err)) {
1180 *err = U_ILLEGAL_ARGUMENT_ERROR;
1181 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001182}
1183
Frank Tanga38aef92021-08-10 15:57:41 -07001184static int32_t
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001185do_canonicalize(const char* localeID,
1186 char* buffer,
1187 int32_t bufferCapacity,
1188 UErrorCode* err)
1189{
Frank Tanga38aef92021-08-10 15:57:41 -07001190 int32_t canonicalizedSize = uloc_canonicalize(
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001191 localeID,
1192 buffer,
1193 bufferCapacity,
1194 err);
1195
1196 if (*err == U_STRING_NOT_TERMINATED_WARNING ||
1197 *err == U_BUFFER_OVERFLOW_ERROR) {
Frank Tanga38aef92021-08-10 15:57:41 -07001198 return canonicalizedSize;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001199 }
1200 else if (U_FAILURE(*err)) {
1201
Frank Tanga38aef92021-08-10 15:57:41 -07001202 return -1;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001203 }
1204 else {
Frank Tanga38aef92021-08-10 15:57:41 -07001205 return canonicalizedSize;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001206 }
1207}
1208
1209U_CAPI int32_t U_EXPORT2
Frank Tang69c72a62019-04-03 21:41:21 -07001210uloc_addLikelySubtags(const char* localeID,
1211 char* maximizedLocaleID,
1212 int32_t maximizedLocaleIDCapacity,
1213 UErrorCode* status) {
1214 if (U_FAILURE(*status)) {
1215 return 0;
1216 }
1217
1218 icu::CheckedArrayByteSink sink(
1219 maximizedLocaleID, maximizedLocaleIDCapacity);
1220
1221 ulocimp_addLikelySubtags(localeID, sink, status);
1222 int32_t reslen = sink.NumberOfBytesAppended();
1223
1224 if (U_FAILURE(*status)) {
1225 return sink.Overflowed() ? reslen : -1;
1226 }
1227
1228 if (sink.Overflowed()) {
1229 *status = U_BUFFER_OVERFLOW_ERROR;
1230 } else {
1231 u_terminateChars(
1232 maximizedLocaleID, maximizedLocaleIDCapacity, reslen, status);
1233 }
1234
1235 return reslen;
1236}
1237
Frank Tang53eca4c2020-06-04 17:40:55 -07001238static UBool
1239_ulocimp_addLikelySubtags(const char* localeID,
1240 icu::ByteSink& sink,
1241 UErrorCode* status) {
Frank Tanga38aef92021-08-10 15:57:41 -07001242 PreflightingLocaleIDBuffer localeBuffer;
1243 do {
1244 localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(),
1245 localeBuffer.getCapacity(), status);
1246 } while (localeBuffer.needToTryAgain(status));
1247
1248 if (U_SUCCESS(*status)) {
1249 return _uloc_addLikelySubtags(localeBuffer.getBuffer(), sink, status);
1250 } else {
Frank Tang1f164ee2022-11-08 12:31:27 -08001251 return false;
Frank Tang53eca4c2020-06-04 17:40:55 -07001252 }
Frank Tang53eca4c2020-06-04 17:40:55 -07001253}
1254
Frank Tang69c72a62019-04-03 21:41:21 -07001255U_CAPI void U_EXPORT2
1256ulocimp_addLikelySubtags(const char* localeID,
1257 icu::ByteSink& sink,
1258 UErrorCode* status) {
Frank Tang53eca4c2020-06-04 17:40:55 -07001259 _ulocimp_addLikelySubtags(localeID, sink, status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001260}
1261
1262U_CAPI int32_t U_EXPORT2
Frank Tang69c72a62019-04-03 21:41:21 -07001263uloc_minimizeSubtags(const char* localeID,
1264 char* minimizedLocaleID,
1265 int32_t minimizedLocaleIDCapacity,
1266 UErrorCode* status) {
1267 if (U_FAILURE(*status)) {
1268 return 0;
1269 }
1270
1271 icu::CheckedArrayByteSink sink(
1272 minimizedLocaleID, minimizedLocaleIDCapacity);
1273
1274 ulocimp_minimizeSubtags(localeID, sink, status);
1275 int32_t reslen = sink.NumberOfBytesAppended();
1276
1277 if (U_FAILURE(*status)) {
1278 return sink.Overflowed() ? reslen : -1;
1279 }
1280
1281 if (sink.Overflowed()) {
1282 *status = U_BUFFER_OVERFLOW_ERROR;
1283 } else {
1284 u_terminateChars(
1285 minimizedLocaleID, minimizedLocaleIDCapacity, reslen, status);
1286 }
1287
1288 return reslen;
1289}
1290
1291U_CAPI void U_EXPORT2
1292ulocimp_minimizeSubtags(const char* localeID,
1293 icu::ByteSink& sink,
1294 UErrorCode* status) {
Frank Tanga38aef92021-08-10 15:57:41 -07001295 PreflightingLocaleIDBuffer localeBuffer;
1296 do {
1297 localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(),
1298 localeBuffer.getCapacity(), status);
1299 } while (localeBuffer.needToTryAgain(status));
1300
1301 _uloc_minimizeSubtags(localeBuffer.getBuffer(), sink, status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001302}
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001303
1304// Pairs of (language subtag, + or -) for finding out fast if common languages
1305// are LTR (minus) or RTL (plus).
Jungshik Shinb3189662017-11-07 11:18:34 -08001306static const char LANG_DIR_STRING[] =
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001307 "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
1308
Frank Tang69c72a62019-04-03 21:41:21 -07001309// Implemented here because this calls ulocimp_addLikelySubtags().
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001310U_CAPI UBool U_EXPORT2
1311uloc_isRightToLeft(const char *locale) {
1312 UErrorCode errorCode = U_ZERO_ERROR;
1313 char script[8];
1314 int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
1315 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1316 scriptLength == 0) {
1317 // Fastpath: We know the likely scripts and their writing direction
1318 // for some common languages.
1319 errorCode = U_ZERO_ERROR;
1320 char lang[8];
1321 int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
Frank Tang69c72a62019-04-03 21:41:21 -07001322 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
Frank Tang1f164ee2022-11-08 12:31:27 -08001323 return false;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001324 }
Frank Tang69c72a62019-04-03 21:41:21 -07001325 if (langLength > 0) {
1326 const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
1327 if (langPtr != NULL) {
1328 switch (langPtr[langLength]) {
Frank Tang1f164ee2022-11-08 12:31:27 -08001329 case '-': return false;
1330 case '+': return true;
Frank Tang69c72a62019-04-03 21:41:21 -07001331 default: break; // partial match of a longer code
1332 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001333 }
1334 }
1335 // Otherwise, find the likely script.
1336 errorCode = U_ZERO_ERROR;
Frank Tang69c72a62019-04-03 21:41:21 -07001337 icu::CharString likely;
1338 {
1339 icu::CharStringByteSink sink(&likely);
1340 ulocimp_addLikelySubtags(locale, sink, &errorCode);
1341 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001342 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
Frank Tang1f164ee2022-11-08 12:31:27 -08001343 return false;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001344 }
Frank Tang69c72a62019-04-03 21:41:21 -07001345 scriptLength = uloc_getScript(likely.data(), script, UPRV_LENGTHOF(script), &errorCode);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001346 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1347 scriptLength == 0) {
Frank Tang1f164ee2022-11-08 12:31:27 -08001348 return false;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001349 }
1350 }
1351 UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
1352 return uscript_isRightToLeft(scriptCode);
1353}
1354
1355U_NAMESPACE_BEGIN
1356
1357UBool
1358Locale::isRightToLeft() const {
1359 return uloc_isRightToLeft(getBaseName());
1360}
1361
1362U_NAMESPACE_END
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001363
1364// The following must at least allow for rg key value (6) plus terminator (1).
1365#define ULOC_RG_BUFLEN 8
1366
1367U_CAPI int32_t U_EXPORT2
1368ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
1369 char *region, int32_t regionCapacity, UErrorCode* status) {
1370 if (U_FAILURE(*status)) {
1371 return 0;
1372 }
1373 char rgBuf[ULOC_RG_BUFLEN];
1374 UErrorCode rgStatus = U_ZERO_ERROR;
1375
1376 // First check for rg keyword value
1377 int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
1378 if (U_FAILURE(rgStatus) || rgLen != 6) {
1379 rgLen = 0;
1380 } else {
1381 // rgBuf guaranteed to be zero terminated here, with text len 6
1382 char *rgPtr = rgBuf;
1383 for (; *rgPtr!= 0; rgPtr++) {
1384 *rgPtr = uprv_toupper(*rgPtr);
1385 }
1386 rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0;
1387 }
1388
1389 if (rgLen == 0) {
1390 // No valid rg keyword value, try for unicode_region_subtag
1391 rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
1392 if (U_FAILURE(*status)) {
1393 rgLen = 0;
1394 } else if (rgLen == 0 && inferRegion) {
Frank Tang1f164ee2022-11-08 12:31:27 -08001395 // no unicode_region_subtag but inferRegion true, try likely subtags
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001396 rgStatus = U_ZERO_ERROR;
Frank Tang69c72a62019-04-03 21:41:21 -07001397 icu::CharString locBuf;
1398 {
1399 icu::CharStringByteSink sink(&locBuf);
1400 ulocimp_addLikelySubtags(localeID, sink, &rgStatus);
1401 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001402 if (U_SUCCESS(rgStatus)) {
Frank Tang69c72a62019-04-03 21:41:21 -07001403 rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status);
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001404 if (U_FAILURE(*status)) {
1405 rgLen = 0;
1406 }
1407 }
1408 }
1409 }
1410
1411 rgBuf[rgLen] = 0;
1412 uprv_strncpy(region, rgBuf, regionCapacity);
1413 return u_terminateChars(region, regionCapacity, rgLen, status);
1414}
1415