// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 1997-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*
* File ULOC.CPP
*
* Modification History:
*
*   Date        Name        Description
*   04/01/97    aliu        Creation.
*   08/21/98    stephen     JDK 1.2 sync
*   12/08/98    rtg         New Locale implementation and C API
*   03/15/99    damiba      overhaul.
*   04/06/99    stephen     changed setDefault() to realloc and copy
*   06/14/99    stephen     Changed calls to ures_open for new params
*   07/21/99    stephen     Modified setDefault() to propagate to C++
*   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,
*                           brought canonicalization code into line with spec
*****************************************************************************/

/*
   POSIX's locale format, from putil.c: [no spaces]

     ll [ _CC ] [ . MM ] [ @ VV]

     l = lang, C = ctry, M = charmap, V = variant
*/

Frank Tangf2223962020-04-27 18:25:29 -070033#include "unicode/bytestream.h"
34#include "unicode/errorcode.h"
35#include "unicode/stringpiece.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000036#include "unicode/utypes.h"
37#include "unicode/ustring.h"
38#include "unicode/uloc.h"
39
Frank Tangf2223962020-04-27 18:25:29 -070040#include "bytesinkutil.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000041#include "putilimp.h"
42#include "ustr_imp.h"
43#include "ulocimp.h"
44#include "umutex.h"
45#include "cstring.h"
46#include "cmemory.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000047#include "locmap.h"
48#include "uarrsort.h"
49#include "uenumimp.h"
50#include "uassert.h"
Jungshik Shin87232d82017-05-13 21:10:13 -070051#include "charstr.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000052
Jungshik Shin87232d82017-05-13 21:10:13 -070053U_NAMESPACE_USE
Jungshik Shin5feb9ad2016-10-21 12:52:48 -070054
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000055/* ### Declarations **************************************************/
56
57/* Locale stuff from locid.cpp */
58U_CFUNC void locale_set_default(const char *id);
59U_CFUNC const char *locale_get_default(void);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000060
/* ### Data tables **************************************************/

/**
 * Table of language codes, both 2- and 3-letter, with preference
 * given to 2-letter codes where possible.  Includes 3-letter codes
 * that lack a 2-letter equivalent.
 *
 * This list must be in sorted order.  This list is returned directly
 * to the user by some API.
 *
 * This list must be kept in sync with LANGUAGES_3, with corresponding
 * entries matched.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The first list is visible to
 * user code when this array is returned by API.  The second list
 * contains codes we support, but do not expose through user API.
 *
 * Notes
 *
 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
 * include the revisions up to 2001/7/27 *CWB*
 *
 * The 3 character codes are the terminology codes like RFC 3066.  This
 * is compatible with prior ICU codes
 *
 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
 * table but now at the end of the table because 3 character codes are
 * duplicates.  This avoids bad searches going from 3 to 2 character
 * codes.
 *
 * The range qaa-qtz is reserved for local use
 */
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */
/* Subsequent hand addition of selected languages */
static const char * const LANGUAGES[] = {
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "aeb",
    "af",  "afh", "agq", "ain", "ak",  "akk", "akz", "ale",
    "aln", "alt", "am",  "an",  "ang", "anp", "ar",  "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
    "asa", "ase", "ast", "av",  "avk", "awa", "ay",  "az",
    "ba",  "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "be",  "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
    "bgn", "bho", "bi",  "bik", "bin", "bjn", "bkm", "bla",
    "bm",  "bn",  "bo",  "bpy", "bqi", "br",  "bra", "brh",
    "brx", "bs",  "bss", "bua", "bug", "bum", "byn", "byv",
    "ca",  "cad", "car", "cay", "cch", "ccp", "ce",  "ceb", "cgg",
    "ch",  "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "co",  "cop", "cps", "cr",  "crh",
    "cs",  "csb", "cu",  "cv",  "cy",
    "da",  "dak", "dar", "dav", "de",  "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
    "dyo", "dyu", "dz",  "dzg",
    "ebu", "ee",  "efi", "egl", "egy", "eka", "el",  "elx",
    "en",  "enm", "eo",  "es",  "esu", "et",  "eu",  "ewo",
    "ext",
    "fa",  "fan", "fat", "ff",  "fi",  "fil", "fit", "fj",
    "fo",  "fon", "fr",  "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fy",
    "ga",  "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
    "gez", "gil", "gl",  "glk", "gmh", "gn",  "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "guc",
    "gur", "guz", "gv",  "gwi",
    "ha",  "hai", "hak", "haw", "he",  "hi",  "hif", "hil",
    "hit", "hmn", "ho",  "hr",  "hsb", "hsn", "ht",  "hu",
    "hup", "hy",  "hz",
    "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ik",
    "ilo", "inh", "io",  "is",  "it",  "iu",  "izh",
    "ja",  "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jv",
    "ka",  "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg",  "kgp",
    "kha", "kho", "khq", "khw", "ki",  "kiu", "kj",  "kk",
    "kkj", "kl",  "kln", "km",  "kmb", "kn",  "ko",  "koi",
    "kok", "kos", "kpe", "kr",  "krc", "kri", "krj", "krl",
    "kru", "ks",  "ksb", "ksf", "ksh", "ku",  "kum", "kut",
    "kv",  "kw",  "ky",
    "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lfn",
    "lg",  "li",  "lij", "liv", "lkt", "lmo", "ln",  "lo",
    "lol", "loz", "lrc", "lt",  "ltg", "lu",  "lua", "lui",
    "lun", "luo", "lus", "luy", "lv",  "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg",  "mga",
    "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
    "ml",  "mn",  "mnc", "mni",
    "moh", "mos", "mr",  "mrj",
    "ms",  "mt",  "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "my",  "mye", "myv", "mzn",
    "na",  "nan", "nap", "naq", "nb",  "nd",  "nds", "ne",
    "new", "ng",  "nia", "niu", "njo", "nl",  "nmg", "nn",
    "nnh", "no",  "nog", "non", "nov", "nqo", "nr",  "nso",
    "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi",
    "oc",  "oj",  "om",  "or",  "os",  "osa", "ota",
    "pa",  "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
    "pdt", "peo", "pfl", "phn", "pi",  "pl",  "pms", "pnt",
    "pon", "prg", "pro", "ps",  "pt",
    "qu",  "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "rm",  "rn",  "ro",
    "rof", "rom", "rtm", "ru",  "rue", "rug", "rup",
    "rw",  "rwk",
    "sa",  "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "sc",  "scn", "sco", "sd",  "sdc", "sdh",
    "se",  "see", "seh", "sei", "sel", "ses", "sg",  "sga",
    "sgs", "shi", "shn", "shu", "si",  "sid", "sk",
    "sl",  "sli", "sly", "sm",  "sma", "smj", "smn", "sms",
    "sn",  "snk", "so",  "sog", "sq",  "sr",  "srn", "srr",
    "ss",  "ssy", "st",  "stq", "su",  "suk", "sus", "sux",
    "sv",  "sw",  "swb", "syc", "syr", "szl",
    "ta",  "tcy", "te",  "tem", "teo", "ter", "tet", "tg",
    "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tkr",
    "tlh", "tli", "tly", "tmh", "tn",  "to",  "tog", "tpi",
    "tr",  "tru", "trv", "ts",  "tsd", "tsi", "tt",  "ttt",
    "tum", "tvl", "tw",  "twq", "ty",  "tyv", "tzm",
    "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
    "vai", "ve",  "vec", "vep", "vi",  "vls", "vmf", "vo",
    "vot", "vro", "vun",
    "wa",  "wae", "wal", "war", "was", "wbp", "wo",  "wuu",
    "xal", "xh",  "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yi",  "yo",  "yrl", "yue",
    "za",  "zap", "zbl", "zea", "zen", "zgh", "zh",  "zu",
    "zun", "zxx", "zza",
NULL,
    "in",  "iw",  "ji",  "jw",  "mo",  "sh",  "swc", "tl",  /* obsolete language codes */
NULL
};

/*
 * Deprecated language codes and their replacements, as two parallel arrays:
 * entry i of REPLACEMENT_LANGUAGES is the current code for entry i of
 * DEPRECATED_LANGUAGES (in->id, iw->he, ji->yi, jw->jv, mo->ro).
 * NOTE(review): both arrays end with two NULLs, presumably so they can be
 * walked by the same two-list search helper as LANGUAGES -- confirm against
 * the lookup code, which is outside this view.
 */
static const char* const DEPRECATED_LANGUAGES[]={
    "in", "iw", "ji", "jw", "mo", NULL, NULL
};
static const char* const REPLACEMENT_LANGUAGES[]={
    "id", "he", "yi", "jv", "ro", NULL, NULL
};

/**
 * Table of 3-letter language codes.
 *
 * This is a lookup table used to convert 3-letter language codes to
 * their 2-letter equivalent, where possible.  It must be kept in sync
 * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
 * same language as LANGUAGES_3[i].  The commented-out lines are
 * copied from LANGUAGES to make eyeballing this baby easier.
 *
 * Where a 3-letter language code has no 2-letter equivalent, the
 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The two lists correspond to
 * the two lists in LANGUAGES.
 */
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */
/* Subsequent hand addition of selected languages */
static const char * const LANGUAGES_3[] = {
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
    "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
    "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
    "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
    "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
    "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
    "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
    "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
    "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
    "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
    "ces", "csb", "chu", "chv", "cym",
    "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
    "dyo", "dyu", "dzo", "dzg",
    "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
    "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
    "ext",
    "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
    "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fry",
    "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
    "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
    "gur", "guz", "glv", "gwi",
    "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
    "hup", "hye", "her",
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
    "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
    "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jav",
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
    "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
    "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
    "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
    "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
    "kom", "cor", "kir",
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
    "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
    "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
    "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
    "mal", "mon", "mnc", "mni",
    "moh", "mos", "mar", "mrj",
    "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "mya", "mye", "myv", "mzn",
    "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
    "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
    "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
    "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
    "oci", "oji", "orm", "ori", "oss", "osa", "ota",
    "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
    "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
    "pon", "prg", "pro", "pus", "por",
    "que", "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
    "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
    "kin", "rwk",
    "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
    "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
    "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
    "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
    "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
    "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
    "swe", "swa", "swb", "syc", "syr", "szl",
    "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr",
    "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
    "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
    "tum", "tvl", "tw" "i", "twq", "tah", "tyv", "tzm",
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
    "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
    "vot", "vro", "vun",
    "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
    "xal", "xho", "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
    "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
    "zun", "zxx", "zza",
NULL,
/*  "in",  "iw",  "ji",  "jw",  "mo",  "sh",  "swc", "tl",  */
    "ind", "heb", "yid", "jaw", "mol", "srp", "swc", "tgl",
NULL
};

/**
 * Table of 2-letter country codes.
 *
 * This list must be in sorted order.  This list is returned directly
 * to the user by some API.
 *
 * This list must be kept in sync with COUNTRIES_3, with corresponding
 * entries matched.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The first list is visible to
 * user code when this array is returned by API.  The second list
 * contains codes we support, but do not expose through user API.
 *
 * Notes:
 *
 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
 * new codes keeping the old ones for compatibility updated to include
 * 1999/12/03 revisions *CWB*
 *
 * RO(ROM) is now RO(ROU) according to
 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
 */
static const char * const COUNTRIES[] = {
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
    "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DG",  "DJ",  "DK",
    "DM",  "DO",  "DZ",  "EA",  "EC",  "EE",  "EG",  "EH",  "ER",
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
    "IC",  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",
    "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
    "WS",  "XK",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
NULL,
    "AN",  "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR",   /* obsolete country codes */
NULL
};

/*
 * Deprecated country codes and their replacements, as two parallel arrays:
 * entry i of REPLACEMENT_COUNTRIES is the current code for entry i of
 * DEPRECATED_COUNTRIES.
 * NOTE(review): both arrays end with two NULLs, presumably so they can be
 * walked by the same two-list search helper as COUNTRIES -- confirm against
 * the lookup code, which is outside this view.
 */
static const char* const DEPRECATED_COUNTRIES[] = {
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
};
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL  /* replacement country codes */
};

/**
 * Table of 3-letter country codes.
 *
 * This is a lookup table used to convert 3-letter country codes to
 * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
 * For all valid i, COUNTRIES[i] must refer to the same country as
 * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
 * to make eyeballing this baby easier.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The two lists correspond to
 * the two lists in COUNTRIES.
 */
static const char * const COUNTRIES_3[] = {
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",     */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
/*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DG",  "DJ",  "DK",     */
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",
/*  "DM",  "DO",  "DZ",  "EA",  "EC",  "EE",  "EG",  "EH",  "ER",     */
    "DMA", "DOM", "DZA", "XEA", "ECU", "EST", "EGY", "ESH", "ERI",
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "IC",  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",     */
    "XIC", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",     */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
/*  "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
    "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS",  "XK",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
    "WSM", "XKK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
NULL,
/*  "AN",  "BU",  "CS",  "FX",  "RO",  "SU",  "TP",  "YD",  "YU",  "ZR" */
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
NULL
};

/** One row of the canonicalization table: a legacy locale ID and its canonical form. */
typedef struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
} CanonicalizationMap;

/**
 * A map to canonicalize locale IDs.  This handles a variety of
 * different semantic kinds of transformations.
 *
 * The table has no terminator entry, so any code iterating it needs the
 * element count (e.g. sizeof(CANONICALIZE_MAP) / sizeof(CANONICALIZE_MAP[0])).
 */
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    { "art__LOJBAN",    "jbo" }, /* registered name */
    { "hy__AREVELA",    "hy" }, /* Registered IANA variant */
    { "hy__AREVMDA",    "hyw" }, /* Registered IANA variant */
    { "zh__GUOYU",      "zh" }, /* registered name */
    { "zh__HAKKA",      "hak" }, /* registered name */
    { "zh__XIANG",      "hsn" }, /* registered name */
    // subtags with 3 chars won't be treated as variants.
    { "zh_GAN",         "gan" }, /* registered name */
    { "zh_MIN_NAN",     "nan" }, /* registered name */
    { "zh_WUU",         "wuu" }, /* registered name */
    { "zh_YUE",         "yue" }, /* registered name */
};

/* ### BCP47 Conversion *******************************************/
/*
 * Heuristic test for a BCP47-style locale id: true when `id` is non-NULL,
 * contains no '@', and getShortestSubtagLength(id) is 1 (i.e. it has a
 * one-character subtag such as an extension singleton).
 *
 * The macro now uses its own argument throughout; it previously passed a
 * caller-scope variable named `localeID` to getShortestSubtagLength(), which
 * only compiled (and only gave the right answer) when the caller happened to
 * have such a variable holding the same string.
 *
 * Note: `id` is evaluated more than once, so pass a side-effect-free expression.
 */
#define _hasBCP47Extension(id) ((id) != NULL && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
480/* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
Frank Tang3e05d9d2021-11-08 14:04:04 -0800481static const char* _ConvertBCP47(
482 const char* id, char* buffer, int32_t length,
483 UErrorCode* err, int32_t* pLocaleIdSize) {
484 const char* finalID;
Frank Tanga38aef92021-08-10 15:57:41 -0700485 int32_t localeIDSize = uloc_forLanguageTag(id, buffer, length, NULL, err);
486 if (localeIDSize <= 0 || U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) {
487 finalID=id;
488 if (*err == U_STRING_NOT_TERMINATED_WARNING) {
489 *err = U_BUFFER_OVERFLOW_ERROR;
490 }
491 } else {
492 finalID=buffer;
493 }
Frank Tang3e05d9d2021-11-08 14:04:04 -0800494 if (pLocaleIdSize != nullptr) {
495 *pLocaleIdSize = localeIDSize;
496 }
497 return finalID;
Frank Tanga38aef92021-08-10 15:57:41 -0700498}
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000499/* Gets the size of the shortest subtag in the given localeID. */
500static int32_t getShortestSubtagLength(const char *localeID) {
Jungshik Shinb3189662017-11-07 11:18:34 -0800501 int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000502 int32_t length = localeIDLength;
503 int32_t tmpLength = 0;
504 int32_t i;
505 UBool reset = TRUE;
506
507 for (i = 0; i < localeIDLength; i++) {
508 if (localeID[i] != '_' && localeID[i] != '-') {
509 if (reset) {
510 tmpLength = 0;
511 reset = FALSE;
512 }
513 tmpLength++;
514 } else {
515 if (tmpLength != 0 && tmpLength < length) {
516 length = tmpLength;
517 }
518 reset = TRUE;
519 }
520 }
521
522 return length;
523}
524
/* ### Keywords **************************************************/
/* ASCII digit test. Note: the argument is evaluated twice. */
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
/* ASCII letter or digit. Note: the argument may be evaluated more than once. */
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
/* Punctuation/symbols allowed in legacy key values */
#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')

/* Size in bytes of the internal keyword-name buffer, including the terminating NUL. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Maximum number of keywords collected from one locale id. */
#define ULOC_MAX_NO_KEYWORDS 25

534U_CAPI const char * U_EXPORT2
535locale_getKeywordsStart(const char *localeID) {
536 const char *result = NULL;
537 if((result = uprv_strchr(localeID, '@')) != NULL) {
538 return result;
539 }
540#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
541 else {
542 /* We do this because the @ sign is variant, and the @ sign used on one
543 EBCDIC machine won't be compiled the same way on other EBCDIC based
544 machines. */
545 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
546 const uint8_t *charToFind = ebcdicSigns;
547 while(*charToFind) {
548 if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
549 return result;
550 }
551 charToFind++;
552 }
553 }
554#endif
555 return NULL;
556}
557
558/**
559 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
560 * @param keywordName incoming name to be canonicalized
561 * @param status return status (keyword too long)
562 * @return length of the keyword name
563 */
564static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
565{
Jungshik Shin87232d82017-05-13 21:10:13 -0700566 int32_t keywordNameLen = 0;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700567
Jungshik Shin87232d82017-05-13 21:10:13 -0700568 for (; *keywordName != 0; keywordName++) {
569 if (!UPRV_ISALPHANUM(*keywordName)) {
570 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
571 return 0;
572 }
573 if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
574 buf[keywordNameLen++] = uprv_tolower(*keywordName);
575 } else {
576 /* keyword name too long for internal buffer */
577 *status = U_INTERNAL_PROGRAM_ERROR;
578 return 0;
579 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000580 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700581 if (keywordNameLen == 0) {
582 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
583 return 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000584 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700585 buf[keywordNameLen] = 0; /* terminate */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700586
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000587 return keywordNameLen;
588}
589
590typedef struct {
591 char keyword[ULOC_KEYWORD_BUFFER_LEN];
592 int32_t keywordLen;
593 const char *valueStart;
594 int32_t valueLen;
595} KeywordStruct;
596
597static int32_t U_CALLCONV
598compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
599 const char* leftString = ((const KeywordStruct *)left)->keyword;
600 const char* rightString = ((const KeywordStruct *)right)->keyword;
601 return uprv_strcmp(leftString, rightString);
602}
603
Frank Tangf90543d2020-10-30 19:02:04 -0700604U_CFUNC void
605ulocimp_getKeywords(const char *localeID,
606 char prev,
607 ByteSink& sink,
608 UBool valuesToo,
609 UErrorCode *status)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000610{
611 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700612
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000613 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
614 int32_t numKeywords = 0;
615 const char* pos = localeID;
616 const char* equalSign = NULL;
617 const char* semicolon = NULL;
618 int32_t i = 0, j, n;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000619
620 if(prev == '@') { /* start of keyword definition */
621 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
622 do {
623 UBool duplicate = FALSE;
624 /* skip leading spaces */
625 while(*pos == ' ') {
626 pos++;
627 }
628 if (!*pos) { /* handle trailing "; " */
629 break;
630 }
631 if(numKeywords == maxKeywords) {
632 *status = U_INTERNAL_PROGRAM_ERROR;
Frank Tangf2223962020-04-27 18:25:29 -0700633 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000634 }
635 equalSign = uprv_strchr(pos, '=');
636 semicolon = uprv_strchr(pos, ';');
637 /* lack of '=' [foo@currency] is illegal */
638 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
639 if(!equalSign || (semicolon && semicolon<equalSign)) {
640 *status = U_INVALID_FORMAT_ERROR;
Frank Tangf2223962020-04-27 18:25:29 -0700641 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000642 }
643 /* need to normalize both keyword and keyword name */
644 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
645 /* keyword name too long for internal buffer */
646 *status = U_INTERNAL_PROGRAM_ERROR;
Frank Tangf2223962020-04-27 18:25:29 -0700647 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000648 }
649 for(i = 0, n = 0; i < equalSign - pos; ++i) {
650 if (pos[i] != ' ') {
651 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
652 }
653 }
654
655 /* zero-length keyword is an error. */
656 if (n == 0) {
657 *status = U_INVALID_FORMAT_ERROR;
Frank Tangf2223962020-04-27 18:25:29 -0700658 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000659 }
660
661 keywordList[numKeywords].keyword[n] = 0;
662 keywordList[numKeywords].keywordLen = n;
663 /* now grab the value part. First we skip the '=' */
664 equalSign++;
665 /* then we leading spaces */
666 while(*equalSign == ' ') {
667 equalSign++;
668 }
669
670 /* Premature end or zero-length value */
Jungshik Shin (jungshik at google)46be5162015-03-26 11:46:43 -0700671 if (!*equalSign || equalSign == semicolon) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000672 *status = U_INVALID_FORMAT_ERROR;
Frank Tangf2223962020-04-27 18:25:29 -0700673 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000674 }
675
676 keywordList[numKeywords].valueStart = equalSign;
677
678 pos = semicolon;
679 i = 0;
680 if(pos) {
681 while(*(pos - i - 1) == ' ') {
682 i++;
683 }
684 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
685 pos++;
686 } else {
687 i = (int32_t)uprv_strlen(equalSign);
688 while(i && equalSign[i-1] == ' ') {
689 i--;
690 }
691 keywordList[numKeywords].valueLen = i;
692 }
693 /* If this is a duplicate keyword, then ignore it */
694 for (j=0; j<numKeywords; ++j) {
695 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
696 duplicate = TRUE;
697 break;
698 }
699 }
700 if (!duplicate) {
701 ++numKeywords;
702 }
703 } while(pos);
704
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000705 /* now we have a list of keywords */
706 /* we need to sort it */
707 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700708
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000709 /* Now construct the keyword part */
710 for(i = 0; i < numKeywords; i++) {
Frank Tangf2223962020-04-27 18:25:29 -0700711 sink.Append(keywordList[i].keyword, keywordList[i].keywordLen);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000712 if(valuesToo) {
Frank Tangf2223962020-04-27 18:25:29 -0700713 sink.Append("=", 1);
714 sink.Append(keywordList[i].valueStart, keywordList[i].valueLen);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000715 if(i < numKeywords - 1) {
Frank Tangf2223962020-04-27 18:25:29 -0700716 sink.Append(";", 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000717 }
Frank Tangf2223962020-04-27 18:25:29 -0700718 } else {
719 sink.Append("\0", 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000720 }
721 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000722 }
723}
724
Frank Tangf90543d2020-10-30 19:02:04 -0700725U_CAPI int32_t U_EXPORT2
726uloc_getKeywordValue(const char* localeID,
727 const char* keywordName,
728 char* buffer, int32_t bufferCapacity,
729 UErrorCode* status)
730{
Frank Tangf2223962020-04-27 18:25:29 -0700731 if (U_FAILURE(*status)) {
732 return 0;
733 }
734
Frank Tangf90543d2020-10-30 19:02:04 -0700735 CheckedArrayByteSink sink(buffer, bufferCapacity);
736 ulocimp_getKeywordValue(localeID, keywordName, sink, status);
Frank Tangf2223962020-04-27 18:25:29 -0700737
738 int32_t reslen = sink.NumberOfBytesAppended();
739
740 if (U_FAILURE(*status)) {
741 return reslen;
742 }
743
744 if (sink.Overflowed()) {
745 *status = U_BUFFER_OVERFLOW_ERROR;
746 } else {
Frank Tangf90543d2020-10-30 19:02:04 -0700747 u_terminateChars(buffer, bufferCapacity, reslen, status);
Frank Tangf2223962020-04-27 18:25:29 -0700748 }
749
750 return reslen;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000751}
752
Frank Tangf90543d2020-10-30 19:02:04 -0700753U_CAPI void U_EXPORT2
754ulocimp_getKeywordValue(const char* localeID,
755 const char* keywordName,
756 icu::ByteSink& sink,
757 UErrorCode* status)
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700758{
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000759 const char* startSearchHere = NULL;
760 const char* nextSeparator = NULL;
761 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
762 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000763
764 if(status && U_SUCCESS(*status) && localeID) {
765 char tempBuffer[ULOC_FULLNAME_CAPACITY];
766 const char* tmpLocaleID;
767
Jungshik Shin87232d82017-05-13 21:10:13 -0700768 if (keywordName == NULL || keywordName[0] == 0) {
769 *status = U_ILLEGAL_ARGUMENT_ERROR;
Frank Tangf90543d2020-10-30 19:02:04 -0700770 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000771 }
772
773 locale_canonKeywordName(keywordNameBuffer, keywordName, status);
774 if(U_FAILURE(*status)) {
Frank Tangf90543d2020-10-30 19:02:04 -0700775 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000776 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700777
Jungshik Shin87232d82017-05-13 21:10:13 -0700778 if (_hasBCP47Extension(localeID)) {
Frank Tang3e05d9d2021-11-08 14:04:04 -0800779 tmpLocaleID = _ConvertBCP47(localeID, tempBuffer,
780 sizeof(tempBuffer), status, nullptr);
Jungshik Shin87232d82017-05-13 21:10:13 -0700781 } else {
782 tmpLocaleID=localeID;
783 }
784
785 startSearchHere = locale_getKeywordsStart(tmpLocaleID);
786 if(startSearchHere == NULL) {
787 /* no keywords, return at once */
Frank Tangf90543d2020-10-30 19:02:04 -0700788 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700789 }
790
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000791 /* find the first keyword */
792 while(startSearchHere) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700793 const char* keyValueTail;
794 int32_t keyValueLen;
795
796 startSearchHere++; /* skip @ or ; */
797 nextSeparator = uprv_strchr(startSearchHere, '=');
798 if(!nextSeparator) {
799 *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
Frank Tangf90543d2020-10-30 19:02:04 -0700800 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700801 }
802 /* strip leading & trailing spaces (TC decided to tolerate these) */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000803 while(*startSearchHere == ' ') {
804 startSearchHere++;
805 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700806 keyValueTail = nextSeparator;
807 while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
808 keyValueTail--;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000809 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700810 /* now keyValueTail points to first char after the keyName */
811 /* copy & normalize keyName from locale */
812 if (startSearchHere == keyValueTail) {
813 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
Frank Tangf90543d2020-10-30 19:02:04 -0700814 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700815 }
816 keyValueLen = 0;
817 while (startSearchHere < keyValueTail) {
818 if (!UPRV_ISALPHANUM(*startSearchHere)) {
819 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
Frank Tangf90543d2020-10-30 19:02:04 -0700820 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700821 }
822 if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
823 localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++);
824 } else {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000825 /* keyword name too long for internal buffer */
826 *status = U_INTERNAL_PROGRAM_ERROR;
Frank Tangf90543d2020-10-30 19:02:04 -0700827 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700828 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000829 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700830 localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700831
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000832 startSearchHere = uprv_strchr(nextSeparator, ';');
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700833
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000834 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700835 /* current entry matches the keyword. */
836 nextSeparator++; /* skip '=' */
837 /* First strip leading & trailing spaces (TC decided to tolerate these) */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000838 while(*nextSeparator == ' ') {
Jungshik Shin87232d82017-05-13 21:10:13 -0700839 nextSeparator++;
840 }
841 keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
842 while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
843 keyValueTail--;
844 }
845 /* Now copy the value, but check well-formedness */
846 if (nextSeparator == keyValueTail) {
847 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
Frank Tangf90543d2020-10-30 19:02:04 -0700848 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700849 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700850 while (nextSeparator < keyValueTail) {
851 if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) {
852 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
Frank Tangf90543d2020-10-30 19:02:04 -0700853 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700854 }
Frank Tangf90543d2020-10-30 19:02:04 -0700855 /* Should we lowercase value to return here? Tests expect as-is. */
856 sink.Append(nextSeparator++, 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000857 }
Frank Tangf90543d2020-10-30 19:02:04 -0700858 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000859 }
860 }
861 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000862}
863
864U_CAPI int32_t U_EXPORT2
865uloc_setKeywordValue(const char* keywordName,
866 const char* keywordValue,
867 char* buffer, int32_t bufferCapacity,
868 UErrorCode* status)
869{
870 /* TODO: sorting. removal. */
871 int32_t keywordNameLen;
872 int32_t keywordValueLen;
873 int32_t bufLen;
874 int32_t needLen = 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000875 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
Jungshik Shin87232d82017-05-13 21:10:13 -0700876 char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000877 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000878 int32_t rc;
879 char* nextSeparator = NULL;
880 char* nextEqualsign = NULL;
881 char* startSearchHere = NULL;
882 char* keywordStart = NULL;
Jungshik Shin87232d82017-05-13 21:10:13 -0700883 CharString updatedKeysAndValues;
Jungshik Shin87232d82017-05-13 21:10:13 -0700884 UBool handledInputKeyAndValue = FALSE;
885 char keyValuePrefix = '@';
886
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700887 if(U_FAILURE(*status)) {
888 return -1;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000889 }
Frank Tangefc58852020-11-12 11:50:18 -0800890 if (*status == U_STRING_NOT_TERMINATED_WARNING) {
891 *status = U_ZERO_ERROR;
892 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700893 if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000894 *status = U_ILLEGAL_ARGUMENT_ERROR;
895 return 0;
896 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700897 bufLen = (int32_t)uprv_strlen(buffer);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000898 if(bufferCapacity<bufLen) {
899 /* The capacity is less than the length?! Is this NULL terminated? */
900 *status = U_ILLEGAL_ARGUMENT_ERROR;
901 return 0;
902 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000903 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
904 if(U_FAILURE(*status)) {
905 return 0;
906 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700907
908 keywordValueLen = 0;
909 if(keywordValue) {
910 while (*keywordValue != 0) {
911 if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) {
912 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
913 return 0;
914 }
915 if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {
916 /* Should we force lowercase in value to set? */
917 keywordValueBuffer[keywordValueLen++] = *keywordValue++;
918 } else {
919 /* keywordValue too long for internal buffer */
920 *status = U_INTERNAL_PROGRAM_ERROR;
921 return 0;
922 }
923 }
924 }
925 keywordValueBuffer[keywordValueLen] = 0; /* terminate */
926
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000927 startSearchHere = (char*)locale_getKeywordsStart(buffer);
928 if(startSearchHere == NULL || (startSearchHere[1]==0)) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700929 if(keywordValueLen == 0) { /* no keywords = nothing to remove */
Frank Tangefc58852020-11-12 11:50:18 -0800930 U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700931 return bufLen;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000932 }
933
934 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700935 if(startSearchHere) { /* had a single @ */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000936 needLen--; /* already had the @ */
937 /* startSearchHere points at the @ */
938 } else {
939 startSearchHere=buffer+bufLen;
940 }
941 if(needLen >= bufferCapacity) {
942 *status = U_BUFFER_OVERFLOW_ERROR;
943 return needLen; /* no change */
944 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700945 *startSearchHere++ = '@';
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000946 uprv_strcpy(startSearchHere, keywordNameBuffer);
947 startSearchHere += keywordNameLen;
Jungshik Shin87232d82017-05-13 21:10:13 -0700948 *startSearchHere++ = '=';
949 uprv_strcpy(startSearchHere, keywordValueBuffer);
Frank Tangefc58852020-11-12 11:50:18 -0800950 U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000951 return needLen;
952 } /* end shortcut - no @ */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700953
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000954 keywordStart = startSearchHere;
955 /* search for keyword */
956 while(keywordStart) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700957 const char* keyValueTail;
958 int32_t keyValueLen;
959
960 keywordStart++; /* skip @ or ; */
961 nextEqualsign = uprv_strchr(keywordStart, '=');
962 if (!nextEqualsign) {
963 *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
964 return 0;
965 }
966 /* strip leading & trailing spaces (TC decided to tolerate these) */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000967 while(*keywordStart == ' ') {
968 keywordStart++;
969 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700970 keyValueTail = nextEqualsign;
971 while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') {
972 keyValueTail--;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000973 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700974 /* now keyValueTail points to first char after the keyName */
975 /* copy & normalize keyName from locale */
976 if (keywordStart == keyValueTail) {
977 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000978 return 0;
979 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700980 keyValueLen = 0;
981 while (keywordStart < keyValueTail) {
982 if (!UPRV_ISALPHANUM(*keywordStart)) {
983 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
984 return 0;
985 }
986 if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
987 localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);
988 } else {
989 /* keyword name too long for internal buffer */
990 *status = U_INTERNAL_PROGRAM_ERROR;
991 return 0;
992 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000993 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700994 localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000995
996 nextSeparator = uprv_strchr(nextEqualsign, ';');
Jungshik Shin87232d82017-05-13 21:10:13 -0700997
998 /* start processing the value part */
999 nextEqualsign++; /* skip '=' */
1000 /* First strip leading & trailing spaces (TC decided to tolerate these) */
1001 while(*nextEqualsign == ' ') {
1002 nextEqualsign++;
1003 }
1004 keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);
1005 while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') {
1006 keyValueTail--;
1007 }
1008 if (nextEqualsign == keyValueTail) {
1009 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
1010 return 0;
1011 }
1012
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001013 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1014 if(rc == 0) {
Jungshik Shin87232d82017-05-13 21:10:13 -07001015 /* Current entry matches the input keyword. Update the entry */
1016 if(keywordValueLen > 0) { /* updating a value */
1017 updatedKeysAndValues.append(keyValuePrefix, *status);
1018 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1019 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1020 updatedKeysAndValues.append('=', *status);
1021 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1022 } /* else removing this entry, don't emit anything */
1023 handledInputKeyAndValue = TRUE;
1024 } else {
1025 /* input keyword sorts earlier than current entry, add before current entry */
1026 if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
1027 /* insert new entry at this location */
1028 updatedKeysAndValues.append(keyValuePrefix, *status);
1029 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1030 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1031 updatedKeysAndValues.append('=', *status);
1032 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1033 handledInputKeyAndValue = TRUE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001034 }
Jungshik Shin87232d82017-05-13 21:10:13 -07001035 /* copy the current entry */
1036 updatedKeysAndValues.append(keyValuePrefix, *status);
1037 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1038 updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
1039 updatedKeysAndValues.append('=', *status);
Jungshik Shin42d50272018-10-24 01:22:09 -07001040 updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), *status);
Jungshik Shin87232d82017-05-13 21:10:13 -07001041 }
1042 if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
1043 /* append new entry at the end, it sorts later than existing entries */
1044 updatedKeysAndValues.append(keyValuePrefix, *status);
1045 /* skip keyValuePrefix update, no subsequent key-value pair */
1046 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1047 updatedKeysAndValues.append('=', *status);
1048 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1049 handledInputKeyAndValue = TRUE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001050 }
1051 keywordStart = nextSeparator;
1052 } /* end loop searching */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001053
Jungshik Shin87232d82017-05-13 21:10:13 -07001054 /* Any error from updatedKeysAndValues.append above would be internal and not due to
1055 * problems with the passed-in locale. So if we did encounter problems with the
1056 * passed-in locale above, those errors took precedence and overrode any error
1057 * status from updatedKeysAndValues.append, and also caused a return of 0. If there
1058 * are errors here they are from updatedKeysAndValues.append; they do cause an
1059 * error return but the passed-in locale is unmodified and the original bufLen is
1060 * returned.
1061 */
1062 if (!handledInputKeyAndValue || U_FAILURE(*status)) {
1063 /* if input key/value specified removal of a keyword not present in locale, or
1064 * there was an error in CharString.append, leave original locale alone. */
Frank Tangefc58852020-11-12 11:50:18 -08001065 U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
Jungshik Shin87232d82017-05-13 21:10:13 -07001066 return bufLen;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001067 }
1068
Frank Tangf90543d2020-10-30 19:02:04 -07001069 // needLen = length of the part before '@'
1070 needLen = (int32_t)(startSearchHere - buffer);
Frank Tangefc58852020-11-12 11:50:18 -08001071 // Check to see can we fit the startSearchHere, if not, return
1072 // U_BUFFER_OVERFLOW_ERROR without copy updatedKeysAndValues into it.
1073 // We do this because this API function does not behave like most others:
1074 // It promises never to set a U_STRING_NOT_TERMINATED_WARNING.
1075 // When the contents fits but without the terminating NUL, in this case we need to not change
1076 // the buffer contents and return with a buffer overflow error.
1077 int32_t appendLength = updatedKeysAndValues.length();
1078 if (appendLength >= bufferCapacity - needLen) {
1079 *status = U_BUFFER_OVERFLOW_ERROR;
1080 return needLen + appendLength;
1081 }
1082 needLen += updatedKeysAndValues.extract(
Frank Tangf90543d2020-10-30 19:02:04 -07001083 startSearchHere, bufferCapacity - needLen, *status);
Frank Tangefc58852020-11-12 11:50:18 -08001084 U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
1085 return needLen;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001086}
1087
1088/* ### ID parsing implementation **************************************************/
1089
/* TRUE if 'a' is one of the letters that can start a special prefix.
 * The argument is parenthesized so that expression arguments expand correctly. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1100
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001101/**
1102 * Lookup 'key' in the array 'list'. The array 'list' should contain
1103 * a NULL entry, followed by more entries, and a second NULL entry.
1104 *
1105 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1106 * COUNTRIES_3.
1107 */
1108static int16_t _findIndex(const char* const* list, const char* key)
1109{
1110 const char* const* anchor = list;
1111 int32_t pass = 0;
1112
1113 /* Make two passes through two NULL-terminated arrays at 'list' */
1114 while (pass++ < 2) {
1115 while (*list) {
1116 if (uprv_strcmp(key, *list) == 0) {
1117 return (int16_t)(list - anchor);
1118 }
1119 list++;
1120 }
1121 ++list; /* skip final NULL *CWB*/
1122 }
1123 return -1;
1124}
1125
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001126U_CFUNC const char*
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001127uloc_getCurrentCountryID(const char* oldID){
1128 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1129 if (offset >= 0) {
1130 return REPLACEMENT_COUNTRIES[offset];
1131 }
1132 return oldID;
1133}
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001134U_CFUNC const char*
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001135uloc_getCurrentLanguageID(const char* oldID){
1136 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1137 if (offset >= 0) {
1138 return REPLACEMENT_LANGUAGES[offset];
1139 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001140 return oldID;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001141}
1142/*
1143 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1144 * avoid duplicating code to handle the earlier locale ID pieces
1145 * in the functions for the later ones by
1146 * setting the *pEnd pointer to where they stopped parsing
1147 *
1148 * TODO try to use this in Locale
1149 */
Frank Tangf90543d2020-10-30 19:02:04 -07001150CharString U_EXPORT2
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001151ulocimp_getLanguage(const char *localeID,
Frank Tangf2223962020-04-27 18:25:29 -07001152 const char **pEnd,
1153 UErrorCode &status) {
1154 CharString result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001155
Frank Tang69c72a62019-04-03 21:41:21 -07001156 if (uprv_stricmp(localeID, "root") == 0) {
1157 localeID += 4;
1158 } else if (uprv_strnicmp(localeID, "und", 3) == 0 &&
1159 (localeID[3] == '\0' ||
1160 localeID[3] == '-' ||
1161 localeID[3] == '_' ||
1162 localeID[3] == '@')) {
1163 localeID += 3;
1164 }
1165
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001166 /* if it starts with i- or x- then copy that prefix */
1167 if(_isIDPrefix(localeID)) {
Frank Tangf2223962020-04-27 18:25:29 -07001168 result.append((char)uprv_tolower(*localeID), status);
1169 result.append('-', status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001170 localeID+=2;
1171 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001172
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001173 /* copy the language as far as possible and count its length */
1174 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
Frank Tangf2223962020-04-27 18:25:29 -07001175 result.append((char)uprv_tolower(*localeID), status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001176 localeID++;
1177 }
1178
Frank Tangf2223962020-04-27 18:25:29 -07001179 if(result.length()==3) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001180 /* convert 3 character code to 2 character code if possible *CWB*/
Frank Tangf2223962020-04-27 18:25:29 -07001181 int32_t offset = _findIndex(LANGUAGES_3, result.data());
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001182 if(offset>=0) {
Frank Tangf2223962020-04-27 18:25:29 -07001183 result.clear();
1184 result.append(LANGUAGES[offset], status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001185 }
1186 }
1187
1188 if(pEnd!=NULL) {
1189 *pEnd=localeID;
1190 }
Frank Tangf2223962020-04-27 18:25:29 -07001191
1192 return result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001193}
1194
Frank Tangf90543d2020-10-30 19:02:04 -07001195CharString U_EXPORT2
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001196ulocimp_getScript(const char *localeID,
Frank Tangf2223962020-04-27 18:25:29 -07001197 const char **pEnd,
1198 UErrorCode &status) {
1199 CharString result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001200 int32_t idLen = 0;
1201
1202 if (pEnd != NULL) {
1203 *pEnd = localeID;
1204 }
1205
1206 /* copy the second item as far as possible and count its length */
1207 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
1208 && uprv_isASCIILetter(localeID[idLen])) {
1209 idLen++;
1210 }
1211
1212 /* If it's exactly 4 characters long, then it's a script and not a country. */
1213 if (idLen == 4) {
1214 int32_t i;
1215 if (pEnd != NULL) {
1216 *pEnd = localeID+idLen;
1217 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001218 if (idLen >= 1) {
Frank Tangf2223962020-04-27 18:25:29 -07001219 result.append((char)uprv_toupper(*(localeID++)), status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001220 }
1221 for (i = 1; i < idLen; i++) {
Frank Tangf2223962020-04-27 18:25:29 -07001222 result.append((char)uprv_tolower(*(localeID++)), status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001223 }
1224 }
Frank Tangf2223962020-04-27 18:25:29 -07001225
1226 return result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001227}
1228
Frank Tangf90543d2020-10-30 19:02:04 -07001229CharString U_EXPORT2
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001230ulocimp_getCountry(const char *localeID,
Frank Tangf2223962020-04-27 18:25:29 -07001231 const char **pEnd,
1232 UErrorCode &status) {
1233 CharString result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001234 int32_t idLen=0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001235
1236 /* copy the country as far as possible and count its length */
1237 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
Frank Tangf2223962020-04-27 18:25:29 -07001238 result.append((char)uprv_toupper(localeID[idLen]), status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001239 idLen++;
1240 }
1241
1242 /* the country should be either length 2 or 3 */
1243 if (idLen == 2 || idLen == 3) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001244 /* convert 3 character code to 2 character code if possible *CWB*/
1245 if(idLen==3) {
Frank Tangf2223962020-04-27 18:25:29 -07001246 int32_t offset = _findIndex(COUNTRIES_3, result.data());
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001247 if(offset>=0) {
Frank Tangf2223962020-04-27 18:25:29 -07001248 result.clear();
1249 result.append(COUNTRIES[offset], status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001250 }
1251 }
1252 localeID+=idLen;
1253 } else {
Frank Tangf2223962020-04-27 18:25:29 -07001254 result.clear();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001255 }
1256
1257 if(pEnd!=NULL) {
1258 *pEnd=localeID;
1259 }
1260
Frank Tangf2223962020-04-27 18:25:29 -07001261 return result;
1262}
1263
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001264/**
1265 * @param needSeparator if true, then add leading '_' if any variants
1266 * are added to 'variant'
1267 */
Frank Tangf2223962020-04-27 18:25:29 -07001268static void
Frank Tangf90543d2020-10-30 19:02:04 -07001269_getVariant(const char *localeID,
1270 char prev,
1271 ByteSink& sink,
1272 UBool needSeparator) {
Frank Tangf2223962020-04-27 18:25:29 -07001273 UBool hasVariant = FALSE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001274
1275 /* get one or more variant tags and separate them with '_' */
1276 if(_isIDSeparator(prev)) {
1277 /* get a variant string after a '-' or '_' */
1278 while(!_isTerminator(*localeID)) {
1279 if (needSeparator) {
Frank Tangf2223962020-04-27 18:25:29 -07001280 sink.Append("_", 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001281 needSeparator = FALSE;
1282 }
Frank Tangf2223962020-04-27 18:25:29 -07001283 char c = (char)uprv_toupper(*localeID);
1284 if (c == '-') c = '_';
1285 sink.Append(&c, 1);
1286 hasVariant = TRUE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001287 localeID++;
1288 }
1289 }
1290
1291 /* if there is no variant tag after a '-' or '_' then look for '@' */
Frank Tangf2223962020-04-27 18:25:29 -07001292 if(!hasVariant) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001293 if(prev=='@') {
1294 /* keep localeID */
1295 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1296 ++localeID; /* point after the '@' */
1297 } else {
Frank Tangf2223962020-04-27 18:25:29 -07001298 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001299 }
1300 while(!_isTerminator(*localeID)) {
1301 if (needSeparator) {
Frank Tangf2223962020-04-27 18:25:29 -07001302 sink.Append("_", 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001303 needSeparator = FALSE;
1304 }
Frank Tangf2223962020-04-27 18:25:29 -07001305 char c = (char)uprv_toupper(*localeID);
1306 if (c == '-' || c == ',') c = '_';
1307 sink.Append(&c, 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001308 localeID++;
1309 }
1310 }
Frank Tangf2223962020-04-27 18:25:29 -07001311}
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001312
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001313/* Keyword enumeration */
1314
/**
 * Per-enumeration state for the keyword UEnumeration callbacks in this file.
 * The buffer holds keyword names stored back to back, each NUL-terminated;
 * an empty string marks the end of the list.
 */
typedef struct UKeywordsContext {
    char* keywords;  /* start of the owned buffer; freed by the close callback */
    char* current;   /* read cursor: next keyword to return; reset to `keywords` */
} UKeywordsContext;
1319
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001320U_CDECL_BEGIN
1321
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001322static void U_CALLCONV
1323uloc_kw_closeKeywords(UEnumeration *enumerator) {
1324 uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1325 uprv_free(enumerator->context);
1326 uprv_free(enumerator);
1327}
1328
1329static int32_t U_CALLCONV
1330uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
1331 char *kw = ((UKeywordsContext *)en->context)->keywords;
1332 int32_t result = 0;
1333 while(*kw) {
1334 result++;
1335 kw += uprv_strlen(kw)+1;
1336 }
1337 return result;
1338}
1339
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001340static const char * U_CALLCONV
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001341uloc_kw_nextKeyword(UEnumeration* en,
1342 int32_t* resultLength,
1343 UErrorCode* /*status*/) {
1344 const char* result = ((UKeywordsContext *)en->context)->current;
1345 int32_t len = 0;
1346 if(*result) {
1347 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1348 ((UKeywordsContext *)en->context)->current += len+1;
1349 } else {
1350 result = NULL;
1351 }
1352 if (resultLength) {
1353 *resultLength = len;
1354 }
1355 return result;
1356}
1357
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001358static void U_CALLCONV
1359uloc_kw_resetKeywords(UEnumeration* en,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001360 UErrorCode* /*status*/) {
1361 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1362}
1363
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001364U_CDECL_END
1365
1366
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001367static const UEnumeration gKeywordsEnum = {
1368 NULL,
1369 NULL,
1370 uloc_kw_closeKeywords,
1371 uloc_kw_countKeywords,
1372 uenum_unextDefault,
1373 uloc_kw_nextKeyword,
1374 uloc_kw_resetKeywords
1375};
1376
1377U_CAPI UEnumeration* U_EXPORT2
1378uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1379{
Frank Tangb8696612019-10-25 14:58:21 -07001380 LocalMemory<UKeywordsContext> myContext;
1381 LocalMemory<UEnumeration> result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001382
Frank Tangb8696612019-10-25 14:58:21 -07001383 if (U_FAILURE(*status)) {
1384 return nullptr;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001385 }
Frank Tangb8696612019-10-25 14:58:21 -07001386 myContext.adoptInstead(static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))));
1387 result.adoptInstead(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))));
1388 if (myContext.isNull() || result.isNull()) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001389 *status = U_MEMORY_ALLOCATION_ERROR;
Frank Tangb8696612019-10-25 14:58:21 -07001390 return nullptr;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001391 }
Frank Tangb8696612019-10-25 14:58:21 -07001392 uprv_memcpy(result.getAlias(), &gKeywordsEnum, sizeof(UEnumeration));
1393 myContext->keywords = static_cast<char *>(uprv_malloc(keywordListSize+1));
1394 if (myContext->keywords == nullptr) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001395 *status = U_MEMORY_ALLOCATION_ERROR;
Frank Tangb8696612019-10-25 14:58:21 -07001396 return nullptr;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001397 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001398 uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1399 myContext->keywords[keywordListSize] = 0;
1400 myContext->current = myContext->keywords;
Frank Tangb8696612019-10-25 14:58:21 -07001401 result->context = myContext.orphan();
1402 return result.orphan();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001403}
1404
1405U_CAPI UEnumeration* U_EXPORT2
1406uloc_openKeywords(const char* localeID,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001407 UErrorCode* status)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001408{
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001409 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1410 const char* tmpLocaleID;
1411
1412 if(status==NULL || U_FAILURE(*status)) {
1413 return 0;
1414 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001415
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001416 if (_hasBCP47Extension(localeID)) {
Frank Tang3e05d9d2021-11-08 14:04:04 -08001417 tmpLocaleID = _ConvertBCP47(localeID, tempBuffer,
1418 sizeof(tempBuffer), status, nullptr);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001419 } else {
1420 if (localeID==NULL) {
Frank Tang3e05d9d2021-11-08 14:04:04 -08001421 localeID=uloc_getDefault();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001422 }
1423 tmpLocaleID=localeID;
1424 }
1425
1426 /* Skip the language */
Frank Tangf90543d2020-10-30 19:02:04 -07001427 ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *status);
1428 if (U_FAILURE(*status)) {
1429 return 0;
1430 }
1431
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001432 if(_isIDSeparator(*tmpLocaleID)) {
1433 const char *scriptID;
1434 /* Skip the script if available */
Frank Tangf90543d2020-10-30 19:02:04 -07001435 ulocimp_getScript(tmpLocaleID+1, &scriptID, *status);
1436 if (U_FAILURE(*status)) {
1437 return 0;
1438 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001439 if(scriptID != tmpLocaleID+1) {
1440 /* Found optional script */
1441 tmpLocaleID = scriptID;
1442 }
1443 /* Skip the Country */
1444 if (_isIDSeparator(*tmpLocaleID)) {
Frank Tangf90543d2020-10-30 19:02:04 -07001445 ulocimp_getCountry(tmpLocaleID+1, &tmpLocaleID, *status);
1446 if (U_FAILURE(*status)) {
1447 return 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001448 }
1449 }
1450 }
1451
1452 /* keywords are located after '@' */
1453 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
Frank Tangf90543d2020-10-30 19:02:04 -07001454 CharString keywords;
1455 CharStringByteSink sink(&keywords);
1456 ulocimp_getKeywords(tmpLocaleID+1, '@', sink, FALSE, status);
1457 if (U_FAILURE(*status)) {
1458 return NULL;
1459 }
1460 return uloc_openKeywordList(keywords.data(), keywords.length(), status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001461 }
Frank Tangf90543d2020-10-30 19:02:04 -07001462 return NULL;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001463}
1464
1465
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/*
 * True when any bit of `mask` is set in `options`. Both arguments are
 * parenthesized so that compound expressions such as `a | b` are evaluated
 * before the '&' is applied.
 */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The tag "i-default", stored without a terminating NUL. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001474
1475/**
1476 * Canonicalize the given localeID, to level 1 or to level 2,
1477 * depending on the options. To specify level 1, pass in options=0.
1478 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1479 *
1480 * This is the code underlying uloc_getName and uloc_canonicalize.
1481 */
Frank Tangf2223962020-04-27 18:25:29 -07001482static void
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001483_canonicalize(const char* localeID,
Frank Tangf2223962020-04-27 18:25:29 -07001484 ByteSink& sink,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001485 uint32_t options,
1486 UErrorCode* err) {
Frank Tang3e05d9d2021-11-08 14:04:04 -08001487 if (U_FAILURE(*err)) {
1488 return;
1489 }
1490
Frank Tangf2223962020-04-27 18:25:29 -07001491 int32_t j, fieldCount=0, scriptSize=0, variantSize=0;
Frank Tang3e05d9d2021-11-08 14:04:04 -08001492 PreflightingLocaleIDBuffer tempBuffer; // if localeID has a BCP47 extension, tmpLocaleID points to this
1493 CharString localeIDWithHyphens; // if localeID has a BPC47 extension and have _, tmpLocaleID points to this
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001494 const char* origLocaleID;
1495 const char* tmpLocaleID;
1496 const char* keywordAssign = NULL;
1497 const char* separatorIndicator = NULL;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001498
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001499 if (_hasBCP47Extension(localeID)) {
Frank Tang3e05d9d2021-11-08 14:04:04 -08001500 const char* localeIDPtr = localeID;
1501
1502 // convert all underbars to hyphens, unless the "BCP47 extension" comes at the beginning of the string
1503 if (uprv_strchr(localeID, '_') != nullptr && localeID[1] != '-' && localeID[1] != '_') {
1504 localeIDWithHyphens.append(localeID, -1, *err);
1505 if (U_SUCCESS(*err)) {
1506 for (char* p = localeIDWithHyphens.data(); *p != '\0'; ++p) {
1507 if (*p == '_') {
1508 *p = '-';
1509 }
1510 }
1511 localeIDPtr = localeIDWithHyphens.data();
1512 }
1513 }
1514
Frank Tanga38aef92021-08-10 15:57:41 -07001515 do {
Frank Tang3e05d9d2021-11-08 14:04:04 -08001516 // After this call tmpLocaleID may point to localeIDPtr which may
1517 // point to either localeID or localeIDWithHyphens.data().
1518 tmpLocaleID = _ConvertBCP47(localeIDPtr, tempBuffer.getBuffer(),
1519 tempBuffer.getCapacity(), err,
1520 &(tempBuffer.requestedCapacity));
Frank Tanga38aef92021-08-10 15:57:41 -07001521 } while (tempBuffer.needToTryAgain(err));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001522 } else {
1523 if (localeID==NULL) {
1524 localeID=uloc_getDefault();
1525 }
1526 tmpLocaleID=localeID;
1527 }
1528
1529 origLocaleID=tmpLocaleID;
1530
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001531 /* get all pieces, one after another, and separate with '_' */
Frank Tangf2223962020-04-27 18:25:29 -07001532 CharString tag = ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001533
Frank Tangf2223962020-04-27 18:25:29 -07001534 if (tag.length() == I_DEFAULT_LENGTH &&
1535 uprv_strncmp(origLocaleID, i_default, I_DEFAULT_LENGTH) == 0) {
1536 tag.clear();
1537 tag.append(uloc_getDefault(), *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001538 } else if(_isIDSeparator(*tmpLocaleID)) {
1539 const char *scriptID;
1540
1541 ++fieldCount;
Frank Tangf2223962020-04-27 18:25:29 -07001542 tag.append('_', *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001543
Frank Tangf2223962020-04-27 18:25:29 -07001544 CharString script = ulocimp_getScript(tmpLocaleID+1, &scriptID, *err);
1545 tag.append(script, *err);
1546 scriptSize = script.length();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001547 if(scriptSize > 0) {
1548 /* Found optional script */
1549 tmpLocaleID = scriptID;
1550 ++fieldCount;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001551 if (_isIDSeparator(*tmpLocaleID)) {
1552 /* If there is something else, then we add the _ */
Frank Tangf2223962020-04-27 18:25:29 -07001553 tag.append('_', *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001554 }
1555 }
1556
1557 if (_isIDSeparator(*tmpLocaleID)) {
1558 const char *cntryID;
Frank Tangf2223962020-04-27 18:25:29 -07001559
1560 CharString country = ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err);
1561 tag.append(country, *err);
1562 if (!country.isEmpty()) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001563 /* Found optional country */
1564 tmpLocaleID = cntryID;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001565 }
1566 if(_isIDSeparator(*tmpLocaleID)) {
1567 /* If there is something else, then we add the _ if we found country before. */
Frank Tangf2223962020-04-27 18:25:29 -07001568 if (!_isIDSeparator(*(tmpLocaleID+1))) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001569 ++fieldCount;
Frank Tangf2223962020-04-27 18:25:29 -07001570 tag.append('_', *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001571 }
1572
Frank Tangf2223962020-04-27 18:25:29 -07001573 variantSize = -tag.length();
1574 {
1575 CharStringByteSink s(&tag);
Frank Tangf90543d2020-10-30 19:02:04 -07001576 _getVariant(tmpLocaleID+1, *tmpLocaleID, s, FALSE);
Frank Tangf2223962020-04-27 18:25:29 -07001577 }
1578 variantSize += tag.length();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001579 if (variantSize > 0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001580 tmpLocaleID += variantSize + 1; /* skip '_' and variant */
1581 }
1582 }
1583 }
1584 }
1585
1586 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1587 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
1588 UBool done = FALSE;
1589 do {
1590 char c = *tmpLocaleID;
1591 switch (c) {
1592 case 0:
1593 case '@':
1594 done = TRUE;
1595 break;
1596 default:
Frank Tangf2223962020-04-27 18:25:29 -07001597 tag.append(c, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001598 ++tmpLocaleID;
1599 break;
1600 }
1601 } while (!done);
1602 }
1603
1604 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1605 After this, tmpLocaleID either points to '@' or is NULL */
1606 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1607 keywordAssign = uprv_strchr(tmpLocaleID, '=');
1608 separatorIndicator = uprv_strchr(tmpLocaleID, ';');
1609 }
1610
1611 /* Copy POSIX-style variant, if any [mr@FOO] */
1612 if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1613 tmpLocaleID != NULL && keywordAssign == NULL) {
1614 for (;;) {
1615 char c = *tmpLocaleID;
1616 if (c == 0) {
1617 break;
1618 }
Frank Tangf2223962020-04-27 18:25:29 -07001619 tag.append(c, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001620 ++tmpLocaleID;
1621 }
1622 }
1623
1624 if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1625 /* Handle @FOO variant if @ is present and not followed by = */
1626 if (tmpLocaleID!=NULL && keywordAssign==NULL) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001627 /* Add missing '_' if needed */
1628 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1629 do {
Frank Tangf2223962020-04-27 18:25:29 -07001630 tag.append('_', *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001631 ++fieldCount;
1632 } while(fieldCount<2);
1633 }
Frank Tangf2223962020-04-27 18:25:29 -07001634
1635 int32_t posixVariantSize = -tag.length();
1636 {
1637 CharStringByteSink s(&tag);
Frank Tangf90543d2020-10-30 19:02:04 -07001638 _getVariant(tmpLocaleID+1, '@', s, (UBool)(variantSize > 0));
Frank Tangf2223962020-04-27 18:25:29 -07001639 }
1640 posixVariantSize += tag.length();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001641 if (posixVariantSize > 0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001642 variantSize += posixVariantSize;
1643 }
1644 }
1645
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001646 /* Look up the ID in the canonicalization map */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001647 for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
Frank Tangf2223962020-04-27 18:25:29 -07001648 StringPiece id(CANONICALIZE_MAP[j].id);
1649 if (tag == id) {
1650 if (id.empty() && tmpLocaleID != NULL) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001651 break; /* Don't remap "" if keywords present */
1652 }
Frank Tangf2223962020-04-27 18:25:29 -07001653 tag.clear();
1654 tag.append(CANONICALIZE_MAP[j].canonicalID, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001655 break;
1656 }
1657 }
1658 }
1659
Frank Tangf2223962020-04-27 18:25:29 -07001660 sink.Append(tag.data(), tag.length());
1661
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001662 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1663 if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1664 (!separatorIndicator || separatorIndicator > keywordAssign)) {
Frank Tangf2223962020-04-27 18:25:29 -07001665 sink.Append("@", 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001666 ++fieldCount;
Frank Tangf90543d2020-10-30 19:02:04 -07001667 ulocimp_getKeywords(tmpLocaleID+1, '@', sink, TRUE, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001668 }
1669 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001670}
1671
1672/* ### ID parsing API **************************************************/
1673
1674U_CAPI int32_t U_EXPORT2
1675uloc_getParent(const char* localeID,
1676 char* parent,
1677 int32_t parentCapacity,
1678 UErrorCode* err)
1679{
1680 const char *lastUnderscore;
1681 int32_t i;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001682
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001683 if (U_FAILURE(*err))
1684 return 0;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001685
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001686 if (localeID == NULL)
1687 localeID = uloc_getDefault();
1688
1689 lastUnderscore=uprv_strrchr(localeID, '_');
1690 if(lastUnderscore!=NULL) {
1691 i=(int32_t)(lastUnderscore-localeID);
1692 } else {
1693 i=0;
1694 }
1695
Frank Tang69c72a62019-04-03 21:41:21 -07001696 if (i > 0) {
1697 if (uprv_strnicmp(localeID, "und_", 4) == 0) {
1698 localeID += 3;
1699 i -= 3;
1700 uprv_memmove(parent, localeID, uprv_min(i, parentCapacity));
1701 } else if (parent != localeID) {
1702 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1703 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001704 }
Frank Tang69c72a62019-04-03 21:41:21 -07001705
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001706 return u_terminateChars(parent, parentCapacity, i, err);
1707}
1708
1709U_CAPI int32_t U_EXPORT2
1710uloc_getLanguage(const char* localeID,
1711 char* language,
1712 int32_t languageCapacity,
1713 UErrorCode* err)
1714{
1715 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001716
1717 if (err==NULL || U_FAILURE(*err)) {
1718 return 0;
1719 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001720
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001721 if(localeID==NULL) {
1722 localeID=uloc_getDefault();
1723 }
1724
Frank Tangf90543d2020-10-30 19:02:04 -07001725 return ulocimp_getLanguage(localeID, NULL, *err).extract(language, languageCapacity, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001726}
1727
1728U_CAPI int32_t U_EXPORT2
1729uloc_getScript(const char* localeID,
1730 char* script,
1731 int32_t scriptCapacity,
1732 UErrorCode* err)
1733{
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001734 if(err==NULL || U_FAILURE(*err)) {
1735 return 0;
1736 }
1737
1738 if(localeID==NULL) {
1739 localeID=uloc_getDefault();
1740 }
1741
1742 /* skip the language */
Frank Tangf90543d2020-10-30 19:02:04 -07001743 ulocimp_getLanguage(localeID, &localeID, *err);
1744 if (U_FAILURE(*err)) {
1745 return 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001746 }
Frank Tangf90543d2020-10-30 19:02:04 -07001747
1748 if(_isIDSeparator(*localeID)) {
1749 return ulocimp_getScript(localeID+1, NULL, *err).extract(script, scriptCapacity, *err);
1750 }
1751 return u_terminateChars(script, scriptCapacity, 0, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001752}
1753
1754U_CAPI int32_t U_EXPORT2
1755uloc_getCountry(const char* localeID,
1756 char* country,
1757 int32_t countryCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001758 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001759{
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001760 if(err==NULL || U_FAILURE(*err)) {
1761 return 0;
1762 }
1763
1764 if(localeID==NULL) {
1765 localeID=uloc_getDefault();
1766 }
1767
1768 /* Skip the language */
Frank Tangf90543d2020-10-30 19:02:04 -07001769 ulocimp_getLanguage(localeID, &localeID, *err);
1770 if (U_FAILURE(*err)) {
1771 return 0;
1772 }
1773
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001774 if(_isIDSeparator(*localeID)) {
1775 const char *scriptID;
1776 /* Skip the script if available */
Frank Tangf90543d2020-10-30 19:02:04 -07001777 ulocimp_getScript(localeID+1, &scriptID, *err);
1778 if (U_FAILURE(*err)) {
1779 return 0;
1780 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001781 if(scriptID != localeID+1) {
1782 /* Found optional script */
1783 localeID = scriptID;
1784 }
1785 if(_isIDSeparator(*localeID)) {
Frank Tangf90543d2020-10-30 19:02:04 -07001786 return ulocimp_getCountry(localeID+1, NULL, *err).extract(country, countryCapacity, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001787 }
1788 }
Frank Tangf90543d2020-10-30 19:02:04 -07001789 return u_terminateChars(country, countryCapacity, 0, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001790}
1791
1792U_CAPI int32_t U_EXPORT2
1793uloc_getVariant(const char* localeID,
1794 char* variant,
1795 int32_t variantCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001796 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001797{
1798 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1799 const char* tmpLocaleID;
1800 int32_t i=0;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001801
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001802 if(err==NULL || U_FAILURE(*err)) {
1803 return 0;
1804 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001805
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001806 if (_hasBCP47Extension(localeID)) {
Frank Tang3e05d9d2021-11-08 14:04:04 -08001807 tmpLocaleID =_ConvertBCP47(localeID, tempBuffer, sizeof(tempBuffer), err, nullptr);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001808 } else {
1809 if (localeID==NULL) {
1810 localeID=uloc_getDefault();
1811 }
1812 tmpLocaleID=localeID;
1813 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001814
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001815 /* Skip the language */
Frank Tangf90543d2020-10-30 19:02:04 -07001816 ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);
1817 if (U_FAILURE(*err)) {
1818 return 0;
1819 }
1820
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001821 if(_isIDSeparator(*tmpLocaleID)) {
1822 const char *scriptID;
1823 /* Skip the script if available */
Frank Tangf90543d2020-10-30 19:02:04 -07001824 ulocimp_getScript(tmpLocaleID+1, &scriptID, *err);
1825 if (U_FAILURE(*err)) {
1826 return 0;
1827 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001828 if(scriptID != tmpLocaleID+1) {
1829 /* Found optional script */
1830 tmpLocaleID = scriptID;
1831 }
1832 /* Skip the Country */
1833 if (_isIDSeparator(*tmpLocaleID)) {
1834 const char *cntryID;
Frank Tangf90543d2020-10-30 19:02:04 -07001835 ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err);
1836 if (U_FAILURE(*err)) {
1837 return 0;
1838 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001839 if (cntryID != tmpLocaleID+1) {
1840 /* Found optional country */
1841 tmpLocaleID = cntryID;
1842 }
1843 if(_isIDSeparator(*tmpLocaleID)) {
1844 /* If there was no country ID, skip a possible extra IDSeparator */
1845 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
1846 tmpLocaleID++;
1847 }
Frank Tangf90543d2020-10-30 19:02:04 -07001848
1849 CheckedArrayByteSink sink(variant, variantCapacity);
1850 _getVariant(tmpLocaleID+1, *tmpLocaleID, sink, FALSE);
1851
1852 i = sink.NumberOfBytesAppended();
1853
1854 if (U_FAILURE(*err)) {
1855 return i;
1856 }
1857
1858 if (sink.Overflowed()) {
1859 *err = U_BUFFER_OVERFLOW_ERROR;
1860 return i;
1861 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001862 }
1863 }
1864 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001865
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001866 return u_terminateChars(variant, variantCapacity, i, err);
1867}
1868
1869U_CAPI int32_t U_EXPORT2
1870uloc_getName(const char* localeID,
1871 char* name,
1872 int32_t nameCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001873 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001874{
Frank Tangf2223962020-04-27 18:25:29 -07001875 if (U_FAILURE(*err)) {
1876 return 0;
1877 }
1878
1879 CheckedArrayByteSink sink(name, nameCapacity);
1880 ulocimp_getName(localeID, sink, err);
1881
1882 int32_t reslen = sink.NumberOfBytesAppended();
1883
1884 if (U_FAILURE(*err)) {
1885 return reslen;
1886 }
1887
1888 if (sink.Overflowed()) {
1889 *err = U_BUFFER_OVERFLOW_ERROR;
1890 } else {
1891 u_terminateChars(name, nameCapacity, reslen, err);
1892 }
1893
1894 return reslen;
1895}
1896
Frank Tangf90543d2020-10-30 19:02:04 -07001897U_CAPI void U_EXPORT2
Frank Tangf2223962020-04-27 18:25:29 -07001898ulocimp_getName(const char* localeID,
1899 ByteSink& sink,
1900 UErrorCode* err)
1901{
1902 _canonicalize(localeID, sink, 0, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001903}
1904
1905U_CAPI int32_t U_EXPORT2
1906uloc_getBaseName(const char* localeID,
1907 char* name,
1908 int32_t nameCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001909 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001910{
Frank Tangf2223962020-04-27 18:25:29 -07001911 if (U_FAILURE(*err)) {
1912 return 0;
1913 }
1914
1915 CheckedArrayByteSink sink(name, nameCapacity);
1916 ulocimp_getBaseName(localeID, sink, err);
1917
1918 int32_t reslen = sink.NumberOfBytesAppended();
1919
1920 if (U_FAILURE(*err)) {
1921 return reslen;
1922 }
1923
1924 if (sink.Overflowed()) {
1925 *err = U_BUFFER_OVERFLOW_ERROR;
1926 } else {
1927 u_terminateChars(name, nameCapacity, reslen, err);
1928 }
1929
1930 return reslen;
1931}
1932
Frank Tangf90543d2020-10-30 19:02:04 -07001933U_CAPI void U_EXPORT2
Frank Tangf2223962020-04-27 18:25:29 -07001934ulocimp_getBaseName(const char* localeID,
1935 ByteSink& sink,
1936 UErrorCode* err)
1937{
1938 _canonicalize(localeID, sink, _ULOC_STRIP_KEYWORDS, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001939}
1940
1941U_CAPI int32_t U_EXPORT2
1942uloc_canonicalize(const char* localeID,
1943 char* name,
1944 int32_t nameCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001945 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001946{
Frank Tangf2223962020-04-27 18:25:29 -07001947 if (U_FAILURE(*err)) {
1948 return 0;
1949 }
1950
1951 CheckedArrayByteSink sink(name, nameCapacity);
1952 ulocimp_canonicalize(localeID, sink, err);
1953
1954 int32_t reslen = sink.NumberOfBytesAppended();
1955
1956 if (U_FAILURE(*err)) {
1957 return reslen;
1958 }
1959
1960 if (sink.Overflowed()) {
1961 *err = U_BUFFER_OVERFLOW_ERROR;
1962 } else {
1963 u_terminateChars(name, nameCapacity, reslen, err);
1964 }
1965
1966 return reslen;
1967}
1968
Frank Tangf90543d2020-10-30 19:02:04 -07001969U_CAPI void U_EXPORT2
Frank Tangf2223962020-04-27 18:25:29 -07001970ulocimp_canonicalize(const char* localeID,
1971 ByteSink& sink,
1972 UErrorCode* err)
1973{
1974 _canonicalize(localeID, sink, _ULOC_CANONICALIZE, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001975}
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001976
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001977U_CAPI const char* U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001978uloc_getISO3Language(const char* localeID)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001979{
1980 int16_t offset;
1981 char lang[ULOC_LANG_CAPACITY];
1982 UErrorCode err = U_ZERO_ERROR;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001983
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001984 if (localeID == NULL)
1985 {
1986 localeID = uloc_getDefault();
1987 }
1988 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
1989 if (U_FAILURE(err))
1990 return "";
1991 offset = _findIndex(LANGUAGES, lang);
1992 if (offset < 0)
1993 return "";
1994 return LANGUAGES_3[offset];
1995}
1996
1997U_CAPI const char* U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001998uloc_getISO3Country(const char* localeID)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001999{
2000 int16_t offset;
2001 char cntry[ULOC_LANG_CAPACITY];
2002 UErrorCode err = U_ZERO_ERROR;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002003
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002004 if (localeID == NULL)
2005 {
2006 localeID = uloc_getDefault();
2007 }
2008 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
2009 if (U_FAILURE(err))
2010 return "";
2011 offset = _findIndex(COUNTRIES, cntry);
2012 if (offset < 0)
2013 return "";
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002014
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002015 return COUNTRIES_3[offset];
2016}
2017
2018U_CAPI uint32_t U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002019uloc_getLCID(const char* localeID)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002020{
2021 UErrorCode status = U_ZERO_ERROR;
2022 char langID[ULOC_FULLNAME_CAPACITY];
Jungshik Shin87232d82017-05-13 21:10:13 -07002023 uint32_t lcid = 0;
2024
2025 /* Check for incomplete id. */
2026 if (!localeID || uprv_strlen(localeID) < 2) {
2027 return 0;
2028 }
2029
Frank Tang69c72a62019-04-03 21:41:21 -07002030 // First, attempt Windows platform lookup if available, but fall
2031 // through to catch any special cases (ICU vs Windows name differences).
2032 lcid = uprv_convertToLCIDPlatform(localeID, &status);
2033 if (U_FAILURE(status)) {
2034 return 0;
2035 }
2036 if (lcid > 0) {
Jungshik Shin87232d82017-05-13 21:10:13 -07002037 // Windows found an LCID, return that
2038 return lcid;
2039 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002040
2041 uloc_getLanguage(localeID, langID, sizeof(langID), &status);
Frank Tang69c72a62019-04-03 21:41:21 -07002042 if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002043 return 0;
2044 }
2045
2046 if (uprv_strchr(localeID, '@')) {
2047 // uprv_convertToLCID does not support keywords other than collation.
2048 // Remove all keywords except collation.
2049 int32_t len;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002050 char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
2051
Frank Tangf90543d2020-10-30 19:02:04 -07002052 CharString collVal;
2053 {
2054 CharStringByteSink sink(&collVal);
2055 ulocimp_getKeywordValue(localeID, "collation", sink, &status);
2056 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002057
Frank Tangf90543d2020-10-30 19:02:04 -07002058 if (U_SUCCESS(status) && !collVal.isEmpty()) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002059 len = uloc_getBaseName(localeID, tmpLocaleID,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002060 UPRV_LENGTHOF(tmpLocaleID) - 1, &status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002061
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002062 if (U_SUCCESS(status) && len > 0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002063 tmpLocaleID[len] = 0;
2064
Frank Tangf90543d2020-10-30 19:02:04 -07002065 len = uloc_setKeywordValue("collation", collVal.data(), tmpLocaleID,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002066 UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002067
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002068 if (U_SUCCESS(status) && len > 0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002069 tmpLocaleID[len] = 0;
2070 return uprv_convertToLCID(langID, tmpLocaleID, &status);
2071 }
2072 }
2073 }
2074
2075 // fall through - all keywords are simply ignored
2076 status = U_ZERO_ERROR;
2077 }
2078
2079 return uprv_convertToLCID(langID, localeID, &status);
2080}
2081
2082U_CAPI int32_t U_EXPORT2
2083uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2084 UErrorCode *status)
2085{
2086 return uprv_convertToPosix(hostid, locale, localeCapacity, status);
2087}
2088
2089/* ### Default locale **************************************************/
2090
2091U_CAPI const char* U_EXPORT2
2092uloc_getDefault()
2093{
2094 return locale_get_default();
2095}
2096
2097U_CAPI void U_EXPORT2
2098uloc_setDefault(const char* newDefaultLocale,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002099 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002100{
2101 if (U_FAILURE(*err))
2102 return;
2103 /* the error code isn't currently used for anything by this function*/
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002104
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002105 /* propagate change to C++ */
2106 locale_set_default(newDefaultLocale);
2107}
2108
2109/**
2110 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
2111 * to an array of pointers to arrays of char. All of these pointers are owned
2112 * by ICU-- do not delete them, and do not write through them. The array is
2113 * terminated with a null pointer.
2114 */
2115U_CAPI const char* const* U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002116uloc_getISOLanguages()
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002117{
2118 return LANGUAGES;
2119}
2120
2121/**
2122 * Returns a list of all 2-letter country codes defined in ISO 639. This is a
2123 * pointer to an array of pointers to arrays of char. All of these pointers are
2124 * owned by ICU-- do not delete them, and do not write through them. The array is
2125 * terminated with a null pointer.
2126 */
2127U_CAPI const char* const* U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002128uloc_getISOCountries()
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002129{
2130 return COUNTRIES;
2131}
2132
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002133U_CAPI const char* U_EXPORT2
2134uloc_toUnicodeLocaleKey(const char* keyword)
2135{
2136 const char* bcpKey = ulocimp_toBcpKey(keyword);
2137 if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {
2138 // unknown keyword, but syntax is fine..
2139 return keyword;
2140 }
2141 return bcpKey;
2142}
2143
2144U_CAPI const char* U_EXPORT2
2145uloc_toUnicodeLocaleType(const char* keyword, const char* value)
2146{
2147 const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
2148 if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {
2149 // unknown keyword, but syntax is fine..
2150 return value;
2151 }
2152 return bcpType;
2153}
2154
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002155static UBool
2156isWellFormedLegacyKey(const char* legacyKey)
2157{
2158 const char* p = legacyKey;
2159 while (*p) {
2160 if (!UPRV_ISALPHANUM(*p)) {
2161 return FALSE;
2162 }
2163 p++;
2164 }
2165 return TRUE;
2166}
2167
2168static UBool
2169isWellFormedLegacyType(const char* legacyType)
2170{
2171 const char* p = legacyType;
2172 int32_t alphaNumLen = 0;
2173 while (*p) {
2174 if (*p == '_' || *p == '/' || *p == '-') {
2175 if (alphaNumLen == 0) {
2176 return FALSE;
2177 }
2178 alphaNumLen = 0;
2179 } else if (UPRV_ISALPHANUM(*p)) {
2180 alphaNumLen++;
2181 } else {
2182 return FALSE;
2183 }
2184 p++;
2185 }
2186 return (alphaNumLen != 0);
2187}
2188
2189U_CAPI const char* U_EXPORT2
2190uloc_toLegacyKey(const char* keyword)
2191{
2192 const char* legacyKey = ulocimp_toLegacyKey(keyword);
2193 if (legacyKey == NULL) {
2194 // Checks if the specified locale key is well-formed with the legacy locale syntax.
2195 //
2196 // Note:
Jungshik Shin87232d82017-05-13 21:10:13 -07002197 // LDML/CLDR provides some definition of keyword syntax in
2198 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2199 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2200 // Keys can only consist of [0-9a-zA-Z].
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002201 if (isWellFormedLegacyKey(keyword)) {
2202 return keyword;
2203 }
2204 }
2205 return legacyKey;
2206}
2207
2208U_CAPI const char* U_EXPORT2
2209uloc_toLegacyType(const char* keyword, const char* value)
2210{
2211 const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
2212 if (legacyType == NULL) {
2213 // Checks if the specified locale type is well-formed with the legacy locale syntax.
2214 //
2215 // Note:
Jungshik Shin87232d82017-05-13 21:10:13 -07002216 // LDML/CLDR provides some definition of keyword syntax in
2217 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2218 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2219 // Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
2220 // we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002221 if (isWellFormedLegacyType(value)) {
2222 return value;
2223 }
2224 }
2225 return legacyType;
2226}
2227
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002228/*eof*/