blob: 1b14e641422be040b37c566f5c3c23ab7aa5b2ce [file] [log] [blame]
Jungshik Shin87232d82017-05-13 21:10:13 -07001// © 2016 and later: Unicode, Inc. and others.
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002// License & terms of use: http://www.unicode.org/copyright.html
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00003/*
4**********************************************************************
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07005* Copyright (C) 1997-2016, International Business Machines
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00006* Corporation and others. All Rights Reserved.
7**********************************************************************
8*
9* File ULOC.CPP
10*
11* Modification History:
12*
13* Date Name Description
14* 04/01/97 aliu Creation.
15* 08/21/98 stephen JDK 1.2 sync
16* 12/08/98 rtg New Locale implementation and C API
17* 03/15/99 damiba overhaul.
18* 04/06/99 stephen changed setDefault() to realloc and copy
19* 06/14/99 stephen Changed calls to ures_open for new params
20* 07/21/99 stephen Modified setDefault() to propagate to C++
21* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
22* brought canonicalization code into line with spec
23*****************************************************************************/
24
25/*
26 POSIX's locale format, from putil.c: [no spaces]
27
28 ll [ _CC ] [ . MM ] [ @ VV]
29
30 l = lang, C = ctry, M = charmap, V = variant
31*/
32
Frank Tangf2223962020-04-27 18:25:29 -070033#include "unicode/bytestream.h"
34#include "unicode/errorcode.h"
35#include "unicode/stringpiece.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000036#include "unicode/utypes.h"
37#include "unicode/ustring.h"
38#include "unicode/uloc.h"
39
Frank Tangf2223962020-04-27 18:25:29 -070040#include "bytesinkutil.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000041#include "putilimp.h"
42#include "ustr_imp.h"
43#include "ulocimp.h"
44#include "umutex.h"
45#include "cstring.h"
46#include "cmemory.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000047#include "locmap.h"
48#include "uarrsort.h"
49#include "uenumimp.h"
50#include "uassert.h"
Jungshik Shin87232d82017-05-13 21:10:13 -070051#include "charstr.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000052
Jungshik Shin87232d82017-05-13 21:10:13 -070053U_NAMESPACE_USE
Jungshik Shin5feb9ad2016-10-21 12:52:48 -070054
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000055/* ### Declarations **************************************************/
56
57/* Locale stuff from locid.cpp */
58U_CFUNC void locale_set_default(const char *id);
59U_CFUNC const char *locale_get_default(void);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000060
61/* ### Data tables **************************************************/
62
63/**
64 * Table of language codes, both 2- and 3-letter, with preference
65 * given to 2-letter codes where possible. Includes 3-letter codes
66 * that lack a 2-letter equivalent.
67 *
68 * This list must be in sorted order. This list is returned directly
69 * to the user by some API.
70 *
71 * This list must be kept in sync with LANGUAGES_3, with corresponding
72 * entries matched.
73 *
74 * This table should be terminated with a NULL entry, followed by a
75 * second list, and another NULL entry. The first list is visible to
76 * user code when this array is returned by API. The second list
77 * contains codes we support, but do not expose through user API.
78 *
79 * Notes
80 *
81 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
82 * include the revisions up to 2001/7/27 *CWB*
83 *
84 * The 3 character codes are the terminology codes like RFC 3066. This
85 * is compatible with prior ICU codes
86 *
87 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
88 * table but now at the end of the table because 3 character codes are
89 * duplicates. This avoids bad searches going from 3 to 2 character
90 * codes.
91 *
92 * The range qaa-qtz is reserved for local use
93 */
94/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
Jungshik Shin70f82502016-01-29 00:32:36 -080095/* ISO639 table version is 20150505 */
Jungshik Shinb3189662017-11-07 11:18:34 -080096/* Subsequent hand addition of selected languages */
static const char * const LANGUAGES[] = {
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "aeb",
    "af",  "afh", "agq", "ain", "ak",  "akk", "akz", "ale",
    "aln", "alt", "am",  "an",  "ang", "anp", "ar",  "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
    "asa", "ase", "ast", "av",  "avk", "awa", "ay",  "az",
    "ba",  "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "be",  "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
    "bgn", "bho", "bi",  "bik", "bin", "bjn", "bkm", "bla",
    "bm",  "bn",  "bo",  "bpy", "bqi", "br",  "bra", "brh",
    "brx", "bs",  "bss", "bua", "bug", "bum", "byn", "byv",
    "ca",  "cad", "car", "cay", "cch", "ccp", "ce",  "ceb", "cgg",
    "ch",  "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "co",  "cop", "cps", "cr",  "crh",
    "cs",  "csb", "cu",  "cv",  "cy",
    "da",  "dak", "dar", "dav", "de",  "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
    "dyo", "dyu", "dz",  "dzg",
    "ebu", "ee",  "efi", "egl", "egy", "eka", "el",  "elx",
    "en",  "enm", "eo",  "es",  "esu", "et",  "eu",  "ewo",
    "ext",
    "fa",  "fan", "fat", "ff",  "fi",  "fil", "fit", "fj",
    "fo",  "fon", "fr",  "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fy",
    "ga",  "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
    "gez", "gil", "gl",  "glk", "gmh", "gn",  "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "guc",
    "gur", "guz", "gv",  "gwi",
    "ha",  "hai", "hak", "haw", "he",  "hi",  "hif", "hil",
    "hit", "hmn", "ho",  "hr",  "hsb", "hsn", "ht",  "hu",
    "hup", "hy",  "hz",
    "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ik",
    "ilo", "inh", "io",  "is",  "it",  "iu",  "izh",
    "ja",  "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jv",
    "ka",  "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg",  "kgp",
    "kha", "kho", "khq", "khw", "ki",  "kiu", "kj",  "kk",
    "kkj", "kl",  "kln", "km",  "kmb", "kn",  "ko",  "koi",
    "kok", "kos", "kpe", "kr",  "krc", "kri", "krj", "krl",
    "kru", "ks",  "ksb", "ksf", "ksh", "ku",  "kum", "kut",
    "kv",  "kw",  "ky",
    "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lfn",
    "lg",  "li",  "lij", "liv", "lkt", "lmo", "ln",  "lo",
    "lol", "loz", "lrc", "lt",  "ltg", "lu",  "lua", "lui",
    "lun", "luo", "lus", "luy", "lv",  "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg",  "mga",
    "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
    "ml",  "mn",  "mnc", "mni",
    "moh", "mos", "mr",  "mrj",
    "ms",  "mt",  "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "my",  "mye", "myv", "mzn",
    "na",  "nan", "nap", "naq", "nb",  "nd",  "nds", "ne",
    "new", "ng",  "nia", "niu", "njo", "nl",  "nmg", "nn",
    "nnh", "no",  "nog", "non", "nov", "nqo", "nr",  "nso",
    "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi",
    "oc",  "oj",  "om",  "or",  "os",  "osa", "ota",
    "pa",  "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
    "pdt", "peo", "pfl", "phn", "pi",  "pl",  "pms", "pnt",
    "pon", "prg", "pro", "ps",  "pt",
    "qu",  "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "rm",  "rn",  "ro",
    "rof", "rom", "rtm", "ru",  "rue", "rug", "rup",
    "rw",  "rwk",
    "sa",  "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "sc",  "scn", "sco", "sd",  "sdc", "sdh",
    "se",  "see", "seh", "sei", "sel", "ses", "sg",  "sga",
    "sgs", "shi", "shn", "shu", "si",  "sid", "sk",
    "sl",  "sli", "sly", "sm",  "sma", "smj", "smn", "sms",
    "sn",  "snk", "so",  "sog", "sq",  "sr",  "srn", "srr",
    "ss",  "ssy", "st",  "stq", "su",  "suk", "sus", "sux",
    "sv",  "sw",  "swb", "syc", "syr", "szl",
    "ta",  "tcy", "te",  "tem", "teo", "ter", "tet", "tg",
    "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tkr",
    "tlh", "tli", "tly", "tmh", "tn",  "to",  "tog", "tpi",
    "tr",  "tru", "trv", "ts",  "tsd", "tsi", "tt",  "ttt",
    "tum", "tvl", "tw",  "twq", "ty",  "tyv", "tzm",
    "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
    "vai", "ve",  "vec", "vep", "vi",  "vls", "vmf", "vo",
    "vot", "vro", "vun",
    "wa",  "wae", "wal", "war", "was", "wbp", "wo",  "wuu",
    "xal", "xh",  "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yi",  "yo",  "yrl", "yue",
    "za",  "zap", "zbl", "zea", "zen", "zgh", "zh",  "zu",
    "zun", "zxx", "zza",
NULL, /* terminates the first (sorted) list */
    "in",  "iw",  "ji",  "jw",  "mo",  "sh",  "swc", "tl",  /* obsolete language codes */
NULL  /* terminates the second list */
};
187
/*
 * Two parallel arrays of 2-letter language codes.
 * NOTE(review): presumably REPLACEMENT_LANGUAGES[i] is the current code for
 * DEPRECATED_LANGUAGES[i]; the lookup code is outside this view, so confirm
 * before relying on it. Both arrays end with two NULL entries and must keep
 * the same number of entries in the same order.
 */
static const char* const DEPRECATED_LANGUAGES[]={
    "in", "iw", "ji", "jw", NULL, NULL
};
static const char* const REPLACEMENT_LANGUAGES[]={
    "id", "he", "yi", "jv", NULL, NULL
};
194
195/**
196 * Table of 3-letter language codes.
197 *
198 * This is a lookup table used to convert 3-letter language codes to
199 * their 2-letter equivalent, where possible. It must be kept in sync
200 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
201 * same language as LANGUAGES_3[i]. The commented-out lines are
202 * copied from LANGUAGES to make eyeballing this baby easier.
203 *
204 * Where a 3-letter language code has no 2-letter equivalent, the
205 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
206 *
207 * This table should be terminated with a NULL entry, followed by a
208 * second list, and another NULL entry. The two lists correspond to
209 * the two lists in LANGUAGES.
210 */
211/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
Jungshik Shin70f82502016-01-29 00:32:36 -0800212/* ISO639 table version is 20150505 */
Jungshik Shinb3189662017-11-07 11:18:34 -0800213/* Subsequent hand addition of selected languages */
static const char * const LANGUAGES_3[] = {
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
    "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
    "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
    "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
    "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
    "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
    "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
    "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
    "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
    "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
    "ces", "csb", "chu", "chv", "cym",
    "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
    "dyo", "dyu", "dzo", "dzg",
    "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
    "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
    "ext",
    "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
    "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fry",
    "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
    "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
    "gur", "guz", "glv", "gwi",
    "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
    "hup", "hye", "her",
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
    "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
    "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jav",
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
    "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
    "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
    "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
    "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
    "kom", "cor", "kir",
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
    "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
    "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
    "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
    "mal", "mon", "mnc", "mni",
    "moh", "mos", "mar", "mrj",
    "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "mya", "mye", "myv", "mzn",
    "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
    "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
    "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
    "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
    "oci", "oji", "orm", "ori", "oss", "osa", "ota",
    "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
    "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
    "pon", "prg", "pro", "pus", "por",
    "que", "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
    "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
    "kin", "rwk",
    "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
    "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
    "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
    "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
    "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
    "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
    "swe", "swa", "swb", "syc", "syr", "szl",
    "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr",
    "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
    "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
    "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
    "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
    "vot", "vro", "vun",
    "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
    "xal", "xho", "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
    "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
    "zun", "zxx", "zza",
NULL, /* terminates the first list */
/*  "in",  "iw",  "ji",  "jw",  "mo",  "sh",  "swc", "tl",  */
    "ind", "heb", "yid", "jaw", "mol", "srp", "swc", "tgl",
NULL  /* terminates the second list */
};
305
306/**
307 * Table of 2-letter country codes.
308 *
309 * This list must be in sorted order. This list is returned directly
310 * to the user by some API.
311 *
312 * This list must be kept in sync with COUNTRIES_3, with corresponding
313 * entries matched.
314 *
315 * This table should be terminated with a NULL entry, followed by a
316 * second list, and another NULL entry. The first list is visible to
317 * user code when this array is returned by API. The second list
318 * contains codes we support, but do not expose through user API.
319 *
320 * Notes:
321 *
322 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
323 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
324 * new codes keeping the old ones for compatibility updated to include
325 * 1999/12/03 revisions *CWB*
326 *
327 * RO(ROM) is now RO(ROU) according to
328 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
329 */
static const char * const COUNTRIES[] = {
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
    "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DG",  "DJ",  "DK",
    "DM",  "DO",  "DZ",  "EA",  "EC",  "EE",  "EG",  "EH",  "ER",
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
    "IC",  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",
    "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
    "WS",  "XK",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
NULL, /* terminates the first (sorted) list */
    "AN",  "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR",   /* obsolete country codes */
NULL  /* terminates the second list */
};
365
/*
 * Two parallel arrays of 2-letter country codes: REPLACEMENT_COUNTRIES[i] is
 * listed directly under DEPRECATED_COUNTRIES[i] (see the eyeballing comment
 * inside the second array). Both arrays end with two NULL entries and must
 * keep the same number of entries in the same order.
 */
static const char* const DEPRECATED_COUNTRIES[] = {
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
};
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL  /* replacement country codes */
};
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700373
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000374/**
375 * Table of 3-letter country codes.
376 *
377 * This is a lookup table used to convert 3-letter country codes to
378 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
379 * For all valid i, COUNTRIES[i] must refer to the same country as
380 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
381 * to make eyeballing this baby easier.
382 *
383 * This table should be terminated with a NULL entry, followed by a
384 * second list, and another NULL entry. The two lists correspond to
385 * the two lists in COUNTRIES.
386 */
static const char * const COUNTRIES_3[] = {
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",     */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
/*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DG",  "DJ",  "DK",     */
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",
/*  "DM",  "DO",  "DZ",  "EA",  "EC",  "EE",  "EG",  "EH",  "ER",     */
    "DMA", "DOM", "DZA", "XEA", "ECU", "EST", "EGY", "ESH", "ERI",
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "IC",  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",     */
    "XIC", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",     */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
/*  "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
    "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS",  "XK",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
    "WSM", "XXK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
NULL, /* terminates the first list */
/*  "AN",  "BU",  "CS",  "FX",  "RO",  "SU",  "TP",  "YD",  "YU",  "ZR" */
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
NULL  /* terminates the second list */
};
453
/** One entry of the locale-ID canonicalization table. */
typedef struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
} CanonicalizationMap;

/**
 * A map to canonicalize locale IDs.  This handles a variety of
 * different semantic kinds of transformations.
 *
 * Each entry pairs an input locale ID with the ID it is canonicalized to.
 * The table has no terminator entry, so users must derive the entry count
 * from the array size.
 */
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    { "art__LOJBAN",    "jbo" }, /* registered name */
    { "hy__AREVELA",    "hy" }, /* Registered IANA variant */
    { "hy__AREVMDA",    "hyw" }, /* Registered IANA variant */
    { "zh__GUOYU",      "zh" }, /* registered name */
    { "zh__HAKKA",      "hak" }, /* registered name */
    { "zh__XIANG",      "hsn" }, /* registered name */
    // subtags with 3 chars won't be treated as variants.
    { "zh_GAN",         "gan" }, /* registered name */
    { "zh_MIN_NAN",     "nan" }, /* registered name */
    { "zh_WUU",         "wuu" }, /* registered name */
    { "zh_YUE",         "yue" }, /* registered name */
};
476
477/* ### BCP47 Conversion *******************************************/
/*
 * Test if the locale id has BCP47 u extension and does not have '@'.
 *
 * Evaluates to true when id is non-NULL, contains no '@', and
 * getShortestSubtagLength(id) is exactly 1.
 * The argument is evaluated more than once, so pass a plain variable and
 * not an expression with side effects.
 */
#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
480/* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
Frank Tanga38aef92021-08-10 15:57:41 -0700481static int32_t _ConvertBCP47(
482 const char*& finalID, const char* id, char* buffer, int32_t length, UErrorCode* err) {
483 int32_t localeIDSize = uloc_forLanguageTag(id, buffer, length, NULL, err);
484 if (localeIDSize <= 0 || U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) {
485 finalID=id;
486 if (*err == U_STRING_NOT_TERMINATED_WARNING) {
487 *err = U_BUFFER_OVERFLOW_ERROR;
488 }
489 } else {
490 finalID=buffer;
491 }
492 return localeIDSize;
493}
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000494/* Gets the size of the shortest subtag in the given localeID. */
495static int32_t getShortestSubtagLength(const char *localeID) {
Jungshik Shinb3189662017-11-07 11:18:34 -0800496 int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000497 int32_t length = localeIDLength;
498 int32_t tmpLength = 0;
499 int32_t i;
500 UBool reset = TRUE;
501
502 for (i = 0; i < localeIDLength; i++) {
503 if (localeID[i] != '_' && localeID[i] != '-') {
504 if (reset) {
505 tmpLength = 0;
506 reset = FALSE;
507 }
508 tmpLength++;
509 } else {
510 if (tmpLength != 0 && tmpLength < length) {
511 length = tmpLength;
512 }
513 reset = TRUE;
514 }
515 }
516
517 return length;
518}
519
520/* ### Keywords **************************************************/
/*
 * ASCII character classification helpers.
 * These are macros that evaluate their argument more than once, so do not
 * pass expressions with side effects.
 */
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
/* Punctuation/symbols allowed in legacy key values */
#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')

/* Size in bytes, including the terminating NUL, of the buffers that hold one keyword name. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Capacity of the keyword list that ulocimp_getKeywords() collects. */
#define ULOC_MAX_NO_KEYWORDS 25
528
529U_CAPI const char * U_EXPORT2
530locale_getKeywordsStart(const char *localeID) {
531 const char *result = NULL;
532 if((result = uprv_strchr(localeID, '@')) != NULL) {
533 return result;
534 }
535#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
536 else {
537 /* We do this because the @ sign is variant, and the @ sign used on one
538 EBCDIC machine won't be compiled the same way on other EBCDIC based
539 machines. */
540 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
541 const uint8_t *charToFind = ebcdicSigns;
542 while(*charToFind) {
543 if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
544 return result;
545 }
546 charToFind++;
547 }
548 }
549#endif
550 return NULL;
551}
552
553/**
554 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
555 * @param keywordName incoming name to be canonicalized
556 * @param status return status (keyword too long)
557 * @return length of the keyword name
558 */
559static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
560{
Jungshik Shin87232d82017-05-13 21:10:13 -0700561 int32_t keywordNameLen = 0;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700562
Jungshik Shin87232d82017-05-13 21:10:13 -0700563 for (; *keywordName != 0; keywordName++) {
564 if (!UPRV_ISALPHANUM(*keywordName)) {
565 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
566 return 0;
567 }
568 if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
569 buf[keywordNameLen++] = uprv_tolower(*keywordName);
570 } else {
571 /* keyword name too long for internal buffer */
572 *status = U_INTERNAL_PROGRAM_ERROR;
573 return 0;
574 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000575 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700576 if (keywordNameLen == 0) {
577 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
578 return 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000579 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700580 buf[keywordNameLen] = 0; /* terminate */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700581
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000582 return keywordNameLen;
583}
584
585typedef struct {
586 char keyword[ULOC_KEYWORD_BUFFER_LEN];
587 int32_t keywordLen;
588 const char *valueStart;
589 int32_t valueLen;
590} KeywordStruct;
591
592static int32_t U_CALLCONV
593compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
594 const char* leftString = ((const KeywordStruct *)left)->keyword;
595 const char* rightString = ((const KeywordStruct *)right)->keyword;
596 return uprv_strcmp(leftString, rightString);
597}
598
Frank Tangf90543d2020-10-30 19:02:04 -0700599U_CFUNC void
600ulocimp_getKeywords(const char *localeID,
601 char prev,
602 ByteSink& sink,
603 UBool valuesToo,
604 UErrorCode *status)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000605{
606 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700607
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000608 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
609 int32_t numKeywords = 0;
610 const char* pos = localeID;
611 const char* equalSign = NULL;
612 const char* semicolon = NULL;
613 int32_t i = 0, j, n;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000614
615 if(prev == '@') { /* start of keyword definition */
616 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
617 do {
618 UBool duplicate = FALSE;
619 /* skip leading spaces */
620 while(*pos == ' ') {
621 pos++;
622 }
623 if (!*pos) { /* handle trailing "; " */
624 break;
625 }
626 if(numKeywords == maxKeywords) {
627 *status = U_INTERNAL_PROGRAM_ERROR;
Frank Tangf2223962020-04-27 18:25:29 -0700628 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000629 }
630 equalSign = uprv_strchr(pos, '=');
631 semicolon = uprv_strchr(pos, ';');
632 /* lack of '=' [foo@currency] is illegal */
633 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
634 if(!equalSign || (semicolon && semicolon<equalSign)) {
635 *status = U_INVALID_FORMAT_ERROR;
Frank Tangf2223962020-04-27 18:25:29 -0700636 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000637 }
638 /* need to normalize both keyword and keyword name */
639 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
640 /* keyword name too long for internal buffer */
641 *status = U_INTERNAL_PROGRAM_ERROR;
Frank Tangf2223962020-04-27 18:25:29 -0700642 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000643 }
644 for(i = 0, n = 0; i < equalSign - pos; ++i) {
645 if (pos[i] != ' ') {
646 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
647 }
648 }
649
650 /* zero-length keyword is an error. */
651 if (n == 0) {
652 *status = U_INVALID_FORMAT_ERROR;
Frank Tangf2223962020-04-27 18:25:29 -0700653 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000654 }
655
656 keywordList[numKeywords].keyword[n] = 0;
657 keywordList[numKeywords].keywordLen = n;
658 /* now grab the value part. First we skip the '=' */
659 equalSign++;
660 /* then we leading spaces */
661 while(*equalSign == ' ') {
662 equalSign++;
663 }
664
665 /* Premature end or zero-length value */
Jungshik Shin (jungshik at google)46be5162015-03-26 11:46:43 -0700666 if (!*equalSign || equalSign == semicolon) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000667 *status = U_INVALID_FORMAT_ERROR;
Frank Tangf2223962020-04-27 18:25:29 -0700668 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000669 }
670
671 keywordList[numKeywords].valueStart = equalSign;
672
673 pos = semicolon;
674 i = 0;
675 if(pos) {
676 while(*(pos - i - 1) == ' ') {
677 i++;
678 }
679 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
680 pos++;
681 } else {
682 i = (int32_t)uprv_strlen(equalSign);
683 while(i && equalSign[i-1] == ' ') {
684 i--;
685 }
686 keywordList[numKeywords].valueLen = i;
687 }
688 /* If this is a duplicate keyword, then ignore it */
689 for (j=0; j<numKeywords; ++j) {
690 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
691 duplicate = TRUE;
692 break;
693 }
694 }
695 if (!duplicate) {
696 ++numKeywords;
697 }
698 } while(pos);
699
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000700 /* now we have a list of keywords */
701 /* we need to sort it */
702 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700703
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000704 /* Now construct the keyword part */
705 for(i = 0; i < numKeywords; i++) {
Frank Tangf2223962020-04-27 18:25:29 -0700706 sink.Append(keywordList[i].keyword, keywordList[i].keywordLen);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000707 if(valuesToo) {
Frank Tangf2223962020-04-27 18:25:29 -0700708 sink.Append("=", 1);
709 sink.Append(keywordList[i].valueStart, keywordList[i].valueLen);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000710 if(i < numKeywords - 1) {
Frank Tangf2223962020-04-27 18:25:29 -0700711 sink.Append(";", 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000712 }
Frank Tangf2223962020-04-27 18:25:29 -0700713 } else {
714 sink.Append("\0", 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000715 }
716 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000717 }
718}
719
Frank Tangf90543d2020-10-30 19:02:04 -0700720U_CAPI int32_t U_EXPORT2
721uloc_getKeywordValue(const char* localeID,
722 const char* keywordName,
723 char* buffer, int32_t bufferCapacity,
724 UErrorCode* status)
725{
Frank Tangf2223962020-04-27 18:25:29 -0700726 if (U_FAILURE(*status)) {
727 return 0;
728 }
729
Frank Tangf90543d2020-10-30 19:02:04 -0700730 CheckedArrayByteSink sink(buffer, bufferCapacity);
731 ulocimp_getKeywordValue(localeID, keywordName, sink, status);
Frank Tangf2223962020-04-27 18:25:29 -0700732
733 int32_t reslen = sink.NumberOfBytesAppended();
734
735 if (U_FAILURE(*status)) {
736 return reslen;
737 }
738
739 if (sink.Overflowed()) {
740 *status = U_BUFFER_OVERFLOW_ERROR;
741 } else {
Frank Tangf90543d2020-10-30 19:02:04 -0700742 u_terminateChars(buffer, bufferCapacity, reslen, status);
Frank Tangf2223962020-04-27 18:25:29 -0700743 }
744
745 return reslen;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000746}
747
Frank Tangf90543d2020-10-30 19:02:04 -0700748U_CAPI void U_EXPORT2
749ulocimp_getKeywordValue(const char* localeID,
750 const char* keywordName,
751 icu::ByteSink& sink,
752 UErrorCode* status)
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700753{
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000754 const char* startSearchHere = NULL;
755 const char* nextSeparator = NULL;
756 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
757 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000758
759 if(status && U_SUCCESS(*status) && localeID) {
760 char tempBuffer[ULOC_FULLNAME_CAPACITY];
761 const char* tmpLocaleID;
762
Jungshik Shin87232d82017-05-13 21:10:13 -0700763 if (keywordName == NULL || keywordName[0] == 0) {
764 *status = U_ILLEGAL_ARGUMENT_ERROR;
Frank Tangf90543d2020-10-30 19:02:04 -0700765 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000766 }
767
768 locale_canonKeywordName(keywordNameBuffer, keywordName, status);
769 if(U_FAILURE(*status)) {
Frank Tangf90543d2020-10-30 19:02:04 -0700770 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000771 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700772
Jungshik Shin87232d82017-05-13 21:10:13 -0700773 if (_hasBCP47Extension(localeID)) {
774 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
775 } else {
776 tmpLocaleID=localeID;
777 }
778
779 startSearchHere = locale_getKeywordsStart(tmpLocaleID);
780 if(startSearchHere == NULL) {
781 /* no keywords, return at once */
Frank Tangf90543d2020-10-30 19:02:04 -0700782 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700783 }
784
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000785 /* find the first keyword */
786 while(startSearchHere) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700787 const char* keyValueTail;
788 int32_t keyValueLen;
789
790 startSearchHere++; /* skip @ or ; */
791 nextSeparator = uprv_strchr(startSearchHere, '=');
792 if(!nextSeparator) {
793 *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
Frank Tangf90543d2020-10-30 19:02:04 -0700794 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700795 }
796 /* strip leading & trailing spaces (TC decided to tolerate these) */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000797 while(*startSearchHere == ' ') {
798 startSearchHere++;
799 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700800 keyValueTail = nextSeparator;
801 while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
802 keyValueTail--;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000803 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700804 /* now keyValueTail points to first char after the keyName */
805 /* copy & normalize keyName from locale */
806 if (startSearchHere == keyValueTail) {
807 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
Frank Tangf90543d2020-10-30 19:02:04 -0700808 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700809 }
810 keyValueLen = 0;
811 while (startSearchHere < keyValueTail) {
812 if (!UPRV_ISALPHANUM(*startSearchHere)) {
813 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
Frank Tangf90543d2020-10-30 19:02:04 -0700814 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700815 }
816 if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
817 localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++);
818 } else {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000819 /* keyword name too long for internal buffer */
820 *status = U_INTERNAL_PROGRAM_ERROR;
Frank Tangf90543d2020-10-30 19:02:04 -0700821 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700822 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000823 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700824 localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700825
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000826 startSearchHere = uprv_strchr(nextSeparator, ';');
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700827
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000828 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700829 /* current entry matches the keyword. */
830 nextSeparator++; /* skip '=' */
831 /* First strip leading & trailing spaces (TC decided to tolerate these) */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000832 while(*nextSeparator == ' ') {
Jungshik Shin87232d82017-05-13 21:10:13 -0700833 nextSeparator++;
834 }
835 keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
836 while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
837 keyValueTail--;
838 }
839 /* Now copy the value, but check well-formedness */
840 if (nextSeparator == keyValueTail) {
841 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
Frank Tangf90543d2020-10-30 19:02:04 -0700842 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700843 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700844 while (nextSeparator < keyValueTail) {
845 if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) {
846 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
Frank Tangf90543d2020-10-30 19:02:04 -0700847 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700848 }
Frank Tangf90543d2020-10-30 19:02:04 -0700849 /* Should we lowercase value to return here? Tests expect as-is. */
850 sink.Append(nextSeparator++, 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000851 }
Frank Tangf90543d2020-10-30 19:02:04 -0700852 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000853 }
854 }
855 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000856}
857
858U_CAPI int32_t U_EXPORT2
859uloc_setKeywordValue(const char* keywordName,
860 const char* keywordValue,
861 char* buffer, int32_t bufferCapacity,
862 UErrorCode* status)
863{
864 /* TODO: sorting. removal. */
865 int32_t keywordNameLen;
866 int32_t keywordValueLen;
867 int32_t bufLen;
868 int32_t needLen = 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000869 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
Jungshik Shin87232d82017-05-13 21:10:13 -0700870 char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000871 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000872 int32_t rc;
873 char* nextSeparator = NULL;
874 char* nextEqualsign = NULL;
875 char* startSearchHere = NULL;
876 char* keywordStart = NULL;
Jungshik Shin87232d82017-05-13 21:10:13 -0700877 CharString updatedKeysAndValues;
Jungshik Shin87232d82017-05-13 21:10:13 -0700878 UBool handledInputKeyAndValue = FALSE;
879 char keyValuePrefix = '@';
880
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700881 if(U_FAILURE(*status)) {
882 return -1;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000883 }
Frank Tangefc58852020-11-12 11:50:18 -0800884 if (*status == U_STRING_NOT_TERMINATED_WARNING) {
885 *status = U_ZERO_ERROR;
886 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700887 if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000888 *status = U_ILLEGAL_ARGUMENT_ERROR;
889 return 0;
890 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700891 bufLen = (int32_t)uprv_strlen(buffer);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000892 if(bufferCapacity<bufLen) {
893 /* The capacity is less than the length?! Is this NULL terminated? */
894 *status = U_ILLEGAL_ARGUMENT_ERROR;
895 return 0;
896 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000897 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
898 if(U_FAILURE(*status)) {
899 return 0;
900 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700901
902 keywordValueLen = 0;
903 if(keywordValue) {
904 while (*keywordValue != 0) {
905 if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) {
906 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
907 return 0;
908 }
909 if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {
910 /* Should we force lowercase in value to set? */
911 keywordValueBuffer[keywordValueLen++] = *keywordValue++;
912 } else {
913 /* keywordValue too long for internal buffer */
914 *status = U_INTERNAL_PROGRAM_ERROR;
915 return 0;
916 }
917 }
918 }
919 keywordValueBuffer[keywordValueLen] = 0; /* terminate */
920
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000921 startSearchHere = (char*)locale_getKeywordsStart(buffer);
922 if(startSearchHere == NULL || (startSearchHere[1]==0)) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700923 if(keywordValueLen == 0) { /* no keywords = nothing to remove */
Frank Tangefc58852020-11-12 11:50:18 -0800924 U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700925 return bufLen;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000926 }
927
928 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700929 if(startSearchHere) { /* had a single @ */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000930 needLen--; /* already had the @ */
931 /* startSearchHere points at the @ */
932 } else {
933 startSearchHere=buffer+bufLen;
934 }
935 if(needLen >= bufferCapacity) {
936 *status = U_BUFFER_OVERFLOW_ERROR;
937 return needLen; /* no change */
938 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700939 *startSearchHere++ = '@';
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000940 uprv_strcpy(startSearchHere, keywordNameBuffer);
941 startSearchHere += keywordNameLen;
Jungshik Shin87232d82017-05-13 21:10:13 -0700942 *startSearchHere++ = '=';
943 uprv_strcpy(startSearchHere, keywordValueBuffer);
Frank Tangefc58852020-11-12 11:50:18 -0800944 U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000945 return needLen;
946 } /* end shortcut - no @ */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700947
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000948 keywordStart = startSearchHere;
949 /* search for keyword */
950 while(keywordStart) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700951 const char* keyValueTail;
952 int32_t keyValueLen;
953
954 keywordStart++; /* skip @ or ; */
955 nextEqualsign = uprv_strchr(keywordStart, '=');
956 if (!nextEqualsign) {
957 *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
958 return 0;
959 }
960 /* strip leading & trailing spaces (TC decided to tolerate these) */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000961 while(*keywordStart == ' ') {
962 keywordStart++;
963 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700964 keyValueTail = nextEqualsign;
965 while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') {
966 keyValueTail--;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000967 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700968 /* now keyValueTail points to first char after the keyName */
969 /* copy & normalize keyName from locale */
970 if (keywordStart == keyValueTail) {
971 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000972 return 0;
973 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700974 keyValueLen = 0;
975 while (keywordStart < keyValueTail) {
976 if (!UPRV_ISALPHANUM(*keywordStart)) {
977 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
978 return 0;
979 }
980 if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
981 localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);
982 } else {
983 /* keyword name too long for internal buffer */
984 *status = U_INTERNAL_PROGRAM_ERROR;
985 return 0;
986 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000987 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700988 localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000989
990 nextSeparator = uprv_strchr(nextEqualsign, ';');
Jungshik Shin87232d82017-05-13 21:10:13 -0700991
992 /* start processing the value part */
993 nextEqualsign++; /* skip '=' */
994 /* First strip leading & trailing spaces (TC decided to tolerate these) */
995 while(*nextEqualsign == ' ') {
996 nextEqualsign++;
997 }
998 keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);
999 while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') {
1000 keyValueTail--;
1001 }
1002 if (nextEqualsign == keyValueTail) {
1003 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
1004 return 0;
1005 }
1006
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001007 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1008 if(rc == 0) {
Jungshik Shin87232d82017-05-13 21:10:13 -07001009 /* Current entry matches the input keyword. Update the entry */
1010 if(keywordValueLen > 0) { /* updating a value */
1011 updatedKeysAndValues.append(keyValuePrefix, *status);
1012 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1013 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1014 updatedKeysAndValues.append('=', *status);
1015 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1016 } /* else removing this entry, don't emit anything */
1017 handledInputKeyAndValue = TRUE;
1018 } else {
1019 /* input keyword sorts earlier than current entry, add before current entry */
1020 if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
1021 /* insert new entry at this location */
1022 updatedKeysAndValues.append(keyValuePrefix, *status);
1023 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1024 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1025 updatedKeysAndValues.append('=', *status);
1026 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1027 handledInputKeyAndValue = TRUE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001028 }
Jungshik Shin87232d82017-05-13 21:10:13 -07001029 /* copy the current entry */
1030 updatedKeysAndValues.append(keyValuePrefix, *status);
1031 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1032 updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
1033 updatedKeysAndValues.append('=', *status);
Jungshik Shin42d50272018-10-24 01:22:09 -07001034 updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), *status);
Jungshik Shin87232d82017-05-13 21:10:13 -07001035 }
1036 if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
1037 /* append new entry at the end, it sorts later than existing entries */
1038 updatedKeysAndValues.append(keyValuePrefix, *status);
1039 /* skip keyValuePrefix update, no subsequent key-value pair */
1040 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1041 updatedKeysAndValues.append('=', *status);
1042 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1043 handledInputKeyAndValue = TRUE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001044 }
1045 keywordStart = nextSeparator;
1046 } /* end loop searching */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001047
Jungshik Shin87232d82017-05-13 21:10:13 -07001048 /* Any error from updatedKeysAndValues.append above would be internal and not due to
1049 * problems with the passed-in locale. So if we did encounter problems with the
1050 * passed-in locale above, those errors took precedence and overrode any error
1051 * status from updatedKeysAndValues.append, and also caused a return of 0. If there
1052 * are errors here they are from updatedKeysAndValues.append; they do cause an
1053 * error return but the passed-in locale is unmodified and the original bufLen is
1054 * returned.
1055 */
1056 if (!handledInputKeyAndValue || U_FAILURE(*status)) {
1057 /* if input key/value specified removal of a keyword not present in locale, or
1058 * there was an error in CharString.append, leave original locale alone. */
Frank Tangefc58852020-11-12 11:50:18 -08001059 U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
Jungshik Shin87232d82017-05-13 21:10:13 -07001060 return bufLen;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001061 }
1062
Frank Tangf90543d2020-10-30 19:02:04 -07001063 // needLen = length of the part before '@'
1064 needLen = (int32_t)(startSearchHere - buffer);
Frank Tangefc58852020-11-12 11:50:18 -08001065 // Check to see can we fit the startSearchHere, if not, return
1066 // U_BUFFER_OVERFLOW_ERROR without copy updatedKeysAndValues into it.
1067 // We do this because this API function does not behave like most others:
1068 // It promises never to set a U_STRING_NOT_TERMINATED_WARNING.
1069 // When the contents fits but without the terminating NUL, in this case we need to not change
1070 // the buffer contents and return with a buffer overflow error.
1071 int32_t appendLength = updatedKeysAndValues.length();
1072 if (appendLength >= bufferCapacity - needLen) {
1073 *status = U_BUFFER_OVERFLOW_ERROR;
1074 return needLen + appendLength;
1075 }
1076 needLen += updatedKeysAndValues.extract(
Frank Tangf90543d2020-10-30 19:02:04 -07001077 startSearchHere, bufferCapacity - needLen, *status);
Frank Tangefc58852020-11-12 11:50:18 -08001078 U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
1079 return needLen;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001080}
1081
1082/* ### ID parsing implementation **************************************************/
1083
/* TRUE if a is one of the letters that can start a special prefix: 'x', 'X', 'i' or 'I'.
 * The argument is evaluated more than once; do not pass expressions with side effects. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/* returns TRUE if one of the special prefixes is here (s=string)
 'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
1094
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001095/**
1096 * Lookup 'key' in the array 'list'. The array 'list' should contain
1097 * a NULL entry, followed by more entries, and a second NULL entry.
1098 *
1099 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1100 * COUNTRIES_3.
1101 */
1102static int16_t _findIndex(const char* const* list, const char* key)
1103{
1104 const char* const* anchor = list;
1105 int32_t pass = 0;
1106
1107 /* Make two passes through two NULL-terminated arrays at 'list' */
1108 while (pass++ < 2) {
1109 while (*list) {
1110 if (uprv_strcmp(key, *list) == 0) {
1111 return (int16_t)(list - anchor);
1112 }
1113 list++;
1114 }
1115 ++list; /* skip final NULL *CWB*/
1116 }
1117 return -1;
1118}
1119
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001120U_CFUNC const char*
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001121uloc_getCurrentCountryID(const char* oldID){
1122 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1123 if (offset >= 0) {
1124 return REPLACEMENT_COUNTRIES[offset];
1125 }
1126 return oldID;
1127}
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001128U_CFUNC const char*
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001129uloc_getCurrentLanguageID(const char* oldID){
1130 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1131 if (offset >= 0) {
1132 return REPLACEMENT_LANGUAGES[offset];
1133 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001134 return oldID;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001135}
1136/*
1137 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1138 * avoid duplicating code to handle the earlier locale ID pieces
1139 * in the functions for the later ones by
1140 * setting the *pEnd pointer to where they stopped parsing
1141 *
1142 * TODO try to use this in Locale
1143 */
Frank Tangf90543d2020-10-30 19:02:04 -07001144CharString U_EXPORT2
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001145ulocimp_getLanguage(const char *localeID,
Frank Tangf2223962020-04-27 18:25:29 -07001146 const char **pEnd,
1147 UErrorCode &status) {
1148 CharString result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001149
Frank Tang69c72a62019-04-03 21:41:21 -07001150 if (uprv_stricmp(localeID, "root") == 0) {
1151 localeID += 4;
1152 } else if (uprv_strnicmp(localeID, "und", 3) == 0 &&
1153 (localeID[3] == '\0' ||
1154 localeID[3] == '-' ||
1155 localeID[3] == '_' ||
1156 localeID[3] == '@')) {
1157 localeID += 3;
1158 }
1159
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001160 /* if it starts with i- or x- then copy that prefix */
1161 if(_isIDPrefix(localeID)) {
Frank Tangf2223962020-04-27 18:25:29 -07001162 result.append((char)uprv_tolower(*localeID), status);
1163 result.append('-', status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001164 localeID+=2;
1165 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001166
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001167 /* copy the language as far as possible and count its length */
1168 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
Frank Tangf2223962020-04-27 18:25:29 -07001169 result.append((char)uprv_tolower(*localeID), status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001170 localeID++;
1171 }
1172
Frank Tangf2223962020-04-27 18:25:29 -07001173 if(result.length()==3) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001174 /* convert 3 character code to 2 character code if possible *CWB*/
Frank Tangf2223962020-04-27 18:25:29 -07001175 int32_t offset = _findIndex(LANGUAGES_3, result.data());
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001176 if(offset>=0) {
Frank Tangf2223962020-04-27 18:25:29 -07001177 result.clear();
1178 result.append(LANGUAGES[offset], status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001179 }
1180 }
1181
1182 if(pEnd!=NULL) {
1183 *pEnd=localeID;
1184 }
Frank Tangf2223962020-04-27 18:25:29 -07001185
1186 return result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001187}
1188
Frank Tangf90543d2020-10-30 19:02:04 -07001189CharString U_EXPORT2
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001190ulocimp_getScript(const char *localeID,
Frank Tangf2223962020-04-27 18:25:29 -07001191 const char **pEnd,
1192 UErrorCode &status) {
1193 CharString result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001194 int32_t idLen = 0;
1195
1196 if (pEnd != NULL) {
1197 *pEnd = localeID;
1198 }
1199
1200 /* copy the second item as far as possible and count its length */
1201 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
1202 && uprv_isASCIILetter(localeID[idLen])) {
1203 idLen++;
1204 }
1205
1206 /* If it's exactly 4 characters long, then it's a script and not a country. */
1207 if (idLen == 4) {
1208 int32_t i;
1209 if (pEnd != NULL) {
1210 *pEnd = localeID+idLen;
1211 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001212 if (idLen >= 1) {
Frank Tangf2223962020-04-27 18:25:29 -07001213 result.append((char)uprv_toupper(*(localeID++)), status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001214 }
1215 for (i = 1; i < idLen; i++) {
Frank Tangf2223962020-04-27 18:25:29 -07001216 result.append((char)uprv_tolower(*(localeID++)), status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001217 }
1218 }
Frank Tangf2223962020-04-27 18:25:29 -07001219
1220 return result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001221}
1222
Frank Tangf90543d2020-10-30 19:02:04 -07001223CharString U_EXPORT2
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001224ulocimp_getCountry(const char *localeID,
Frank Tangf2223962020-04-27 18:25:29 -07001225 const char **pEnd,
1226 UErrorCode &status) {
1227 CharString result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001228 int32_t idLen=0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001229
1230 /* copy the country as far as possible and count its length */
1231 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
Frank Tangf2223962020-04-27 18:25:29 -07001232 result.append((char)uprv_toupper(localeID[idLen]), status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001233 idLen++;
1234 }
1235
1236 /* the country should be either length 2 or 3 */
1237 if (idLen == 2 || idLen == 3) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001238 /* convert 3 character code to 2 character code if possible *CWB*/
1239 if(idLen==3) {
Frank Tangf2223962020-04-27 18:25:29 -07001240 int32_t offset = _findIndex(COUNTRIES_3, result.data());
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001241 if(offset>=0) {
Frank Tangf2223962020-04-27 18:25:29 -07001242 result.clear();
1243 result.append(COUNTRIES[offset], status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001244 }
1245 }
1246 localeID+=idLen;
1247 } else {
Frank Tangf2223962020-04-27 18:25:29 -07001248 result.clear();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001249 }
1250
1251 if(pEnd!=NULL) {
1252 *pEnd=localeID;
1253 }
1254
Frank Tangf2223962020-04-27 18:25:29 -07001255 return result;
1256}
1257
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001258/**
1259 * @param needSeparator if true, then add leading '_' if any variants
1260 * are added to 'variant'
1261 */
Frank Tangf2223962020-04-27 18:25:29 -07001262static void
Frank Tangf90543d2020-10-30 19:02:04 -07001263_getVariant(const char *localeID,
1264 char prev,
1265 ByteSink& sink,
1266 UBool needSeparator) {
Frank Tangf2223962020-04-27 18:25:29 -07001267 UBool hasVariant = FALSE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001268
1269 /* get one or more variant tags and separate them with '_' */
1270 if(_isIDSeparator(prev)) {
1271 /* get a variant string after a '-' or '_' */
1272 while(!_isTerminator(*localeID)) {
1273 if (needSeparator) {
Frank Tangf2223962020-04-27 18:25:29 -07001274 sink.Append("_", 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001275 needSeparator = FALSE;
1276 }
Frank Tangf2223962020-04-27 18:25:29 -07001277 char c = (char)uprv_toupper(*localeID);
1278 if (c == '-') c = '_';
1279 sink.Append(&c, 1);
1280 hasVariant = TRUE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001281 localeID++;
1282 }
1283 }
1284
1285 /* if there is no variant tag after a '-' or '_' then look for '@' */
Frank Tangf2223962020-04-27 18:25:29 -07001286 if(!hasVariant) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001287 if(prev=='@') {
1288 /* keep localeID */
1289 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1290 ++localeID; /* point after the '@' */
1291 } else {
Frank Tangf2223962020-04-27 18:25:29 -07001292 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001293 }
1294 while(!_isTerminator(*localeID)) {
1295 if (needSeparator) {
Frank Tangf2223962020-04-27 18:25:29 -07001296 sink.Append("_", 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001297 needSeparator = FALSE;
1298 }
Frank Tangf2223962020-04-27 18:25:29 -07001299 char c = (char)uprv_toupper(*localeID);
1300 if (c == '-' || c == ',') c = '_';
1301 sink.Append(&c, 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001302 localeID++;
1303 }
1304 }
Frank Tangf2223962020-04-27 18:25:29 -07001305}
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001306
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001307/* Keyword enumeration */
1308
/**
 * State of a keyword enumeration: a list of NUL-terminated keyword strings
 * stored back to back, ended by an empty string, plus the read position.
 */
typedef struct UKeywordsContext {
    char *keywords; /* start of the keyword list; freed when the enumeration is closed */
    char *current;  /* next keyword to hand out; reset to 'keywords' on reset */
} UKeywordsContext;
1313
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001314U_CDECL_BEGIN
1315
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001316static void U_CALLCONV
1317uloc_kw_closeKeywords(UEnumeration *enumerator) {
1318 uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1319 uprv_free(enumerator->context);
1320 uprv_free(enumerator);
1321}
1322
1323static int32_t U_CALLCONV
1324uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
1325 char *kw = ((UKeywordsContext *)en->context)->keywords;
1326 int32_t result = 0;
1327 while(*kw) {
1328 result++;
1329 kw += uprv_strlen(kw)+1;
1330 }
1331 return result;
1332}
1333
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001334static const char * U_CALLCONV
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001335uloc_kw_nextKeyword(UEnumeration* en,
1336 int32_t* resultLength,
1337 UErrorCode* /*status*/) {
1338 const char* result = ((UKeywordsContext *)en->context)->current;
1339 int32_t len = 0;
1340 if(*result) {
1341 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1342 ((UKeywordsContext *)en->context)->current += len+1;
1343 } else {
1344 result = NULL;
1345 }
1346 if (resultLength) {
1347 *resultLength = len;
1348 }
1349 return result;
1350}
1351
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001352static void U_CALLCONV
1353uloc_kw_resetKeywords(UEnumeration* en,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001354 UErrorCode* /*status*/) {
1355 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1356}
1357
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001358U_CDECL_END
1359
1360
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001361static const UEnumeration gKeywordsEnum = {
1362 NULL,
1363 NULL,
1364 uloc_kw_closeKeywords,
1365 uloc_kw_countKeywords,
1366 uenum_unextDefault,
1367 uloc_kw_nextKeyword,
1368 uloc_kw_resetKeywords
1369};
1370
1371U_CAPI UEnumeration* U_EXPORT2
1372uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1373{
Frank Tangb8696612019-10-25 14:58:21 -07001374 LocalMemory<UKeywordsContext> myContext;
1375 LocalMemory<UEnumeration> result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001376
Frank Tangb8696612019-10-25 14:58:21 -07001377 if (U_FAILURE(*status)) {
1378 return nullptr;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001379 }
Frank Tangb8696612019-10-25 14:58:21 -07001380 myContext.adoptInstead(static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))));
1381 result.adoptInstead(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))));
1382 if (myContext.isNull() || result.isNull()) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001383 *status = U_MEMORY_ALLOCATION_ERROR;
Frank Tangb8696612019-10-25 14:58:21 -07001384 return nullptr;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001385 }
Frank Tangb8696612019-10-25 14:58:21 -07001386 uprv_memcpy(result.getAlias(), &gKeywordsEnum, sizeof(UEnumeration));
1387 myContext->keywords = static_cast<char *>(uprv_malloc(keywordListSize+1));
1388 if (myContext->keywords == nullptr) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001389 *status = U_MEMORY_ALLOCATION_ERROR;
Frank Tangb8696612019-10-25 14:58:21 -07001390 return nullptr;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001391 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001392 uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1393 myContext->keywords[keywordListSize] = 0;
1394 myContext->current = myContext->keywords;
Frank Tangb8696612019-10-25 14:58:21 -07001395 result->context = myContext.orphan();
1396 return result.orphan();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001397}
1398
1399U_CAPI UEnumeration* U_EXPORT2
1400uloc_openKeywords(const char* localeID,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001401 UErrorCode* status)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001402{
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001403 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1404 const char* tmpLocaleID;
1405
1406 if(status==NULL || U_FAILURE(*status)) {
1407 return 0;
1408 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001409
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001410 if (_hasBCP47Extension(localeID)) {
1411 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1412 } else {
1413 if (localeID==NULL) {
1414 localeID=uloc_getDefault();
1415 }
1416 tmpLocaleID=localeID;
1417 }
1418
1419 /* Skip the language */
Frank Tangf90543d2020-10-30 19:02:04 -07001420 ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *status);
1421 if (U_FAILURE(*status)) {
1422 return 0;
1423 }
1424
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001425 if(_isIDSeparator(*tmpLocaleID)) {
1426 const char *scriptID;
1427 /* Skip the script if available */
Frank Tangf90543d2020-10-30 19:02:04 -07001428 ulocimp_getScript(tmpLocaleID+1, &scriptID, *status);
1429 if (U_FAILURE(*status)) {
1430 return 0;
1431 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001432 if(scriptID != tmpLocaleID+1) {
1433 /* Found optional script */
1434 tmpLocaleID = scriptID;
1435 }
1436 /* Skip the Country */
1437 if (_isIDSeparator(*tmpLocaleID)) {
Frank Tangf90543d2020-10-30 19:02:04 -07001438 ulocimp_getCountry(tmpLocaleID+1, &tmpLocaleID, *status);
1439 if (U_FAILURE(*status)) {
1440 return 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001441 }
1442 }
1443 }
1444
1445 /* keywords are located after '@' */
1446 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
Frank Tangf90543d2020-10-30 19:02:04 -07001447 CharString keywords;
1448 CharStringByteSink sink(&keywords);
1449 ulocimp_getKeywords(tmpLocaleID+1, '@', sink, FALSE, status);
1450 if (U_FAILURE(*status)) {
1451 return NULL;
1452 }
1453 return uloc_openKeywordList(keywords.data(), keywords.length(), status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001454 }
Frank Tangf90543d2020-10-30 19:02:04 -07001455 return NULL;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001456}
1457
1458
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/*
 * True if any bit of 'mask' is set in 'options'.
 * Both arguments are parenthesized so that compound expressions such as
 * OPTION_SET(flags, A | B) are evaluated as (flags & (A | B)).
 */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The "i-default" language tag; deliberately not NUL-terminated, compare with I_DEFAULT_LENGTH. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001467
1468/**
1469 * Canonicalize the given localeID, to level 1 or to level 2,
1470 * depending on the options. To specify level 1, pass in options=0.
1471 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1472 *
1473 * This is the code underlying uloc_getName and uloc_canonicalize.
1474 */
Frank Tangf2223962020-04-27 18:25:29 -07001475static void
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001476_canonicalize(const char* localeID,
Frank Tangf2223962020-04-27 18:25:29 -07001477 ByteSink& sink,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001478 uint32_t options,
1479 UErrorCode* err) {
Frank Tangf2223962020-04-27 18:25:29 -07001480 int32_t j, fieldCount=0, scriptSize=0, variantSize=0;
Frank Tanga38aef92021-08-10 15:57:41 -07001481 PreflightingLocaleIDBuffer tempBuffer;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001482 const char* origLocaleID;
1483 const char* tmpLocaleID;
1484 const char* keywordAssign = NULL;
1485 const char* separatorIndicator = NULL;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001486
1487 if (U_FAILURE(*err)) {
Frank Tangf2223962020-04-27 18:25:29 -07001488 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001489 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001490
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001491 if (_hasBCP47Extension(localeID)) {
Frank Tanga38aef92021-08-10 15:57:41 -07001492 do {
1493 tempBuffer.requestedCapacity = _ConvertBCP47(tmpLocaleID, localeID,
1494 tempBuffer.getBuffer(), tempBuffer.getCapacity(), err);
1495 } while (tempBuffer.needToTryAgain(err));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001496 } else {
1497 if (localeID==NULL) {
1498 localeID=uloc_getDefault();
1499 }
1500 tmpLocaleID=localeID;
1501 }
1502
1503 origLocaleID=tmpLocaleID;
1504
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001505 /* get all pieces, one after another, and separate with '_' */
Frank Tangf2223962020-04-27 18:25:29 -07001506 CharString tag = ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001507
Frank Tangf2223962020-04-27 18:25:29 -07001508 if (tag.length() == I_DEFAULT_LENGTH &&
1509 uprv_strncmp(origLocaleID, i_default, I_DEFAULT_LENGTH) == 0) {
1510 tag.clear();
1511 tag.append(uloc_getDefault(), *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001512 } else if(_isIDSeparator(*tmpLocaleID)) {
1513 const char *scriptID;
1514
1515 ++fieldCount;
Frank Tangf2223962020-04-27 18:25:29 -07001516 tag.append('_', *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001517
Frank Tangf2223962020-04-27 18:25:29 -07001518 CharString script = ulocimp_getScript(tmpLocaleID+1, &scriptID, *err);
1519 tag.append(script, *err);
1520 scriptSize = script.length();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001521 if(scriptSize > 0) {
1522 /* Found optional script */
1523 tmpLocaleID = scriptID;
1524 ++fieldCount;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001525 if (_isIDSeparator(*tmpLocaleID)) {
1526 /* If there is something else, then we add the _ */
Frank Tangf2223962020-04-27 18:25:29 -07001527 tag.append('_', *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001528 }
1529 }
1530
1531 if (_isIDSeparator(*tmpLocaleID)) {
1532 const char *cntryID;
Frank Tangf2223962020-04-27 18:25:29 -07001533
1534 CharString country = ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err);
1535 tag.append(country, *err);
1536 if (!country.isEmpty()) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001537 /* Found optional country */
1538 tmpLocaleID = cntryID;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001539 }
1540 if(_isIDSeparator(*tmpLocaleID)) {
1541 /* If there is something else, then we add the _ if we found country before. */
Frank Tangf2223962020-04-27 18:25:29 -07001542 if (!_isIDSeparator(*(tmpLocaleID+1))) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001543 ++fieldCount;
Frank Tangf2223962020-04-27 18:25:29 -07001544 tag.append('_', *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001545 }
1546
Frank Tangf2223962020-04-27 18:25:29 -07001547 variantSize = -tag.length();
1548 {
1549 CharStringByteSink s(&tag);
Frank Tangf90543d2020-10-30 19:02:04 -07001550 _getVariant(tmpLocaleID+1, *tmpLocaleID, s, FALSE);
Frank Tangf2223962020-04-27 18:25:29 -07001551 }
1552 variantSize += tag.length();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001553 if (variantSize > 0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001554 tmpLocaleID += variantSize + 1; /* skip '_' and variant */
1555 }
1556 }
1557 }
1558 }
1559
1560 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1561 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
1562 UBool done = FALSE;
1563 do {
1564 char c = *tmpLocaleID;
1565 switch (c) {
1566 case 0:
1567 case '@':
1568 done = TRUE;
1569 break;
1570 default:
Frank Tangf2223962020-04-27 18:25:29 -07001571 tag.append(c, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001572 ++tmpLocaleID;
1573 break;
1574 }
1575 } while (!done);
1576 }
1577
1578 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1579 After this, tmpLocaleID either points to '@' or is NULL */
1580 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1581 keywordAssign = uprv_strchr(tmpLocaleID, '=');
1582 separatorIndicator = uprv_strchr(tmpLocaleID, ';');
1583 }
1584
1585 /* Copy POSIX-style variant, if any [mr@FOO] */
1586 if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1587 tmpLocaleID != NULL && keywordAssign == NULL) {
1588 for (;;) {
1589 char c = *tmpLocaleID;
1590 if (c == 0) {
1591 break;
1592 }
Frank Tangf2223962020-04-27 18:25:29 -07001593 tag.append(c, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001594 ++tmpLocaleID;
1595 }
1596 }
1597
1598 if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1599 /* Handle @FOO variant if @ is present and not followed by = */
1600 if (tmpLocaleID!=NULL && keywordAssign==NULL) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001601 /* Add missing '_' if needed */
1602 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1603 do {
Frank Tangf2223962020-04-27 18:25:29 -07001604 tag.append('_', *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001605 ++fieldCount;
1606 } while(fieldCount<2);
1607 }
Frank Tangf2223962020-04-27 18:25:29 -07001608
1609 int32_t posixVariantSize = -tag.length();
1610 {
1611 CharStringByteSink s(&tag);
Frank Tangf90543d2020-10-30 19:02:04 -07001612 _getVariant(tmpLocaleID+1, '@', s, (UBool)(variantSize > 0));
Frank Tangf2223962020-04-27 18:25:29 -07001613 }
1614 posixVariantSize += tag.length();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001615 if (posixVariantSize > 0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001616 variantSize += posixVariantSize;
1617 }
1618 }
1619
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001620 /* Look up the ID in the canonicalization map */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001621 for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
Frank Tangf2223962020-04-27 18:25:29 -07001622 StringPiece id(CANONICALIZE_MAP[j].id);
1623 if (tag == id) {
1624 if (id.empty() && tmpLocaleID != NULL) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001625 break; /* Don't remap "" if keywords present */
1626 }
Frank Tangf2223962020-04-27 18:25:29 -07001627 tag.clear();
1628 tag.append(CANONICALIZE_MAP[j].canonicalID, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001629 break;
1630 }
1631 }
1632 }
1633
Frank Tangf2223962020-04-27 18:25:29 -07001634 sink.Append(tag.data(), tag.length());
1635
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001636 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1637 if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1638 (!separatorIndicator || separatorIndicator > keywordAssign)) {
Frank Tangf2223962020-04-27 18:25:29 -07001639 sink.Append("@", 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001640 ++fieldCount;
Frank Tangf90543d2020-10-30 19:02:04 -07001641 ulocimp_getKeywords(tmpLocaleID+1, '@', sink, TRUE, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001642 }
1643 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001644}
1645
1646/* ### ID parsing API **************************************************/
1647
1648U_CAPI int32_t U_EXPORT2
1649uloc_getParent(const char* localeID,
1650 char* parent,
1651 int32_t parentCapacity,
1652 UErrorCode* err)
1653{
1654 const char *lastUnderscore;
1655 int32_t i;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001656
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001657 if (U_FAILURE(*err))
1658 return 0;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001659
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001660 if (localeID == NULL)
1661 localeID = uloc_getDefault();
1662
1663 lastUnderscore=uprv_strrchr(localeID, '_');
1664 if(lastUnderscore!=NULL) {
1665 i=(int32_t)(lastUnderscore-localeID);
1666 } else {
1667 i=0;
1668 }
1669
Frank Tang69c72a62019-04-03 21:41:21 -07001670 if (i > 0) {
1671 if (uprv_strnicmp(localeID, "und_", 4) == 0) {
1672 localeID += 3;
1673 i -= 3;
1674 uprv_memmove(parent, localeID, uprv_min(i, parentCapacity));
1675 } else if (parent != localeID) {
1676 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1677 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001678 }
Frank Tang69c72a62019-04-03 21:41:21 -07001679
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001680 return u_terminateChars(parent, parentCapacity, i, err);
1681}
1682
1683U_CAPI int32_t U_EXPORT2
1684uloc_getLanguage(const char* localeID,
1685 char* language,
1686 int32_t languageCapacity,
1687 UErrorCode* err)
1688{
1689 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001690
1691 if (err==NULL || U_FAILURE(*err)) {
1692 return 0;
1693 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001694
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001695 if(localeID==NULL) {
1696 localeID=uloc_getDefault();
1697 }
1698
Frank Tangf90543d2020-10-30 19:02:04 -07001699 return ulocimp_getLanguage(localeID, NULL, *err).extract(language, languageCapacity, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001700}
1701
1702U_CAPI int32_t U_EXPORT2
1703uloc_getScript(const char* localeID,
1704 char* script,
1705 int32_t scriptCapacity,
1706 UErrorCode* err)
1707{
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001708 if(err==NULL || U_FAILURE(*err)) {
1709 return 0;
1710 }
1711
1712 if(localeID==NULL) {
1713 localeID=uloc_getDefault();
1714 }
1715
1716 /* skip the language */
Frank Tangf90543d2020-10-30 19:02:04 -07001717 ulocimp_getLanguage(localeID, &localeID, *err);
1718 if (U_FAILURE(*err)) {
1719 return 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001720 }
Frank Tangf90543d2020-10-30 19:02:04 -07001721
1722 if(_isIDSeparator(*localeID)) {
1723 return ulocimp_getScript(localeID+1, NULL, *err).extract(script, scriptCapacity, *err);
1724 }
1725 return u_terminateChars(script, scriptCapacity, 0, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001726}
1727
1728U_CAPI int32_t U_EXPORT2
1729uloc_getCountry(const char* localeID,
1730 char* country,
1731 int32_t countryCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001732 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001733{
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001734 if(err==NULL || U_FAILURE(*err)) {
1735 return 0;
1736 }
1737
1738 if(localeID==NULL) {
1739 localeID=uloc_getDefault();
1740 }
1741
1742 /* Skip the language */
Frank Tangf90543d2020-10-30 19:02:04 -07001743 ulocimp_getLanguage(localeID, &localeID, *err);
1744 if (U_FAILURE(*err)) {
1745 return 0;
1746 }
1747
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001748 if(_isIDSeparator(*localeID)) {
1749 const char *scriptID;
1750 /* Skip the script if available */
Frank Tangf90543d2020-10-30 19:02:04 -07001751 ulocimp_getScript(localeID+1, &scriptID, *err);
1752 if (U_FAILURE(*err)) {
1753 return 0;
1754 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001755 if(scriptID != localeID+1) {
1756 /* Found optional script */
1757 localeID = scriptID;
1758 }
1759 if(_isIDSeparator(*localeID)) {
Frank Tangf90543d2020-10-30 19:02:04 -07001760 return ulocimp_getCountry(localeID+1, NULL, *err).extract(country, countryCapacity, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001761 }
1762 }
Frank Tangf90543d2020-10-30 19:02:04 -07001763 return u_terminateChars(country, countryCapacity, 0, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001764}
1765
1766U_CAPI int32_t U_EXPORT2
1767uloc_getVariant(const char* localeID,
1768 char* variant,
1769 int32_t variantCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001770 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001771{
1772 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1773 const char* tmpLocaleID;
1774 int32_t i=0;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001775
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001776 if(err==NULL || U_FAILURE(*err)) {
1777 return 0;
1778 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001779
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001780 if (_hasBCP47Extension(localeID)) {
1781 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1782 } else {
1783 if (localeID==NULL) {
1784 localeID=uloc_getDefault();
1785 }
1786 tmpLocaleID=localeID;
1787 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001788
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001789 /* Skip the language */
Frank Tangf90543d2020-10-30 19:02:04 -07001790 ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);
1791 if (U_FAILURE(*err)) {
1792 return 0;
1793 }
1794
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001795 if(_isIDSeparator(*tmpLocaleID)) {
1796 const char *scriptID;
1797 /* Skip the script if available */
Frank Tangf90543d2020-10-30 19:02:04 -07001798 ulocimp_getScript(tmpLocaleID+1, &scriptID, *err);
1799 if (U_FAILURE(*err)) {
1800 return 0;
1801 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001802 if(scriptID != tmpLocaleID+1) {
1803 /* Found optional script */
1804 tmpLocaleID = scriptID;
1805 }
1806 /* Skip the Country */
1807 if (_isIDSeparator(*tmpLocaleID)) {
1808 const char *cntryID;
Frank Tangf90543d2020-10-30 19:02:04 -07001809 ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err);
1810 if (U_FAILURE(*err)) {
1811 return 0;
1812 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001813 if (cntryID != tmpLocaleID+1) {
1814 /* Found optional country */
1815 tmpLocaleID = cntryID;
1816 }
1817 if(_isIDSeparator(*tmpLocaleID)) {
1818 /* If there was no country ID, skip a possible extra IDSeparator */
1819 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
1820 tmpLocaleID++;
1821 }
Frank Tangf90543d2020-10-30 19:02:04 -07001822
1823 CheckedArrayByteSink sink(variant, variantCapacity);
1824 _getVariant(tmpLocaleID+1, *tmpLocaleID, sink, FALSE);
1825
1826 i = sink.NumberOfBytesAppended();
1827
1828 if (U_FAILURE(*err)) {
1829 return i;
1830 }
1831
1832 if (sink.Overflowed()) {
1833 *err = U_BUFFER_OVERFLOW_ERROR;
1834 return i;
1835 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001836 }
1837 }
1838 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001839
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001840 return u_terminateChars(variant, variantCapacity, i, err);
1841}
1842
1843U_CAPI int32_t U_EXPORT2
1844uloc_getName(const char* localeID,
1845 char* name,
1846 int32_t nameCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001847 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001848{
Frank Tangf2223962020-04-27 18:25:29 -07001849 if (U_FAILURE(*err)) {
1850 return 0;
1851 }
1852
1853 CheckedArrayByteSink sink(name, nameCapacity);
1854 ulocimp_getName(localeID, sink, err);
1855
1856 int32_t reslen = sink.NumberOfBytesAppended();
1857
1858 if (U_FAILURE(*err)) {
1859 return reslen;
1860 }
1861
1862 if (sink.Overflowed()) {
1863 *err = U_BUFFER_OVERFLOW_ERROR;
1864 } else {
1865 u_terminateChars(name, nameCapacity, reslen, err);
1866 }
1867
1868 return reslen;
1869}
1870
Frank Tangf90543d2020-10-30 19:02:04 -07001871U_CAPI void U_EXPORT2
Frank Tangf2223962020-04-27 18:25:29 -07001872ulocimp_getName(const char* localeID,
1873 ByteSink& sink,
1874 UErrorCode* err)
1875{
1876 _canonicalize(localeID, sink, 0, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001877}
1878
1879U_CAPI int32_t U_EXPORT2
1880uloc_getBaseName(const char* localeID,
1881 char* name,
1882 int32_t nameCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001883 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001884{
Frank Tangf2223962020-04-27 18:25:29 -07001885 if (U_FAILURE(*err)) {
1886 return 0;
1887 }
1888
1889 CheckedArrayByteSink sink(name, nameCapacity);
1890 ulocimp_getBaseName(localeID, sink, err);
1891
1892 int32_t reslen = sink.NumberOfBytesAppended();
1893
1894 if (U_FAILURE(*err)) {
1895 return reslen;
1896 }
1897
1898 if (sink.Overflowed()) {
1899 *err = U_BUFFER_OVERFLOW_ERROR;
1900 } else {
1901 u_terminateChars(name, nameCapacity, reslen, err);
1902 }
1903
1904 return reslen;
1905}
1906
Frank Tangf90543d2020-10-30 19:02:04 -07001907U_CAPI void U_EXPORT2
Frank Tangf2223962020-04-27 18:25:29 -07001908ulocimp_getBaseName(const char* localeID,
1909 ByteSink& sink,
1910 UErrorCode* err)
1911{
1912 _canonicalize(localeID, sink, _ULOC_STRIP_KEYWORDS, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001913}
1914
1915U_CAPI int32_t U_EXPORT2
1916uloc_canonicalize(const char* localeID,
1917 char* name,
1918 int32_t nameCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001919 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001920{
Frank Tangf2223962020-04-27 18:25:29 -07001921 if (U_FAILURE(*err)) {
1922 return 0;
1923 }
1924
1925 CheckedArrayByteSink sink(name, nameCapacity);
1926 ulocimp_canonicalize(localeID, sink, err);
1927
1928 int32_t reslen = sink.NumberOfBytesAppended();
1929
1930 if (U_FAILURE(*err)) {
1931 return reslen;
1932 }
1933
1934 if (sink.Overflowed()) {
1935 *err = U_BUFFER_OVERFLOW_ERROR;
1936 } else {
1937 u_terminateChars(name, nameCapacity, reslen, err);
1938 }
1939
1940 return reslen;
1941}
1942
Frank Tangf90543d2020-10-30 19:02:04 -07001943U_CAPI void U_EXPORT2
Frank Tangf2223962020-04-27 18:25:29 -07001944ulocimp_canonicalize(const char* localeID,
1945 ByteSink& sink,
1946 UErrorCode* err)
1947{
1948 _canonicalize(localeID, sink, _ULOC_CANONICALIZE, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001949}
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001950
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001951U_CAPI const char* U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001952uloc_getISO3Language(const char* localeID)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001953{
1954 int16_t offset;
1955 char lang[ULOC_LANG_CAPACITY];
1956 UErrorCode err = U_ZERO_ERROR;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001957
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001958 if (localeID == NULL)
1959 {
1960 localeID = uloc_getDefault();
1961 }
1962 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
1963 if (U_FAILURE(err))
1964 return "";
1965 offset = _findIndex(LANGUAGES, lang);
1966 if (offset < 0)
1967 return "";
1968 return LANGUAGES_3[offset];
1969}
1970
1971U_CAPI const char* U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001972uloc_getISO3Country(const char* localeID)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001973{
1974 int16_t offset;
1975 char cntry[ULOC_LANG_CAPACITY];
1976 UErrorCode err = U_ZERO_ERROR;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001977
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001978 if (localeID == NULL)
1979 {
1980 localeID = uloc_getDefault();
1981 }
1982 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
1983 if (U_FAILURE(err))
1984 return "";
1985 offset = _findIndex(COUNTRIES, cntry);
1986 if (offset < 0)
1987 return "";
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001988
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001989 return COUNTRIES_3[offset];
1990}
1991
1992U_CAPI uint32_t U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001993uloc_getLCID(const char* localeID)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001994{
1995 UErrorCode status = U_ZERO_ERROR;
1996 char langID[ULOC_FULLNAME_CAPACITY];
Jungshik Shin87232d82017-05-13 21:10:13 -07001997 uint32_t lcid = 0;
1998
1999 /* Check for incomplete id. */
2000 if (!localeID || uprv_strlen(localeID) < 2) {
2001 return 0;
2002 }
2003
Frank Tang69c72a62019-04-03 21:41:21 -07002004 // First, attempt Windows platform lookup if available, but fall
2005 // through to catch any special cases (ICU vs Windows name differences).
2006 lcid = uprv_convertToLCIDPlatform(localeID, &status);
2007 if (U_FAILURE(status)) {
2008 return 0;
2009 }
2010 if (lcid > 0) {
Jungshik Shin87232d82017-05-13 21:10:13 -07002011 // Windows found an LCID, return that
2012 return lcid;
2013 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002014
2015 uloc_getLanguage(localeID, langID, sizeof(langID), &status);
Frank Tang69c72a62019-04-03 21:41:21 -07002016 if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002017 return 0;
2018 }
2019
2020 if (uprv_strchr(localeID, '@')) {
2021 // uprv_convertToLCID does not support keywords other than collation.
2022 // Remove all keywords except collation.
2023 int32_t len;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002024 char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
2025
Frank Tangf90543d2020-10-30 19:02:04 -07002026 CharString collVal;
2027 {
2028 CharStringByteSink sink(&collVal);
2029 ulocimp_getKeywordValue(localeID, "collation", sink, &status);
2030 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002031
Frank Tangf90543d2020-10-30 19:02:04 -07002032 if (U_SUCCESS(status) && !collVal.isEmpty()) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002033 len = uloc_getBaseName(localeID, tmpLocaleID,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002034 UPRV_LENGTHOF(tmpLocaleID) - 1, &status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002035
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002036 if (U_SUCCESS(status) && len > 0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002037 tmpLocaleID[len] = 0;
2038
Frank Tangf90543d2020-10-30 19:02:04 -07002039 len = uloc_setKeywordValue("collation", collVal.data(), tmpLocaleID,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002040 UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002041
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002042 if (U_SUCCESS(status) && len > 0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002043 tmpLocaleID[len] = 0;
2044 return uprv_convertToLCID(langID, tmpLocaleID, &status);
2045 }
2046 }
2047 }
2048
2049 // fall through - all keywords are simply ignored
2050 status = U_ZERO_ERROR;
2051 }
2052
2053 return uprv_convertToLCID(langID, localeID, &status);
2054}
2055
2056U_CAPI int32_t U_EXPORT2
2057uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2058 UErrorCode *status)
2059{
2060 return uprv_convertToPosix(hostid, locale, localeCapacity, status);
2061}
2062
2063/* ### Default locale **************************************************/
2064
2065U_CAPI const char* U_EXPORT2
2066uloc_getDefault()
2067{
2068 return locale_get_default();
2069}
2070
2071U_CAPI void U_EXPORT2
2072uloc_setDefault(const char* newDefaultLocale,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002073 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002074{
2075 if (U_FAILURE(*err))
2076 return;
2077 /* the error code isn't currently used for anything by this function*/
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002078
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002079 /* propagate change to C++ */
2080 locale_set_default(newDefaultLocale);
2081}
2082
2083/**
2084 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
2085 * to an array of pointers to arrays of char. All of these pointers are owned
2086 * by ICU-- do not delete them, and do not write through them. The array is
2087 * terminated with a null pointer.
2088 */
2089U_CAPI const char* const* U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002090uloc_getISOLanguages()
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002091{
2092 return LANGUAGES;
2093}
2094
2095/**
2096 * Returns a list of all 2-letter country codes defined in ISO 639. This is a
2097 * pointer to an array of pointers to arrays of char. All of these pointers are
2098 * owned by ICU-- do not delete them, and do not write through them. The array is
2099 * terminated with a null pointer.
2100 */
2101U_CAPI const char* const* U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002102uloc_getISOCountries()
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002103{
2104 return COUNTRIES;
2105}
2106
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002107U_CAPI const char* U_EXPORT2
2108uloc_toUnicodeLocaleKey(const char* keyword)
2109{
2110 const char* bcpKey = ulocimp_toBcpKey(keyword);
2111 if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {
2112 // unknown keyword, but syntax is fine..
2113 return keyword;
2114 }
2115 return bcpKey;
2116}
2117
2118U_CAPI const char* U_EXPORT2
2119uloc_toUnicodeLocaleType(const char* keyword, const char* value)
2120{
2121 const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
2122 if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {
2123 // unknown keyword, but syntax is fine..
2124 return value;
2125 }
2126 return bcpType;
2127}
2128
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002129static UBool
2130isWellFormedLegacyKey(const char* legacyKey)
2131{
2132 const char* p = legacyKey;
2133 while (*p) {
2134 if (!UPRV_ISALPHANUM(*p)) {
2135 return FALSE;
2136 }
2137 p++;
2138 }
2139 return TRUE;
2140}
2141
2142static UBool
2143isWellFormedLegacyType(const char* legacyType)
2144{
2145 const char* p = legacyType;
2146 int32_t alphaNumLen = 0;
2147 while (*p) {
2148 if (*p == '_' || *p == '/' || *p == '-') {
2149 if (alphaNumLen == 0) {
2150 return FALSE;
2151 }
2152 alphaNumLen = 0;
2153 } else if (UPRV_ISALPHANUM(*p)) {
2154 alphaNumLen++;
2155 } else {
2156 return FALSE;
2157 }
2158 p++;
2159 }
2160 return (alphaNumLen != 0);
2161}
2162
2163U_CAPI const char* U_EXPORT2
2164uloc_toLegacyKey(const char* keyword)
2165{
2166 const char* legacyKey = ulocimp_toLegacyKey(keyword);
2167 if (legacyKey == NULL) {
2168 // Checks if the specified locale key is well-formed with the legacy locale syntax.
2169 //
2170 // Note:
Jungshik Shin87232d82017-05-13 21:10:13 -07002171 // LDML/CLDR provides some definition of keyword syntax in
2172 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2173 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2174 // Keys can only consist of [0-9a-zA-Z].
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002175 if (isWellFormedLegacyKey(keyword)) {
2176 return keyword;
2177 }
2178 }
2179 return legacyKey;
2180}
2181
2182U_CAPI const char* U_EXPORT2
2183uloc_toLegacyType(const char* keyword, const char* value)
2184{
2185 const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
2186 if (legacyType == NULL) {
2187 // Checks if the specified locale type is well-formed with the legacy locale syntax.
2188 //
2189 // Note:
Jungshik Shin87232d82017-05-13 21:10:13 -07002190 // LDML/CLDR provides some definition of keyword syntax in
2191 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2192 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2193 // Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
2194 // we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002195 if (isWellFormedLegacyType(value)) {
2196 return value;
2197 }
2198 }
2199 return legacyType;
2200}
2201
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002202/*eof*/