blob: ebfbb506508c1b9a183e92a7dd23b8d9c62f1824 [file] [log] [blame]
Jungshik Shin87232d82017-05-13 21:10:13 -07001// © 2016 and later: Unicode, Inc. and others.
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002// License & terms of use: http://www.unicode.org/copyright.html
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00003/*
4**********************************************************************
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07005* Copyright (C) 1997-2016, International Business Machines
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00006* Corporation and others. All Rights Reserved.
7**********************************************************************
8*
9* File ULOC.CPP
10*
11* Modification History:
12*
13* Date Name Description
14* 04/01/97 aliu Creation.
15* 08/21/98 stephen JDK 1.2 sync
16* 12/08/98 rtg New Locale implementation and C API
17* 03/15/99 damiba overhaul.
18* 04/06/99 stephen changed setDefault() to realloc and copy
19* 06/14/99 stephen Changed calls to ures_open for new params
20* 07/21/99 stephen Modified setDefault() to propagate to C++
21* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
22* brought canonicalization code into line with spec
23*****************************************************************************/
24
25/*
26 POSIX's locale format, from putil.c: [no spaces]
27
28 ll [ _CC ] [ . MM ] [ @ VV]
29
30 l = lang, C = ctry, M = charmap, V = variant
31*/
32
Frank Tangf2223962020-04-27 18:25:29 -070033#include "unicode/bytestream.h"
34#include "unicode/errorcode.h"
35#include "unicode/stringpiece.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000036#include "unicode/utypes.h"
37#include "unicode/ustring.h"
38#include "unicode/uloc.h"
39
Frank Tangf2223962020-04-27 18:25:29 -070040#include "bytesinkutil.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000041#include "putilimp.h"
42#include "ustr_imp.h"
43#include "ulocimp.h"
44#include "umutex.h"
45#include "cstring.h"
46#include "cmemory.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000047#include "locmap.h"
48#include "uarrsort.h"
49#include "uenumimp.h"
50#include "uassert.h"
Jungshik Shin87232d82017-05-13 21:10:13 -070051#include "charstr.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000052
Jungshik Shin87232d82017-05-13 21:10:13 -070053U_NAMESPACE_USE
Jungshik Shin5feb9ad2016-10-21 12:52:48 -070054
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000055/* ### Declarations **************************************************/
56
57/* Locale stuff from locid.cpp */
58U_CFUNC void locale_set_default(const char *id);
59U_CFUNC const char *locale_get_default(void);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000060
61/* ### Data tables **************************************************/
62
/**
 * Language codes, 2-letter where one exists and 3-letter otherwise.
 *
 * Layout: primary list, NULL, secondary list, NULL.
 *  - The primary list must stay in sorted order; it is returned directly to
 *    the user by some API.
 *  - The secondary list holds codes that are supported but not exposed
 *    through user API.
 *
 * This table must be kept in sync with LANGUAGES_3: entry i of each table
 * must refer to the same language. The rows below are laid out exactly like
 * the rows of LANGUAGES_3 so the two tables can be compared side by side.
 *
 * Notes:
 *  - Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to include
 *    the revisions up to 2001/7/27 *CWB*
 *  - The 3 character codes are the terminology codes like RFC 3066. This is
 *    compatible with prior ICU codes.
 *  - "in", "iw", "ji", "jw" and "sh" have been withdrawn. They are kept, but
 *    in the secondary list at the end of the table, because their 3 character
 *    codes are duplicates. This avoids bad searches going from 3 to 2
 *    character codes.
 *  - The range qaa-qtz is reserved for local use.
 *
 * Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables
 * (ISO639 table version 20150505), with subsequent hand addition of
 * selected languages.
 */
static const char * const LANGUAGES[] = {
    /* a */
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "aeb",
    "af",  "afh", "agq", "ain", "ak",  "akk", "akz", "ale",
    "aln", "alt", "am",  "an",  "ang", "anp", "ar",  "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
    "asa", "ase", "ast", "av",  "avk", "awa", "ay",  "az",
    /* b */
    "ba",  "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "be",  "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
    "bgn", "bho", "bi",  "bik", "bin", "bjn", "bkm", "bla",
    "bm",  "bn",  "bo",  "bpy", "bqi", "br",  "bra", "brh",
    "brx", "bs",  "bss", "bua", "bug", "bum", "byn", "byv",
    /* c */
    "ca",  "cad", "car", "cay", "cch", "ccp", "ce",  "ceb", "cgg",
    "ch",  "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "co",  "cop", "cps", "cr",  "crh",
    "cs",  "csb", "cu",  "cv",  "cy",
    /* d */
    "da",  "dak", "dar", "dav", "de",  "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
    "dyo", "dyu", "dz",  "dzg",
    /* e */
    "ebu", "ee",  "efi", "egl", "egy", "eka", "el",  "elx",
    "en",  "enm", "eo",  "es",  "esu", "et",  "eu",  "ewo",
    "ext",
    /* f */
    "fa",  "fan", "fat", "ff",  "fi",  "fil", "fit", "fj",
    "fo",  "fon", "fr",  "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fy",
    /* g */
    "ga",  "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
    "gez", "gil", "gl",  "glk", "gmh", "gn",  "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "guc",
    "gur", "guz", "gv",  "gwi",
    /* h */
    "ha",  "hai", "hak", "haw", "he",  "hi",  "hif", "hil",
    "hit", "hmn", "ho",  "hr",  "hsb", "hsn", "ht",  "hu",
    "hup", "hy",  "hz",
    /* i */
    "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ik",
    "ilo", "inh", "io",  "is",  "it",  "iu",  "izh",
    /* j */
    "ja",  "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jv",
    /* k */
    "ka",  "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg",  "kgp",
    "kha", "kho", "khq", "khw", "ki",  "kiu", "kj",  "kk",
    "kkj", "kl",  "kln", "km",  "kmb", "kn",  "ko",  "koi",
    "kok", "kos", "kpe", "kr",  "krc", "kri", "krj", "krl",
    "kru", "ks",  "ksb", "ksf", "ksh", "ku",  "kum", "kut",
    "kv",  "kw",  "ky",
    /* l */
    "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lfn",
    "lg",  "li",  "lij", "liv", "lkt", "lmo", "ln",  "lo",
    "lol", "loz", "lrc", "lt",  "ltg", "lu",  "lua", "lui",
    "lun", "luo", "lus", "luy", "lv",  "lzh", "lzz",
    /* m */
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg",  "mga",
    "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
    "ml",  "mn",  "mnc", "mni", "mo",
    "moh", "mos", "mr",  "mrj",
    "ms",  "mt",  "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "my",  "mye", "myv", "mzn",
    /* n */
    "na",  "nan", "nap", "naq", "nb",  "nd",  "nds", "ne",
    "new", "ng",  "nia", "niu", "njo", "nl",  "nmg", "nn",
    "nnh", "no",  "nog", "non", "nov", "nqo", "nr",  "nso",
    "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi",
    /* o */
    "oc",  "oj",  "om",  "or",  "os",  "osa", "ota",
    /* p */
    "pa",  "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
    "pdt", "peo", "pfl", "phn", "pi",  "pl",  "pms", "pnt",
    "pon", "prg", "pro", "ps",  "pt",
    /* q */
    "qu",  "quc", "qug",
    /* r */
    "raj", "rap", "rar", "rgn", "rif", "rm",  "rn",  "ro",
    "rof", "rom", "rtm", "ru",  "rue", "rug", "rup",
    "rw",  "rwk",
    /* s */
    "sa",  "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "sc",  "scn", "sco", "sd",  "sdc", "sdh",
    "se",  "see", "seh", "sei", "sel", "ses", "sg",  "sga",
    "sgs", "shi", "shn", "shu", "si",  "sid", "sk",
    "sl",  "sli", "sly", "sm",  "sma", "smj", "smn", "sms",
    "sn",  "snk", "so",  "sog", "sq",  "sr",  "srn", "srr",
    "ss",  "ssy", "st",  "stq", "su",  "suk", "sus", "sux",
    "sv",  "sw",  "swb", "swc", "syc", "syr", "szl",
    /* t */
    "ta",  "tcy", "te",  "tem", "teo", "ter", "tet", "tg",
    "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tkr", "tl",
    "tlh", "tli", "tly", "tmh", "tn",  "to",  "tog", "tpi",
    "tr",  "tru", "trv", "ts",  "tsd", "tsi", "tt",  "ttt",
    "tum", "tvl", "tw",  "twq", "ty",  "tyv", "tzm",
    /* u */
    "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
    /* v */
    "vai", "ve",  "vec", "vep", "vi",  "vls", "vmf", "vo",
    "vot", "vro", "vun",
    /* w */
    "wa",  "wae", "wal", "war", "was", "wbp", "wo",  "wuu",
    /* x */
    "xal", "xh",  "xmf", "xog",
    /* y */
    "yao", "yap", "yav", "ybb", "yi",  "yo",  "yrl", "yue",
    /* z */
    "za",  "zap", "zbl", "zea", "zen", "zgh", "zh",  "zu",
    "zun", "zxx", "zza",
    NULL,
    /* obsolete language codes */
    "in",  "iw",  "ji",  "jw",  "sh",
    NULL
};
187
/*
 * Deprecated language codes and the codes that replace them. The two arrays
 * have the same shape and appear to pair up by index ("in" -> "id",
 * "iw" -> "he", "ji" -> "yi", "jw" -> "jv"); confirm against the lookup code.
 * Each array ends with two NULL entries.
 */
static const char * const DEPRECATED_LANGUAGES[] = {
    "in", "iw", "ji", "jw",
    NULL,
    NULL
};
static const char * const REPLACEMENT_LANGUAGES[] = {
    "id", "he", "yi", "jv",
    NULL,
    NULL
};
194
/**
 * 3-letter language codes.
 *
 * Lookup table used to convert 3-letter language codes to their 2-letter
 * equivalent, where possible. It must be kept in sync with LANGUAGES: for
 * all valid i, LANGUAGES[i] must refer to the same language as
 * LANGUAGES_3[i]. Where a 3-letter language code has no 2-letter
 * equivalent, the 3-letter code occupies both LANGUAGES[i] and
 * LANGUAGES_3[i].
 *
 * Layout: primary list, NULL, secondary list, NULL. The two lists
 * correspond to the two lists in LANGUAGES, and the rows below are laid out
 * exactly like the rows of LANGUAGES to make eyeballing easier.
 *
 * Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables
 * (ISO639 table version 20150505), with subsequent hand addition of
 * selected languages.
 */
static const char * const LANGUAGES_3[] = {
    /* a */
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
    "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
    "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
    "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
    /* b */
    "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
    "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
    "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
    "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
    /* c */
    "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
    "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
    "ces", "csb", "chu", "chv", "cym",
    /* d */
    "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
    "dyo", "dyu", "dzo", "dzg",
    /* e */
    "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
    "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
    "ext",
    /* f */
    "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
    "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fry",
    /* g */
    "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
    "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
    "gur", "guz", "glv", "gwi",
    /* h */
    "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
    "hup", "hye", "her",
    /* i */
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
    "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
    /* j */
    "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jav",
    /* k */
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
    "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
    "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
    "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
    "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
    "kom", "cor", "kir",
    /* l */
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
    "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
    "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
    "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
    /* m */
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
    "mal", "mon", "mnc", "mni", "mol",
    "moh", "mos", "mar", "mrj",
    "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "mya", "mye", "myv", "mzn",
    /* n */
    "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
    "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
    "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
    "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
    /* o */
    "oci", "oji", "orm", "ori", "oss", "osa", "ota",
    /* p */
    "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
    "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
    "pon", "prg", "pro", "pus", "por",
    /* q */
    "que", "quc", "qug",
    /* r */
    "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
    "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
    "kin", "rwk",
    /* s */
    "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
    "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
    "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
    "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
    "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
    "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
    "swe", "swa", "swb", "swc", "syc", "syr", "szl",
    /* t */
    "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
    "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
    "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
    "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
    /* u */
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
    /* v */
    "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
    "vot", "vro", "vun",
    /* w */
    "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
    /* x */
    "xal", "xho", "xmf", "xog",
    /* y */
    "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
    /* z */
    "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
    "zun", "zxx", "zza",
    NULL,
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    "ind", "heb", "yid", "jaw", "srp",
    NULL
};
305
306/**
307 * Table of 2-letter country codes.
308 *
309 * This list must be in sorted order. This list is returned directly
310 * to the user by some API.
311 *
312 * This list must be kept in sync with COUNTRIES_3, with corresponding
313 * entries matched.
314 *
315 * This table should be terminated with a NULL entry, followed by a
316 * second list, and another NULL entry. The first list is visible to
317 * user code when this array is returned by API. The second list
318 * contains codes we support, but do not expose through user API.
319 *
320 * Notes:
321 *
322 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
323 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
324 * new codes keeping the old ones for compatibility updated to include
325 * 1999/12/03 revisions *CWB*
326 *
327 * RO(ROM) is now RO(ROU) according to
328 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
329 */
330static const char * const COUNTRIES[] = {
331 "AD", "AE", "AF", "AG", "AI", "AL", "AM",
332 "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
333 "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
334 "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
335 "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
336 "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
337 "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK",
338 "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
339 "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
340 "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
341 "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
342 "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
343 "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
344 "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
345 "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
346 "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
347 "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
348 "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
349 "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
350 "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
351 "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
352 "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
353 "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
354 "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
355 "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
356 "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",
357 "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
358 "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
359 "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
360 "WS", "YE", "YT", "ZA", "ZM", "ZW",
361NULL,
362 "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
363NULL
364};
365
/*
 * Deprecated country codes and their replacements. REPLACEMENT_COUNTRIES[i]
 * is the replacement for DEPRECATED_COUNTRIES[i]; the commented-out row in
 * the second table repeats the deprecated codes so the pairing can be checked
 * by eye. Each array ends with two NULL entries.
 */
static const char * const DEPRECATED_COUNTRIES[] = {
    /* deprecated country list */
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH",
    "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR",
    NULL,
    NULL
};
static const char * const REPLACEMENT_COUNTRIES[] = {
    /* replacement country codes */
/*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", */
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU",
/*  "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR"  */
    "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD",
    NULL,
    NULL
};
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700373
/**
 * 3-letter country codes.
 *
 * Lookup table used to convert 3-letter country codes to their 2-letter
 * equivalent. It must be kept in sync with COUNTRIES: for all valid i,
 * COUNTRIES[i] must refer to the same country as COUNTRIES_3[i]. The
 * commented-out rows are copied from COUNTRIES to make eyeballing easier.
 *
 * Layout: primary list, NULL, secondary list, NULL. The two lists
 * correspond to the two lists in COUNTRIES.
 */
static const char * const COUNTRIES_3[] = {
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",      */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",      */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",      */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",      */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",      */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
/*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",      */
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
/*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",      */
    "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",      */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",      */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",      */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",      */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",      */
    "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",      */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",      */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",      */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",      */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",      */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",      */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",      */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",      */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",      */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",      */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",      */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",      */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
/*  "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",      */
    "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",      */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",      */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",      */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",      */
    "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
    NULL,
/*  "AN",  "BU",  "CS",  "FX",  "RO",  "SU",  "TP",  "YD",  "YU",  "ZR"       */
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
    NULL
};
453
/* One canonicalization rule: a locale ID and the ID it canonicalizes to. */
struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
};
typedef struct CanonicalizationMap CanonicalizationMap;

/**
 * A map to canonicalize locale IDs. This handles a variety of
 * different semantic kinds of transformations.
 */
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    /* input ID          canonical ID */
    { "art__LOJBAN",     "jbo" },   /* registered name */
    { "hy__AREVELA",     "hy"  },   /* Registered IANA variant */
    { "hy__AREVMDA",     "hyw" },   /* Registered IANA variant */
    { "zh__GUOYU",       "zh"  },   /* registered name */
    { "zh__HAKKA",       "hak" },   /* registered name */
    { "zh__XIANG",       "hsn" },   /* registered name */
    /* subtags with 3 chars won't be treated as variants. */
    { "zh_GAN",          "gan" },   /* registered name */
    { "zh_MIN_NAN",      "nan" },   /* registered name */
    { "zh_WUU",          "wuu" },   /* registered name */
    { "zh_YUE",          "yue" },   /* registered name */
};
476
477/* ### BCP47 Conversion *******************************************/
/*
 * Test whether a locale ID looks like a BCP47 tag carrying an extension:
 * the ID is non-NULL, contains no '@', and its shortest separator-terminated
 * subtag (as reported by getShortestSubtagLength) is exactly one character
 * long.
 *
 * The check is applied to the macro's own argument. Earlier the expansion
 * passed the identifier `localeID` to getShortestSubtagLength, which only
 * worked when the caller happened to have a variable of that name holding
 * the same string. Note that `id` is evaluated more than once, so it must
 * not have side effects.
 */
#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
480/* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
Frank Tangb8696612019-10-25 14:58:21 -0700481#define _ConvertBCP47(finalID, id, buffer, length,err) UPRV_BLOCK_MACRO_BEGIN { \
482 if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || \
483 U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) { \
484 finalID=id; \
485 if (*err == U_STRING_NOT_TERMINATED_WARNING) { *err = U_BUFFER_OVERFLOW_ERROR; } \
486 } else { \
487 finalID=buffer; \
488 } \
489} UPRV_BLOCK_MACRO_END
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000490/* Gets the size of the shortest subtag in the given localeID. */
491static int32_t getShortestSubtagLength(const char *localeID) {
Jungshik Shinb3189662017-11-07 11:18:34 -0800492 int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000493 int32_t length = localeIDLength;
494 int32_t tmpLength = 0;
495 int32_t i;
496 UBool reset = TRUE;
497
498 for (i = 0; i < localeIDLength; i++) {
499 if (localeID[i] != '_' && localeID[i] != '-') {
500 if (reset) {
501 tmpLength = 0;
502 reset = FALSE;
503 }
504 tmpLength++;
505 } else {
506 if (tmpLength != 0 && tmpLength < length) {
507 length = tmpLength;
508 }
509 reset = TRUE;
510 }
511 }
512
513 return length;
514}
515
516/* ### Keywords **************************************************/
/*
 * Character classification helpers for keyword parsing. These are macros,
 * so the argument may be evaluated more than once.
 */
#define UPRV_ISDIGIT(c) ((c) >= '0' && (c) <= '9')
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c))
/* Punctuation/symbols allowed in legacy key values */
#define UPRV_OK_VALUE_PUNCTUATION(c) \
    ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')

/* Size of the buffer that holds one keyword name, terminator included. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Capacity of the keyword list built while parsing a locale ID. */
#define ULOC_MAX_NO_KEYWORDS 25
524
525U_CAPI const char * U_EXPORT2
526locale_getKeywordsStart(const char *localeID) {
527 const char *result = NULL;
528 if((result = uprv_strchr(localeID, '@')) != NULL) {
529 return result;
530 }
531#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
532 else {
533 /* We do this because the @ sign is variant, and the @ sign used on one
534 EBCDIC machine won't be compiled the same way on other EBCDIC based
535 machines. */
536 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
537 const uint8_t *charToFind = ebcdicSigns;
538 while(*charToFind) {
539 if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
540 return result;
541 }
542 charToFind++;
543 }
544 }
545#endif
546 return NULL;
547}
548
549/**
550 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
551 * @param keywordName incoming name to be canonicalized
552 * @param status return status (keyword too long)
553 * @return length of the keyword name
554 */
555static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
556{
Jungshik Shin87232d82017-05-13 21:10:13 -0700557 int32_t keywordNameLen = 0;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700558
Jungshik Shin87232d82017-05-13 21:10:13 -0700559 for (; *keywordName != 0; keywordName++) {
560 if (!UPRV_ISALPHANUM(*keywordName)) {
561 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
562 return 0;
563 }
564 if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
565 buf[keywordNameLen++] = uprv_tolower(*keywordName);
566 } else {
567 /* keyword name too long for internal buffer */
568 *status = U_INTERNAL_PROGRAM_ERROR;
569 return 0;
570 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000571 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700572 if (keywordNameLen == 0) {
573 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
574 return 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000575 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700576 buf[keywordNameLen] = 0; /* terminate */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700577
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000578 return keywordNameLen;
579}
580
581typedef struct {
582 char keyword[ULOC_KEYWORD_BUFFER_LEN];
583 int32_t keywordLen;
584 const char *valueStart;
585 int32_t valueLen;
586} KeywordStruct;
587
588static int32_t U_CALLCONV
589compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
590 const char* leftString = ((const KeywordStruct *)left)->keyword;
591 const char* rightString = ((const KeywordStruct *)right)->keyword;
592 return uprv_strcmp(leftString, rightString);
593}
594
Frank Tangf90543d2020-10-30 19:02:04 -0700595U_CFUNC void
596ulocimp_getKeywords(const char *localeID,
597 char prev,
598 ByteSink& sink,
599 UBool valuesToo,
600 UErrorCode *status)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000601{
602 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700603
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000604 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
605 int32_t numKeywords = 0;
606 const char* pos = localeID;
607 const char* equalSign = NULL;
608 const char* semicolon = NULL;
609 int32_t i = 0, j, n;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000610
611 if(prev == '@') { /* start of keyword definition */
612 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
613 do {
614 UBool duplicate = FALSE;
615 /* skip leading spaces */
616 while(*pos == ' ') {
617 pos++;
618 }
619 if (!*pos) { /* handle trailing "; " */
620 break;
621 }
622 if(numKeywords == maxKeywords) {
623 *status = U_INTERNAL_PROGRAM_ERROR;
Frank Tangf2223962020-04-27 18:25:29 -0700624 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000625 }
626 equalSign = uprv_strchr(pos, '=');
627 semicolon = uprv_strchr(pos, ';');
628 /* lack of '=' [foo@currency] is illegal */
629 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
630 if(!equalSign || (semicolon && semicolon<equalSign)) {
631 *status = U_INVALID_FORMAT_ERROR;
Frank Tangf2223962020-04-27 18:25:29 -0700632 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000633 }
634 /* need to normalize both keyword and keyword name */
635 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
636 /* keyword name too long for internal buffer */
637 *status = U_INTERNAL_PROGRAM_ERROR;
Frank Tangf2223962020-04-27 18:25:29 -0700638 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000639 }
640 for(i = 0, n = 0; i < equalSign - pos; ++i) {
641 if (pos[i] != ' ') {
642 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
643 }
644 }
645
646 /* zero-length keyword is an error. */
647 if (n == 0) {
648 *status = U_INVALID_FORMAT_ERROR;
Frank Tangf2223962020-04-27 18:25:29 -0700649 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000650 }
651
652 keywordList[numKeywords].keyword[n] = 0;
653 keywordList[numKeywords].keywordLen = n;
654 /* now grab the value part. First we skip the '=' */
655 equalSign++;
656 /* then we leading spaces */
657 while(*equalSign == ' ') {
658 equalSign++;
659 }
660
661 /* Premature end or zero-length value */
Jungshik Shin (jungshik at google)46be5162015-03-26 11:46:43 -0700662 if (!*equalSign || equalSign == semicolon) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000663 *status = U_INVALID_FORMAT_ERROR;
Frank Tangf2223962020-04-27 18:25:29 -0700664 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000665 }
666
667 keywordList[numKeywords].valueStart = equalSign;
668
669 pos = semicolon;
670 i = 0;
671 if(pos) {
672 while(*(pos - i - 1) == ' ') {
673 i++;
674 }
675 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
676 pos++;
677 } else {
678 i = (int32_t)uprv_strlen(equalSign);
679 while(i && equalSign[i-1] == ' ') {
680 i--;
681 }
682 keywordList[numKeywords].valueLen = i;
683 }
684 /* If this is a duplicate keyword, then ignore it */
685 for (j=0; j<numKeywords; ++j) {
686 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
687 duplicate = TRUE;
688 break;
689 }
690 }
691 if (!duplicate) {
692 ++numKeywords;
693 }
694 } while(pos);
695
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000696 /* now we have a list of keywords */
697 /* we need to sort it */
698 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700699
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000700 /* Now construct the keyword part */
701 for(i = 0; i < numKeywords; i++) {
Frank Tangf2223962020-04-27 18:25:29 -0700702 sink.Append(keywordList[i].keyword, keywordList[i].keywordLen);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000703 if(valuesToo) {
Frank Tangf2223962020-04-27 18:25:29 -0700704 sink.Append("=", 1);
705 sink.Append(keywordList[i].valueStart, keywordList[i].valueLen);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000706 if(i < numKeywords - 1) {
Frank Tangf2223962020-04-27 18:25:29 -0700707 sink.Append(";", 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000708 }
Frank Tangf2223962020-04-27 18:25:29 -0700709 } else {
710 sink.Append("\0", 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000711 }
712 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000713 }
714}
715
Frank Tangf90543d2020-10-30 19:02:04 -0700716U_CAPI int32_t U_EXPORT2
717uloc_getKeywordValue(const char* localeID,
718 const char* keywordName,
719 char* buffer, int32_t bufferCapacity,
720 UErrorCode* status)
721{
Frank Tangf2223962020-04-27 18:25:29 -0700722 if (U_FAILURE(*status)) {
723 return 0;
724 }
725
Frank Tangf90543d2020-10-30 19:02:04 -0700726 CheckedArrayByteSink sink(buffer, bufferCapacity);
727 ulocimp_getKeywordValue(localeID, keywordName, sink, status);
Frank Tangf2223962020-04-27 18:25:29 -0700728
729 int32_t reslen = sink.NumberOfBytesAppended();
730
731 if (U_FAILURE(*status)) {
732 return reslen;
733 }
734
735 if (sink.Overflowed()) {
736 *status = U_BUFFER_OVERFLOW_ERROR;
737 } else {
Frank Tangf90543d2020-10-30 19:02:04 -0700738 u_terminateChars(buffer, bufferCapacity, reslen, status);
Frank Tangf2223962020-04-27 18:25:29 -0700739 }
740
741 return reslen;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000742}
743
Frank Tangf90543d2020-10-30 19:02:04 -0700744U_CAPI void U_EXPORT2
745ulocimp_getKeywordValue(const char* localeID,
746 const char* keywordName,
747 icu::ByteSink& sink,
748 UErrorCode* status)
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700749{
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000750 const char* startSearchHere = NULL;
751 const char* nextSeparator = NULL;
752 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
753 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000754
755 if(status && U_SUCCESS(*status) && localeID) {
756 char tempBuffer[ULOC_FULLNAME_CAPACITY];
757 const char* tmpLocaleID;
758
Jungshik Shin87232d82017-05-13 21:10:13 -0700759 if (keywordName == NULL || keywordName[0] == 0) {
760 *status = U_ILLEGAL_ARGUMENT_ERROR;
Frank Tangf90543d2020-10-30 19:02:04 -0700761 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000762 }
763
764 locale_canonKeywordName(keywordNameBuffer, keywordName, status);
765 if(U_FAILURE(*status)) {
Frank Tangf90543d2020-10-30 19:02:04 -0700766 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000767 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700768
Jungshik Shin87232d82017-05-13 21:10:13 -0700769 if (_hasBCP47Extension(localeID)) {
770 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
771 } else {
772 tmpLocaleID=localeID;
773 }
774
775 startSearchHere = locale_getKeywordsStart(tmpLocaleID);
776 if(startSearchHere == NULL) {
777 /* no keywords, return at once */
Frank Tangf90543d2020-10-30 19:02:04 -0700778 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700779 }
780
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000781 /* find the first keyword */
782 while(startSearchHere) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700783 const char* keyValueTail;
784 int32_t keyValueLen;
785
786 startSearchHere++; /* skip @ or ; */
787 nextSeparator = uprv_strchr(startSearchHere, '=');
788 if(!nextSeparator) {
789 *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
Frank Tangf90543d2020-10-30 19:02:04 -0700790 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700791 }
792 /* strip leading & trailing spaces (TC decided to tolerate these) */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000793 while(*startSearchHere == ' ') {
794 startSearchHere++;
795 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700796 keyValueTail = nextSeparator;
797 while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
798 keyValueTail--;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000799 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700800 /* now keyValueTail points to first char after the keyName */
801 /* copy & normalize keyName from locale */
802 if (startSearchHere == keyValueTail) {
803 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
Frank Tangf90543d2020-10-30 19:02:04 -0700804 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700805 }
806 keyValueLen = 0;
807 while (startSearchHere < keyValueTail) {
808 if (!UPRV_ISALPHANUM(*startSearchHere)) {
809 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
Frank Tangf90543d2020-10-30 19:02:04 -0700810 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700811 }
812 if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
813 localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++);
814 } else {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000815 /* keyword name too long for internal buffer */
816 *status = U_INTERNAL_PROGRAM_ERROR;
Frank Tangf90543d2020-10-30 19:02:04 -0700817 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700818 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000819 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700820 localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700821
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000822 startSearchHere = uprv_strchr(nextSeparator, ';');
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700823
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000824 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700825 /* current entry matches the keyword. */
826 nextSeparator++; /* skip '=' */
827 /* First strip leading & trailing spaces (TC decided to tolerate these) */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000828 while(*nextSeparator == ' ') {
Jungshik Shin87232d82017-05-13 21:10:13 -0700829 nextSeparator++;
830 }
831 keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
832 while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
833 keyValueTail--;
834 }
835 /* Now copy the value, but check well-formedness */
836 if (nextSeparator == keyValueTail) {
837 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
Frank Tangf90543d2020-10-30 19:02:04 -0700838 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700839 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700840 while (nextSeparator < keyValueTail) {
841 if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) {
842 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
Frank Tangf90543d2020-10-30 19:02:04 -0700843 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700844 }
Frank Tangf90543d2020-10-30 19:02:04 -0700845 /* Should we lowercase value to return here? Tests expect as-is. */
846 sink.Append(nextSeparator++, 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000847 }
Frank Tangf90543d2020-10-30 19:02:04 -0700848 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000849 }
850 }
851 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000852}
853
854U_CAPI int32_t U_EXPORT2
855uloc_setKeywordValue(const char* keywordName,
856 const char* keywordValue,
857 char* buffer, int32_t bufferCapacity,
858 UErrorCode* status)
859{
860 /* TODO: sorting. removal. */
861 int32_t keywordNameLen;
862 int32_t keywordValueLen;
863 int32_t bufLen;
864 int32_t needLen = 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000865 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
Jungshik Shin87232d82017-05-13 21:10:13 -0700866 char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000867 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000868 int32_t rc;
869 char* nextSeparator = NULL;
870 char* nextEqualsign = NULL;
871 char* startSearchHere = NULL;
872 char* keywordStart = NULL;
Jungshik Shin87232d82017-05-13 21:10:13 -0700873 CharString updatedKeysAndValues;
Jungshik Shin87232d82017-05-13 21:10:13 -0700874 UBool handledInputKeyAndValue = FALSE;
875 char keyValuePrefix = '@';
876
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700877 if(U_FAILURE(*status)) {
878 return -1;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000879 }
Frank Tangefc58852020-11-12 11:50:18 -0800880 if (*status == U_STRING_NOT_TERMINATED_WARNING) {
881 *status = U_ZERO_ERROR;
882 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700883 if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000884 *status = U_ILLEGAL_ARGUMENT_ERROR;
885 return 0;
886 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700887 bufLen = (int32_t)uprv_strlen(buffer);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000888 if(bufferCapacity<bufLen) {
889 /* The capacity is less than the length?! Is this NULL terminated? */
890 *status = U_ILLEGAL_ARGUMENT_ERROR;
891 return 0;
892 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000893 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
894 if(U_FAILURE(*status)) {
895 return 0;
896 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700897
898 keywordValueLen = 0;
899 if(keywordValue) {
900 while (*keywordValue != 0) {
901 if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) {
902 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
903 return 0;
904 }
905 if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {
906 /* Should we force lowercase in value to set? */
907 keywordValueBuffer[keywordValueLen++] = *keywordValue++;
908 } else {
909 /* keywordValue too long for internal buffer */
910 *status = U_INTERNAL_PROGRAM_ERROR;
911 return 0;
912 }
913 }
914 }
915 keywordValueBuffer[keywordValueLen] = 0; /* terminate */
916
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000917 startSearchHere = (char*)locale_getKeywordsStart(buffer);
918 if(startSearchHere == NULL || (startSearchHere[1]==0)) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700919 if(keywordValueLen == 0) { /* no keywords = nothing to remove */
Frank Tangefc58852020-11-12 11:50:18 -0800920 U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700921 return bufLen;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000922 }
923
924 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700925 if(startSearchHere) { /* had a single @ */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000926 needLen--; /* already had the @ */
927 /* startSearchHere points at the @ */
928 } else {
929 startSearchHere=buffer+bufLen;
930 }
931 if(needLen >= bufferCapacity) {
932 *status = U_BUFFER_OVERFLOW_ERROR;
933 return needLen; /* no change */
934 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700935 *startSearchHere++ = '@';
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000936 uprv_strcpy(startSearchHere, keywordNameBuffer);
937 startSearchHere += keywordNameLen;
Jungshik Shin87232d82017-05-13 21:10:13 -0700938 *startSearchHere++ = '=';
939 uprv_strcpy(startSearchHere, keywordValueBuffer);
Frank Tangefc58852020-11-12 11:50:18 -0800940 U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000941 return needLen;
942 } /* end shortcut - no @ */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700943
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000944 keywordStart = startSearchHere;
945 /* search for keyword */
946 while(keywordStart) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700947 const char* keyValueTail;
948 int32_t keyValueLen;
949
950 keywordStart++; /* skip @ or ; */
951 nextEqualsign = uprv_strchr(keywordStart, '=');
952 if (!nextEqualsign) {
953 *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
954 return 0;
955 }
956 /* strip leading & trailing spaces (TC decided to tolerate these) */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000957 while(*keywordStart == ' ') {
958 keywordStart++;
959 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700960 keyValueTail = nextEqualsign;
961 while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') {
962 keyValueTail--;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000963 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700964 /* now keyValueTail points to first char after the keyName */
965 /* copy & normalize keyName from locale */
966 if (keywordStart == keyValueTail) {
967 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000968 return 0;
969 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700970 keyValueLen = 0;
971 while (keywordStart < keyValueTail) {
972 if (!UPRV_ISALPHANUM(*keywordStart)) {
973 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
974 return 0;
975 }
976 if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
977 localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);
978 } else {
979 /* keyword name too long for internal buffer */
980 *status = U_INTERNAL_PROGRAM_ERROR;
981 return 0;
982 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000983 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700984 localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000985
986 nextSeparator = uprv_strchr(nextEqualsign, ';');
Jungshik Shin87232d82017-05-13 21:10:13 -0700987
988 /* start processing the value part */
989 nextEqualsign++; /* skip '=' */
990 /* First strip leading & trailing spaces (TC decided to tolerate these) */
991 while(*nextEqualsign == ' ') {
992 nextEqualsign++;
993 }
994 keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);
995 while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') {
996 keyValueTail--;
997 }
998 if (nextEqualsign == keyValueTail) {
999 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
1000 return 0;
1001 }
1002
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001003 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1004 if(rc == 0) {
Jungshik Shin87232d82017-05-13 21:10:13 -07001005 /* Current entry matches the input keyword. Update the entry */
1006 if(keywordValueLen > 0) { /* updating a value */
1007 updatedKeysAndValues.append(keyValuePrefix, *status);
1008 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1009 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1010 updatedKeysAndValues.append('=', *status);
1011 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1012 } /* else removing this entry, don't emit anything */
1013 handledInputKeyAndValue = TRUE;
1014 } else {
1015 /* input keyword sorts earlier than current entry, add before current entry */
1016 if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
1017 /* insert new entry at this location */
1018 updatedKeysAndValues.append(keyValuePrefix, *status);
1019 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1020 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1021 updatedKeysAndValues.append('=', *status);
1022 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1023 handledInputKeyAndValue = TRUE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001024 }
Jungshik Shin87232d82017-05-13 21:10:13 -07001025 /* copy the current entry */
1026 updatedKeysAndValues.append(keyValuePrefix, *status);
1027 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1028 updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
1029 updatedKeysAndValues.append('=', *status);
Jungshik Shin42d50272018-10-24 01:22:09 -07001030 updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), *status);
Jungshik Shin87232d82017-05-13 21:10:13 -07001031 }
1032 if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
1033 /* append new entry at the end, it sorts later than existing entries */
1034 updatedKeysAndValues.append(keyValuePrefix, *status);
1035 /* skip keyValuePrefix update, no subsequent key-value pair */
1036 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1037 updatedKeysAndValues.append('=', *status);
1038 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1039 handledInputKeyAndValue = TRUE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001040 }
1041 keywordStart = nextSeparator;
1042 } /* end loop searching */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001043
Jungshik Shin87232d82017-05-13 21:10:13 -07001044 /* Any error from updatedKeysAndValues.append above would be internal and not due to
1045 * problems with the passed-in locale. So if we did encounter problems with the
1046 * passed-in locale above, those errors took precedence and overrode any error
1047 * status from updatedKeysAndValues.append, and also caused a return of 0. If there
1048 * are errors here they are from updatedKeysAndValues.append; they do cause an
1049 * error return but the passed-in locale is unmodified and the original bufLen is
1050 * returned.
1051 */
1052 if (!handledInputKeyAndValue || U_FAILURE(*status)) {
1053 /* if input key/value specified removal of a keyword not present in locale, or
1054 * there was an error in CharString.append, leave original locale alone. */
Frank Tangefc58852020-11-12 11:50:18 -08001055 U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
Jungshik Shin87232d82017-05-13 21:10:13 -07001056 return bufLen;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001057 }
1058
Frank Tangf90543d2020-10-30 19:02:04 -07001059 // needLen = length of the part before '@'
1060 needLen = (int32_t)(startSearchHere - buffer);
Frank Tangefc58852020-11-12 11:50:18 -08001061 // Check to see can we fit the startSearchHere, if not, return
1062 // U_BUFFER_OVERFLOW_ERROR without copy updatedKeysAndValues into it.
1063 // We do this because this API function does not behave like most others:
1064 // It promises never to set a U_STRING_NOT_TERMINATED_WARNING.
1065 // When the contents fits but without the terminating NUL, in this case we need to not change
1066 // the buffer contents and return with a buffer overflow error.
1067 int32_t appendLength = updatedKeysAndValues.length();
1068 if (appendLength >= bufferCapacity - needLen) {
1069 *status = U_BUFFER_OVERFLOW_ERROR;
1070 return needLen + appendLength;
1071 }
1072 needLen += updatedKeysAndValues.extract(
Frank Tangf90543d2020-10-30 19:02:04 -07001073 startSearchHere, bufferCapacity - needLen, *status);
Frank Tangefc58852020-11-12 11:50:18 -08001074 U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
1075 return needLen;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001076}
1077
1078/* ### ID parsing implementation **************************************************/
1079
/* TRUE if 'a' is one of the letters that can start a special prefix:
 * 'x', 'X', 'i' or 'I'.
 * The argument is parenthesized so that compound expressions expand with the
 * intended precedence; it is still evaluated more than once, so it must not
 * have side effects. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/* returns TRUE if one of the special prefixes is here (s=string)
   'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
1090
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001091/**
1092 * Lookup 'key' in the array 'list'. The array 'list' should contain
1093 * a NULL entry, followed by more entries, and a second NULL entry.
1094 *
1095 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1096 * COUNTRIES_3.
1097 */
1098static int16_t _findIndex(const char* const* list, const char* key)
1099{
1100 const char* const* anchor = list;
1101 int32_t pass = 0;
1102
1103 /* Make two passes through two NULL-terminated arrays at 'list' */
1104 while (pass++ < 2) {
1105 while (*list) {
1106 if (uprv_strcmp(key, *list) == 0) {
1107 return (int16_t)(list - anchor);
1108 }
1109 list++;
1110 }
1111 ++list; /* skip final NULL *CWB*/
1112 }
1113 return -1;
1114}
1115
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001116U_CFUNC const char*
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001117uloc_getCurrentCountryID(const char* oldID){
1118 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1119 if (offset >= 0) {
1120 return REPLACEMENT_COUNTRIES[offset];
1121 }
1122 return oldID;
1123}
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001124U_CFUNC const char*
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001125uloc_getCurrentLanguageID(const char* oldID){
1126 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1127 if (offset >= 0) {
1128 return REPLACEMENT_LANGUAGES[offset];
1129 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001130 return oldID;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001131}
1132/*
1133 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1134 * avoid duplicating code to handle the earlier locale ID pieces
1135 * in the functions for the later ones by
1136 * setting the *pEnd pointer to where they stopped parsing
1137 *
1138 * TODO try to use this in Locale
1139 */
Frank Tangf90543d2020-10-30 19:02:04 -07001140CharString U_EXPORT2
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001141ulocimp_getLanguage(const char *localeID,
Frank Tangf2223962020-04-27 18:25:29 -07001142 const char **pEnd,
1143 UErrorCode &status) {
1144 CharString result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001145
Frank Tang69c72a62019-04-03 21:41:21 -07001146 if (uprv_stricmp(localeID, "root") == 0) {
1147 localeID += 4;
1148 } else if (uprv_strnicmp(localeID, "und", 3) == 0 &&
1149 (localeID[3] == '\0' ||
1150 localeID[3] == '-' ||
1151 localeID[3] == '_' ||
1152 localeID[3] == '@')) {
1153 localeID += 3;
1154 }
1155
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001156 /* if it starts with i- or x- then copy that prefix */
1157 if(_isIDPrefix(localeID)) {
Frank Tangf2223962020-04-27 18:25:29 -07001158 result.append((char)uprv_tolower(*localeID), status);
1159 result.append('-', status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001160 localeID+=2;
1161 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001162
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001163 /* copy the language as far as possible and count its length */
1164 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
Frank Tangf2223962020-04-27 18:25:29 -07001165 result.append((char)uprv_tolower(*localeID), status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001166 localeID++;
1167 }
1168
Frank Tangf2223962020-04-27 18:25:29 -07001169 if(result.length()==3) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001170 /* convert 3 character code to 2 character code if possible *CWB*/
Frank Tangf2223962020-04-27 18:25:29 -07001171 int32_t offset = _findIndex(LANGUAGES_3, result.data());
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001172 if(offset>=0) {
Frank Tangf2223962020-04-27 18:25:29 -07001173 result.clear();
1174 result.append(LANGUAGES[offset], status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001175 }
1176 }
1177
1178 if(pEnd!=NULL) {
1179 *pEnd=localeID;
1180 }
Frank Tangf2223962020-04-27 18:25:29 -07001181
1182 return result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001183}
1184
Frank Tangf90543d2020-10-30 19:02:04 -07001185CharString U_EXPORT2
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001186ulocimp_getScript(const char *localeID,
Frank Tangf2223962020-04-27 18:25:29 -07001187 const char **pEnd,
1188 UErrorCode &status) {
1189 CharString result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001190 int32_t idLen = 0;
1191
1192 if (pEnd != NULL) {
1193 *pEnd = localeID;
1194 }
1195
1196 /* copy the second item as far as possible and count its length */
1197 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
1198 && uprv_isASCIILetter(localeID[idLen])) {
1199 idLen++;
1200 }
1201
1202 /* If it's exactly 4 characters long, then it's a script and not a country. */
1203 if (idLen == 4) {
1204 int32_t i;
1205 if (pEnd != NULL) {
1206 *pEnd = localeID+idLen;
1207 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001208 if (idLen >= 1) {
Frank Tangf2223962020-04-27 18:25:29 -07001209 result.append((char)uprv_toupper(*(localeID++)), status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001210 }
1211 for (i = 1; i < idLen; i++) {
Frank Tangf2223962020-04-27 18:25:29 -07001212 result.append((char)uprv_tolower(*(localeID++)), status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001213 }
1214 }
Frank Tangf2223962020-04-27 18:25:29 -07001215
1216 return result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001217}
1218
Frank Tangf90543d2020-10-30 19:02:04 -07001219CharString U_EXPORT2
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001220ulocimp_getCountry(const char *localeID,
Frank Tangf2223962020-04-27 18:25:29 -07001221 const char **pEnd,
1222 UErrorCode &status) {
1223 CharString result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001224 int32_t idLen=0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001225
1226 /* copy the country as far as possible and count its length */
1227 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
Frank Tangf2223962020-04-27 18:25:29 -07001228 result.append((char)uprv_toupper(localeID[idLen]), status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001229 idLen++;
1230 }
1231
1232 /* the country should be either length 2 or 3 */
1233 if (idLen == 2 || idLen == 3) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001234 /* convert 3 character code to 2 character code if possible *CWB*/
1235 if(idLen==3) {
Frank Tangf2223962020-04-27 18:25:29 -07001236 int32_t offset = _findIndex(COUNTRIES_3, result.data());
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001237 if(offset>=0) {
Frank Tangf2223962020-04-27 18:25:29 -07001238 result.clear();
1239 result.append(COUNTRIES[offset], status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001240 }
1241 }
1242 localeID+=idLen;
1243 } else {
Frank Tangf2223962020-04-27 18:25:29 -07001244 result.clear();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001245 }
1246
1247 if(pEnd!=NULL) {
1248 *pEnd=localeID;
1249 }
1250
Frank Tangf2223962020-04-27 18:25:29 -07001251 return result;
1252}
1253
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001254/**
1255 * @param needSeparator if true, then add leading '_' if any variants
1256 * are added to 'variant'
1257 */
Frank Tangf2223962020-04-27 18:25:29 -07001258static void
Frank Tangf90543d2020-10-30 19:02:04 -07001259_getVariant(const char *localeID,
1260 char prev,
1261 ByteSink& sink,
1262 UBool needSeparator) {
Frank Tangf2223962020-04-27 18:25:29 -07001263 UBool hasVariant = FALSE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001264
1265 /* get one or more variant tags and separate them with '_' */
1266 if(_isIDSeparator(prev)) {
1267 /* get a variant string after a '-' or '_' */
1268 while(!_isTerminator(*localeID)) {
1269 if (needSeparator) {
Frank Tangf2223962020-04-27 18:25:29 -07001270 sink.Append("_", 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001271 needSeparator = FALSE;
1272 }
Frank Tangf2223962020-04-27 18:25:29 -07001273 char c = (char)uprv_toupper(*localeID);
1274 if (c == '-') c = '_';
1275 sink.Append(&c, 1);
1276 hasVariant = TRUE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001277 localeID++;
1278 }
1279 }
1280
1281 /* if there is no variant tag after a '-' or '_' then look for '@' */
Frank Tangf2223962020-04-27 18:25:29 -07001282 if(!hasVariant) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001283 if(prev=='@') {
1284 /* keep localeID */
1285 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1286 ++localeID; /* point after the '@' */
1287 } else {
Frank Tangf2223962020-04-27 18:25:29 -07001288 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001289 }
1290 while(!_isTerminator(*localeID)) {
1291 if (needSeparator) {
Frank Tangf2223962020-04-27 18:25:29 -07001292 sink.Append("_", 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001293 needSeparator = FALSE;
1294 }
Frank Tangf2223962020-04-27 18:25:29 -07001295 char c = (char)uprv_toupper(*localeID);
1296 if (c == '-' || c == ',') c = '_';
1297 sink.Append(&c, 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001298 localeID++;
1299 }
1300 }
Frank Tangf2223962020-04-27 18:25:29 -07001301}
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001302
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001303/* Keyword enumeration */
1304
/* Per-enumeration state stored in UEnumeration::context by
 * uloc_openKeywordList(). */
typedef struct UKeywordsContext {
    /* Heap copy of the keyword list: consecutive NUL-terminated names,
     * ended by an empty string. Freed by uloc_kw_closeKeywords(). */
    char *keywords;
    /* Next name to hand out; points into 'keywords'. */
    char *current;
} UKeywordsContext;
1309
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001310U_CDECL_BEGIN
1311
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001312static void U_CALLCONV
1313uloc_kw_closeKeywords(UEnumeration *enumerator) {
1314 uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1315 uprv_free(enumerator->context);
1316 uprv_free(enumerator);
1317}
1318
1319static int32_t U_CALLCONV
1320uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
1321 char *kw = ((UKeywordsContext *)en->context)->keywords;
1322 int32_t result = 0;
1323 while(*kw) {
1324 result++;
1325 kw += uprv_strlen(kw)+1;
1326 }
1327 return result;
1328}
1329
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001330static const char * U_CALLCONV
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001331uloc_kw_nextKeyword(UEnumeration* en,
1332 int32_t* resultLength,
1333 UErrorCode* /*status*/) {
1334 const char* result = ((UKeywordsContext *)en->context)->current;
1335 int32_t len = 0;
1336 if(*result) {
1337 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1338 ((UKeywordsContext *)en->context)->current += len+1;
1339 } else {
1340 result = NULL;
1341 }
1342 if (resultLength) {
1343 *resultLength = len;
1344 }
1345 return result;
1346}
1347
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001348static void U_CALLCONV
1349uloc_kw_resetKeywords(UEnumeration* en,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001350 UErrorCode* /*status*/) {
1351 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1352}
1353
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001354U_CDECL_END
1355
1356
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001357static const UEnumeration gKeywordsEnum = {
1358 NULL,
1359 NULL,
1360 uloc_kw_closeKeywords,
1361 uloc_kw_countKeywords,
1362 uenum_unextDefault,
1363 uloc_kw_nextKeyword,
1364 uloc_kw_resetKeywords
1365};
1366
1367U_CAPI UEnumeration* U_EXPORT2
1368uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1369{
Frank Tangb8696612019-10-25 14:58:21 -07001370 LocalMemory<UKeywordsContext> myContext;
1371 LocalMemory<UEnumeration> result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001372
Frank Tangb8696612019-10-25 14:58:21 -07001373 if (U_FAILURE(*status)) {
1374 return nullptr;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001375 }
Frank Tangb8696612019-10-25 14:58:21 -07001376 myContext.adoptInstead(static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))));
1377 result.adoptInstead(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))));
1378 if (myContext.isNull() || result.isNull()) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001379 *status = U_MEMORY_ALLOCATION_ERROR;
Frank Tangb8696612019-10-25 14:58:21 -07001380 return nullptr;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001381 }
Frank Tangb8696612019-10-25 14:58:21 -07001382 uprv_memcpy(result.getAlias(), &gKeywordsEnum, sizeof(UEnumeration));
1383 myContext->keywords = static_cast<char *>(uprv_malloc(keywordListSize+1));
1384 if (myContext->keywords == nullptr) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001385 *status = U_MEMORY_ALLOCATION_ERROR;
Frank Tangb8696612019-10-25 14:58:21 -07001386 return nullptr;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001387 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001388 uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1389 myContext->keywords[keywordListSize] = 0;
1390 myContext->current = myContext->keywords;
Frank Tangb8696612019-10-25 14:58:21 -07001391 result->context = myContext.orphan();
1392 return result.orphan();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001393}
1394
1395U_CAPI UEnumeration* U_EXPORT2
1396uloc_openKeywords(const char* localeID,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001397 UErrorCode* status)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001398{
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001399 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1400 const char* tmpLocaleID;
1401
1402 if(status==NULL || U_FAILURE(*status)) {
1403 return 0;
1404 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001405
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001406 if (_hasBCP47Extension(localeID)) {
1407 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1408 } else {
1409 if (localeID==NULL) {
1410 localeID=uloc_getDefault();
1411 }
1412 tmpLocaleID=localeID;
1413 }
1414
1415 /* Skip the language */
Frank Tangf90543d2020-10-30 19:02:04 -07001416 ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *status);
1417 if (U_FAILURE(*status)) {
1418 return 0;
1419 }
1420
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001421 if(_isIDSeparator(*tmpLocaleID)) {
1422 const char *scriptID;
1423 /* Skip the script if available */
Frank Tangf90543d2020-10-30 19:02:04 -07001424 ulocimp_getScript(tmpLocaleID+1, &scriptID, *status);
1425 if (U_FAILURE(*status)) {
1426 return 0;
1427 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001428 if(scriptID != tmpLocaleID+1) {
1429 /* Found optional script */
1430 tmpLocaleID = scriptID;
1431 }
1432 /* Skip the Country */
1433 if (_isIDSeparator(*tmpLocaleID)) {
Frank Tangf90543d2020-10-30 19:02:04 -07001434 ulocimp_getCountry(tmpLocaleID+1, &tmpLocaleID, *status);
1435 if (U_FAILURE(*status)) {
1436 return 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001437 }
1438 }
1439 }
1440
1441 /* keywords are located after '@' */
1442 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
Frank Tangf90543d2020-10-30 19:02:04 -07001443 CharString keywords;
1444 CharStringByteSink sink(&keywords);
1445 ulocimp_getKeywords(tmpLocaleID+1, '@', sink, FALSE, status);
1446 if (U_FAILURE(*status)) {
1447 return NULL;
1448 }
1449 return uloc_openKeywordList(keywords.data(), keywords.length(), status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001450 }
Frank Tangf90543d2020-10-30 19:02:04 -07001451 return NULL;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001452}
1453
1454
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/*
 * True when any bit of `mask` is set in `options`. Both arguments are
 * parenthesized so that compound expressions such as (A | B) keep their
 * meaning when passed as either argument.
 */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/*
 * The "i-default" language tag. Deliberately not NUL-terminated: it is only
 * ever compared with a length-limited comparison of I_DEFAULT_LENGTH bytes.
 */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001463
1464/**
1465 * Canonicalize the given localeID, to level 1 or to level 2,
1466 * depending on the options. To specify level 1, pass in options=0.
1467 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1468 *
1469 * This is the code underlying uloc_getName and uloc_canonicalize.
1470 */
Frank Tangf2223962020-04-27 18:25:29 -07001471static void
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001472_canonicalize(const char* localeID,
Frank Tangf2223962020-04-27 18:25:29 -07001473 ByteSink& sink,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001474 uint32_t options,
1475 UErrorCode* err) {
Frank Tangf2223962020-04-27 18:25:29 -07001476 int32_t j, fieldCount=0, scriptSize=0, variantSize=0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001477 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1478 const char* origLocaleID;
1479 const char* tmpLocaleID;
1480 const char* keywordAssign = NULL;
1481 const char* separatorIndicator = NULL;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001482
1483 if (U_FAILURE(*err)) {
Frank Tangf2223962020-04-27 18:25:29 -07001484 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001485 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001486
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001487 if (_hasBCP47Extension(localeID)) {
1488 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1489 } else {
1490 if (localeID==NULL) {
1491 localeID=uloc_getDefault();
1492 }
1493 tmpLocaleID=localeID;
1494 }
1495
1496 origLocaleID=tmpLocaleID;
1497
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001498 /* get all pieces, one after another, and separate with '_' */
Frank Tangf2223962020-04-27 18:25:29 -07001499 CharString tag = ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001500
Frank Tangf2223962020-04-27 18:25:29 -07001501 if (tag.length() == I_DEFAULT_LENGTH &&
1502 uprv_strncmp(origLocaleID, i_default, I_DEFAULT_LENGTH) == 0) {
1503 tag.clear();
1504 tag.append(uloc_getDefault(), *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001505 } else if(_isIDSeparator(*tmpLocaleID)) {
1506 const char *scriptID;
1507
1508 ++fieldCount;
Frank Tangf2223962020-04-27 18:25:29 -07001509 tag.append('_', *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001510
Frank Tangf2223962020-04-27 18:25:29 -07001511 CharString script = ulocimp_getScript(tmpLocaleID+1, &scriptID, *err);
1512 tag.append(script, *err);
1513 scriptSize = script.length();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001514 if(scriptSize > 0) {
1515 /* Found optional script */
1516 tmpLocaleID = scriptID;
1517 ++fieldCount;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001518 if (_isIDSeparator(*tmpLocaleID)) {
1519 /* If there is something else, then we add the _ */
Frank Tangf2223962020-04-27 18:25:29 -07001520 tag.append('_', *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001521 }
1522 }
1523
1524 if (_isIDSeparator(*tmpLocaleID)) {
1525 const char *cntryID;
Frank Tangf2223962020-04-27 18:25:29 -07001526
1527 CharString country = ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err);
1528 tag.append(country, *err);
1529 if (!country.isEmpty()) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001530 /* Found optional country */
1531 tmpLocaleID = cntryID;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001532 }
1533 if(_isIDSeparator(*tmpLocaleID)) {
1534 /* If there is something else, then we add the _ if we found country before. */
Frank Tangf2223962020-04-27 18:25:29 -07001535 if (!_isIDSeparator(*(tmpLocaleID+1))) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001536 ++fieldCount;
Frank Tangf2223962020-04-27 18:25:29 -07001537 tag.append('_', *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001538 }
1539
Frank Tangf2223962020-04-27 18:25:29 -07001540 variantSize = -tag.length();
1541 {
1542 CharStringByteSink s(&tag);
Frank Tangf90543d2020-10-30 19:02:04 -07001543 _getVariant(tmpLocaleID+1, *tmpLocaleID, s, FALSE);
Frank Tangf2223962020-04-27 18:25:29 -07001544 }
1545 variantSize += tag.length();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001546 if (variantSize > 0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001547 tmpLocaleID += variantSize + 1; /* skip '_' and variant */
1548 }
1549 }
1550 }
1551 }
1552
1553 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1554 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
1555 UBool done = FALSE;
1556 do {
1557 char c = *tmpLocaleID;
1558 switch (c) {
1559 case 0:
1560 case '@':
1561 done = TRUE;
1562 break;
1563 default:
Frank Tangf2223962020-04-27 18:25:29 -07001564 tag.append(c, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001565 ++tmpLocaleID;
1566 break;
1567 }
1568 } while (!done);
1569 }
1570
1571 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1572 After this, tmpLocaleID either points to '@' or is NULL */
1573 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1574 keywordAssign = uprv_strchr(tmpLocaleID, '=');
1575 separatorIndicator = uprv_strchr(tmpLocaleID, ';');
1576 }
1577
1578 /* Copy POSIX-style variant, if any [mr@FOO] */
1579 if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1580 tmpLocaleID != NULL && keywordAssign == NULL) {
1581 for (;;) {
1582 char c = *tmpLocaleID;
1583 if (c == 0) {
1584 break;
1585 }
Frank Tangf2223962020-04-27 18:25:29 -07001586 tag.append(c, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001587 ++tmpLocaleID;
1588 }
1589 }
1590
1591 if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1592 /* Handle @FOO variant if @ is present and not followed by = */
1593 if (tmpLocaleID!=NULL && keywordAssign==NULL) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001594 /* Add missing '_' if needed */
1595 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1596 do {
Frank Tangf2223962020-04-27 18:25:29 -07001597 tag.append('_', *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001598 ++fieldCount;
1599 } while(fieldCount<2);
1600 }
Frank Tangf2223962020-04-27 18:25:29 -07001601
1602 int32_t posixVariantSize = -tag.length();
1603 {
1604 CharStringByteSink s(&tag);
Frank Tangf90543d2020-10-30 19:02:04 -07001605 _getVariant(tmpLocaleID+1, '@', s, (UBool)(variantSize > 0));
Frank Tangf2223962020-04-27 18:25:29 -07001606 }
1607 posixVariantSize += tag.length();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001608 if (posixVariantSize > 0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001609 variantSize += posixVariantSize;
1610 }
1611 }
1612
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001613 /* Look up the ID in the canonicalization map */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001614 for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
Frank Tangf2223962020-04-27 18:25:29 -07001615 StringPiece id(CANONICALIZE_MAP[j].id);
1616 if (tag == id) {
1617 if (id.empty() && tmpLocaleID != NULL) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001618 break; /* Don't remap "" if keywords present */
1619 }
Frank Tangf2223962020-04-27 18:25:29 -07001620 tag.clear();
1621 tag.append(CANONICALIZE_MAP[j].canonicalID, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001622 break;
1623 }
1624 }
1625 }
1626
Frank Tangf2223962020-04-27 18:25:29 -07001627 sink.Append(tag.data(), tag.length());
1628
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001629 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1630 if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1631 (!separatorIndicator || separatorIndicator > keywordAssign)) {
Frank Tangf2223962020-04-27 18:25:29 -07001632 sink.Append("@", 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001633 ++fieldCount;
Frank Tangf90543d2020-10-30 19:02:04 -07001634 ulocimp_getKeywords(tmpLocaleID+1, '@', sink, TRUE, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001635 }
1636 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001637}
1638
/* ### ID parsing API **************************************************/
1640
1641U_CAPI int32_t U_EXPORT2
1642uloc_getParent(const char* localeID,
1643 char* parent,
1644 int32_t parentCapacity,
1645 UErrorCode* err)
1646{
1647 const char *lastUnderscore;
1648 int32_t i;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001649
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001650 if (U_FAILURE(*err))
1651 return 0;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001652
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001653 if (localeID == NULL)
1654 localeID = uloc_getDefault();
1655
1656 lastUnderscore=uprv_strrchr(localeID, '_');
1657 if(lastUnderscore!=NULL) {
1658 i=(int32_t)(lastUnderscore-localeID);
1659 } else {
1660 i=0;
1661 }
1662
Frank Tang69c72a62019-04-03 21:41:21 -07001663 if (i > 0) {
1664 if (uprv_strnicmp(localeID, "und_", 4) == 0) {
1665 localeID += 3;
1666 i -= 3;
1667 uprv_memmove(parent, localeID, uprv_min(i, parentCapacity));
1668 } else if (parent != localeID) {
1669 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1670 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001671 }
Frank Tang69c72a62019-04-03 21:41:21 -07001672
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001673 return u_terminateChars(parent, parentCapacity, i, err);
1674}
1675
1676U_CAPI int32_t U_EXPORT2
1677uloc_getLanguage(const char* localeID,
1678 char* language,
1679 int32_t languageCapacity,
1680 UErrorCode* err)
1681{
1682 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001683
1684 if (err==NULL || U_FAILURE(*err)) {
1685 return 0;
1686 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001687
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001688 if(localeID==NULL) {
1689 localeID=uloc_getDefault();
1690 }
1691
Frank Tangf90543d2020-10-30 19:02:04 -07001692 return ulocimp_getLanguage(localeID, NULL, *err).extract(language, languageCapacity, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001693}
1694
1695U_CAPI int32_t U_EXPORT2
1696uloc_getScript(const char* localeID,
1697 char* script,
1698 int32_t scriptCapacity,
1699 UErrorCode* err)
1700{
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001701 if(err==NULL || U_FAILURE(*err)) {
1702 return 0;
1703 }
1704
1705 if(localeID==NULL) {
1706 localeID=uloc_getDefault();
1707 }
1708
1709 /* skip the language */
Frank Tangf90543d2020-10-30 19:02:04 -07001710 ulocimp_getLanguage(localeID, &localeID, *err);
1711 if (U_FAILURE(*err)) {
1712 return 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001713 }
Frank Tangf90543d2020-10-30 19:02:04 -07001714
1715 if(_isIDSeparator(*localeID)) {
1716 return ulocimp_getScript(localeID+1, NULL, *err).extract(script, scriptCapacity, *err);
1717 }
1718 return u_terminateChars(script, scriptCapacity, 0, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001719}
1720
1721U_CAPI int32_t U_EXPORT2
1722uloc_getCountry(const char* localeID,
1723 char* country,
1724 int32_t countryCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001725 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001726{
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001727 if(err==NULL || U_FAILURE(*err)) {
1728 return 0;
1729 }
1730
1731 if(localeID==NULL) {
1732 localeID=uloc_getDefault();
1733 }
1734
1735 /* Skip the language */
Frank Tangf90543d2020-10-30 19:02:04 -07001736 ulocimp_getLanguage(localeID, &localeID, *err);
1737 if (U_FAILURE(*err)) {
1738 return 0;
1739 }
1740
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001741 if(_isIDSeparator(*localeID)) {
1742 const char *scriptID;
1743 /* Skip the script if available */
Frank Tangf90543d2020-10-30 19:02:04 -07001744 ulocimp_getScript(localeID+1, &scriptID, *err);
1745 if (U_FAILURE(*err)) {
1746 return 0;
1747 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001748 if(scriptID != localeID+1) {
1749 /* Found optional script */
1750 localeID = scriptID;
1751 }
1752 if(_isIDSeparator(*localeID)) {
Frank Tangf90543d2020-10-30 19:02:04 -07001753 return ulocimp_getCountry(localeID+1, NULL, *err).extract(country, countryCapacity, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001754 }
1755 }
Frank Tangf90543d2020-10-30 19:02:04 -07001756 return u_terminateChars(country, countryCapacity, 0, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001757}
1758
1759U_CAPI int32_t U_EXPORT2
1760uloc_getVariant(const char* localeID,
1761 char* variant,
1762 int32_t variantCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001763 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001764{
1765 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1766 const char* tmpLocaleID;
1767 int32_t i=0;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001768
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001769 if(err==NULL || U_FAILURE(*err)) {
1770 return 0;
1771 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001772
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001773 if (_hasBCP47Extension(localeID)) {
1774 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1775 } else {
1776 if (localeID==NULL) {
1777 localeID=uloc_getDefault();
1778 }
1779 tmpLocaleID=localeID;
1780 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001781
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001782 /* Skip the language */
Frank Tangf90543d2020-10-30 19:02:04 -07001783 ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);
1784 if (U_FAILURE(*err)) {
1785 return 0;
1786 }
1787
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001788 if(_isIDSeparator(*tmpLocaleID)) {
1789 const char *scriptID;
1790 /* Skip the script if available */
Frank Tangf90543d2020-10-30 19:02:04 -07001791 ulocimp_getScript(tmpLocaleID+1, &scriptID, *err);
1792 if (U_FAILURE(*err)) {
1793 return 0;
1794 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001795 if(scriptID != tmpLocaleID+1) {
1796 /* Found optional script */
1797 tmpLocaleID = scriptID;
1798 }
1799 /* Skip the Country */
1800 if (_isIDSeparator(*tmpLocaleID)) {
1801 const char *cntryID;
Frank Tangf90543d2020-10-30 19:02:04 -07001802 ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err);
1803 if (U_FAILURE(*err)) {
1804 return 0;
1805 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001806 if (cntryID != tmpLocaleID+1) {
1807 /* Found optional country */
1808 tmpLocaleID = cntryID;
1809 }
1810 if(_isIDSeparator(*tmpLocaleID)) {
1811 /* If there was no country ID, skip a possible extra IDSeparator */
1812 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
1813 tmpLocaleID++;
1814 }
Frank Tangf90543d2020-10-30 19:02:04 -07001815
1816 CheckedArrayByteSink sink(variant, variantCapacity);
1817 _getVariant(tmpLocaleID+1, *tmpLocaleID, sink, FALSE);
1818
1819 i = sink.NumberOfBytesAppended();
1820
1821 if (U_FAILURE(*err)) {
1822 return i;
1823 }
1824
1825 if (sink.Overflowed()) {
1826 *err = U_BUFFER_OVERFLOW_ERROR;
1827 return i;
1828 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001829 }
1830 }
1831 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001832
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001833 return u_terminateChars(variant, variantCapacity, i, err);
1834}
1835
1836U_CAPI int32_t U_EXPORT2
1837uloc_getName(const char* localeID,
1838 char* name,
1839 int32_t nameCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001840 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001841{
Frank Tangf2223962020-04-27 18:25:29 -07001842 if (U_FAILURE(*err)) {
1843 return 0;
1844 }
1845
1846 CheckedArrayByteSink sink(name, nameCapacity);
1847 ulocimp_getName(localeID, sink, err);
1848
1849 int32_t reslen = sink.NumberOfBytesAppended();
1850
1851 if (U_FAILURE(*err)) {
1852 return reslen;
1853 }
1854
1855 if (sink.Overflowed()) {
1856 *err = U_BUFFER_OVERFLOW_ERROR;
1857 } else {
1858 u_terminateChars(name, nameCapacity, reslen, err);
1859 }
1860
1861 return reslen;
1862}
1863
Frank Tangf90543d2020-10-30 19:02:04 -07001864U_CAPI void U_EXPORT2
Frank Tangf2223962020-04-27 18:25:29 -07001865ulocimp_getName(const char* localeID,
1866 ByteSink& sink,
1867 UErrorCode* err)
1868{
1869 _canonicalize(localeID, sink, 0, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001870}
1871
1872U_CAPI int32_t U_EXPORT2
1873uloc_getBaseName(const char* localeID,
1874 char* name,
1875 int32_t nameCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001876 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001877{
Frank Tangf2223962020-04-27 18:25:29 -07001878 if (U_FAILURE(*err)) {
1879 return 0;
1880 }
1881
1882 CheckedArrayByteSink sink(name, nameCapacity);
1883 ulocimp_getBaseName(localeID, sink, err);
1884
1885 int32_t reslen = sink.NumberOfBytesAppended();
1886
1887 if (U_FAILURE(*err)) {
1888 return reslen;
1889 }
1890
1891 if (sink.Overflowed()) {
1892 *err = U_BUFFER_OVERFLOW_ERROR;
1893 } else {
1894 u_terminateChars(name, nameCapacity, reslen, err);
1895 }
1896
1897 return reslen;
1898}
1899
Frank Tangf90543d2020-10-30 19:02:04 -07001900U_CAPI void U_EXPORT2
Frank Tangf2223962020-04-27 18:25:29 -07001901ulocimp_getBaseName(const char* localeID,
1902 ByteSink& sink,
1903 UErrorCode* err)
1904{
1905 _canonicalize(localeID, sink, _ULOC_STRIP_KEYWORDS, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001906}
1907
1908U_CAPI int32_t U_EXPORT2
1909uloc_canonicalize(const char* localeID,
1910 char* name,
1911 int32_t nameCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001912 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001913{
Frank Tangf2223962020-04-27 18:25:29 -07001914 if (U_FAILURE(*err)) {
1915 return 0;
1916 }
1917
1918 CheckedArrayByteSink sink(name, nameCapacity);
1919 ulocimp_canonicalize(localeID, sink, err);
1920
1921 int32_t reslen = sink.NumberOfBytesAppended();
1922
1923 if (U_FAILURE(*err)) {
1924 return reslen;
1925 }
1926
1927 if (sink.Overflowed()) {
1928 *err = U_BUFFER_OVERFLOW_ERROR;
1929 } else {
1930 u_terminateChars(name, nameCapacity, reslen, err);
1931 }
1932
1933 return reslen;
1934}
1935
Frank Tangf90543d2020-10-30 19:02:04 -07001936U_CAPI void U_EXPORT2
Frank Tangf2223962020-04-27 18:25:29 -07001937ulocimp_canonicalize(const char* localeID,
1938 ByteSink& sink,
1939 UErrorCode* err)
1940{
1941 _canonicalize(localeID, sink, _ULOC_CANONICALIZE, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001942}
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001943
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001944U_CAPI const char* U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001945uloc_getISO3Language(const char* localeID)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001946{
1947 int16_t offset;
1948 char lang[ULOC_LANG_CAPACITY];
1949 UErrorCode err = U_ZERO_ERROR;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001950
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001951 if (localeID == NULL)
1952 {
1953 localeID = uloc_getDefault();
1954 }
1955 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
1956 if (U_FAILURE(err))
1957 return "";
1958 offset = _findIndex(LANGUAGES, lang);
1959 if (offset < 0)
1960 return "";
1961 return LANGUAGES_3[offset];
1962}
1963
1964U_CAPI const char* U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001965uloc_getISO3Country(const char* localeID)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001966{
1967 int16_t offset;
1968 char cntry[ULOC_LANG_CAPACITY];
1969 UErrorCode err = U_ZERO_ERROR;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001970
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001971 if (localeID == NULL)
1972 {
1973 localeID = uloc_getDefault();
1974 }
1975 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
1976 if (U_FAILURE(err))
1977 return "";
1978 offset = _findIndex(COUNTRIES, cntry);
1979 if (offset < 0)
1980 return "";
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001981
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001982 return COUNTRIES_3[offset];
1983}
1984
1985U_CAPI uint32_t U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001986uloc_getLCID(const char* localeID)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001987{
1988 UErrorCode status = U_ZERO_ERROR;
1989 char langID[ULOC_FULLNAME_CAPACITY];
Jungshik Shin87232d82017-05-13 21:10:13 -07001990 uint32_t lcid = 0;
1991
1992 /* Check for incomplete id. */
1993 if (!localeID || uprv_strlen(localeID) < 2) {
1994 return 0;
1995 }
1996
Frank Tang69c72a62019-04-03 21:41:21 -07001997 // First, attempt Windows platform lookup if available, but fall
1998 // through to catch any special cases (ICU vs Windows name differences).
1999 lcid = uprv_convertToLCIDPlatform(localeID, &status);
2000 if (U_FAILURE(status)) {
2001 return 0;
2002 }
2003 if (lcid > 0) {
Jungshik Shin87232d82017-05-13 21:10:13 -07002004 // Windows found an LCID, return that
2005 return lcid;
2006 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002007
2008 uloc_getLanguage(localeID, langID, sizeof(langID), &status);
Frank Tang69c72a62019-04-03 21:41:21 -07002009 if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002010 return 0;
2011 }
2012
2013 if (uprv_strchr(localeID, '@')) {
2014 // uprv_convertToLCID does not support keywords other than collation.
2015 // Remove all keywords except collation.
2016 int32_t len;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002017 char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
2018
Frank Tangf90543d2020-10-30 19:02:04 -07002019 CharString collVal;
2020 {
2021 CharStringByteSink sink(&collVal);
2022 ulocimp_getKeywordValue(localeID, "collation", sink, &status);
2023 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002024
Frank Tangf90543d2020-10-30 19:02:04 -07002025 if (U_SUCCESS(status) && !collVal.isEmpty()) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002026 len = uloc_getBaseName(localeID, tmpLocaleID,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002027 UPRV_LENGTHOF(tmpLocaleID) - 1, &status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002028
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002029 if (U_SUCCESS(status) && len > 0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002030 tmpLocaleID[len] = 0;
2031
Frank Tangf90543d2020-10-30 19:02:04 -07002032 len = uloc_setKeywordValue("collation", collVal.data(), tmpLocaleID,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002033 UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002034
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002035 if (U_SUCCESS(status) && len > 0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002036 tmpLocaleID[len] = 0;
2037 return uprv_convertToLCID(langID, tmpLocaleID, &status);
2038 }
2039 }
2040 }
2041
2042 // fall through - all keywords are simply ignored
2043 status = U_ZERO_ERROR;
2044 }
2045
2046 return uprv_convertToLCID(langID, localeID, &status);
2047}
2048
2049U_CAPI int32_t U_EXPORT2
2050uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2051 UErrorCode *status)
2052{
2053 return uprv_convertToPosix(hostid, locale, localeCapacity, status);
2054}
2055
/* ### Default locale **************************************************/
2057
2058U_CAPI const char* U_EXPORT2
2059uloc_getDefault()
2060{
2061 return locale_get_default();
2062}
2063
2064U_CAPI void U_EXPORT2
2065uloc_setDefault(const char* newDefaultLocale,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002066 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002067{
2068 if (U_FAILURE(*err))
2069 return;
2070 /* the error code isn't currently used for anything by this function*/
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002071
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002072 /* propagate change to C++ */
2073 locale_set_default(newDefaultLocale);
2074}
2075
2076/**
2077 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
2078 * to an array of pointers to arrays of char. All of these pointers are owned
2079 * by ICU-- do not delete them, and do not write through them. The array is
2080 * terminated with a null pointer.
2081 */
2082U_CAPI const char* const* U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002083uloc_getISOLanguages()
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002084{
2085 return LANGUAGES;
2086}
2087
2088/**
2089 * Returns a list of all 2-letter country codes defined in ISO 639. This is a
2090 * pointer to an array of pointers to arrays of char. All of these pointers are
2091 * owned by ICU-- do not delete them, and do not write through them. The array is
2092 * terminated with a null pointer.
2093 */
2094U_CAPI const char* const* U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002095uloc_getISOCountries()
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002096{
2097 return COUNTRIES;
2098}
2099
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002100U_CAPI const char* U_EXPORT2
2101uloc_toUnicodeLocaleKey(const char* keyword)
2102{
2103 const char* bcpKey = ulocimp_toBcpKey(keyword);
2104 if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {
2105 // unknown keyword, but syntax is fine..
2106 return keyword;
2107 }
2108 return bcpKey;
2109}
2110
2111U_CAPI const char* U_EXPORT2
2112uloc_toUnicodeLocaleType(const char* keyword, const char* value)
2113{
2114 const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
2115 if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {
2116 // unknown keyword, but syntax is fine..
2117 return value;
2118 }
2119 return bcpType;
2120}
2121
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002122static UBool
2123isWellFormedLegacyKey(const char* legacyKey)
2124{
2125 const char* p = legacyKey;
2126 while (*p) {
2127 if (!UPRV_ISALPHANUM(*p)) {
2128 return FALSE;
2129 }
2130 p++;
2131 }
2132 return TRUE;
2133}
2134
2135static UBool
2136isWellFormedLegacyType(const char* legacyType)
2137{
2138 const char* p = legacyType;
2139 int32_t alphaNumLen = 0;
2140 while (*p) {
2141 if (*p == '_' || *p == '/' || *p == '-') {
2142 if (alphaNumLen == 0) {
2143 return FALSE;
2144 }
2145 alphaNumLen = 0;
2146 } else if (UPRV_ISALPHANUM(*p)) {
2147 alphaNumLen++;
2148 } else {
2149 return FALSE;
2150 }
2151 p++;
2152 }
2153 return (alphaNumLen != 0);
2154}
2155
2156U_CAPI const char* U_EXPORT2
2157uloc_toLegacyKey(const char* keyword)
2158{
2159 const char* legacyKey = ulocimp_toLegacyKey(keyword);
2160 if (legacyKey == NULL) {
2161 // Checks if the specified locale key is well-formed with the legacy locale syntax.
2162 //
2163 // Note:
Jungshik Shin87232d82017-05-13 21:10:13 -07002164 // LDML/CLDR provides some definition of keyword syntax in
2165 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2166 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2167 // Keys can only consist of [0-9a-zA-Z].
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002168 if (isWellFormedLegacyKey(keyword)) {
2169 return keyword;
2170 }
2171 }
2172 return legacyKey;
2173}
2174
2175U_CAPI const char* U_EXPORT2
2176uloc_toLegacyType(const char* keyword, const char* value)
2177{
2178 const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
2179 if (legacyType == NULL) {
2180 // Checks if the specified locale type is well-formed with the legacy locale syntax.
2181 //
2182 // Note:
Jungshik Shin87232d82017-05-13 21:10:13 -07002183 // LDML/CLDR provides some definition of keyword syntax in
2184 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2185 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2186 // Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
2187 // we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002188 if (isWellFormedLegacyType(value)) {
2189 return value;
2190 }
2191 }
2192 return legacyType;
2193}
2194
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002195/*eof*/