blob: 522f33dbe243a9b9623341e4e87c6f3d19d8e859 [file] [log] [blame]
Jungshik Shin87232d82017-05-13 21:10:13 -07001// © 2016 and later: Unicode, Inc. and others.
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002// License & terms of use: http://www.unicode.org/copyright.html
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00003/*
4**********************************************************************
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07005* Copyright (C) 1997-2016, International Business Machines
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00006* Corporation and others. All Rights Reserved.
7**********************************************************************
8*
9* File ULOC.CPP
10*
11* Modification History:
12*
13* Date Name Description
14* 04/01/97 aliu Creation.
15* 08/21/98 stephen JDK 1.2 sync
16* 12/08/98 rtg New Locale implementation and C API
17* 03/15/99 damiba overhaul.
18* 04/06/99 stephen changed setDefault() to realloc and copy
19* 06/14/99 stephen Changed calls to ures_open for new params
20* 07/21/99 stephen Modified setDefault() to propagate to C++
21* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
22* brought canonicalization code into line with spec
23*****************************************************************************/
24
25/*
26 POSIX's locale format, from putil.c: [no spaces]
27
28 ll [ _CC ] [ . MM ] [ @ VV]
29
30 l = lang, C = ctry, M = charmap, V = variant
31*/
32
Frank Tangf2223962020-04-27 18:25:29 -070033#include "unicode/bytestream.h"
34#include "unicode/errorcode.h"
35#include "unicode/stringpiece.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000036#include "unicode/utypes.h"
37#include "unicode/ustring.h"
38#include "unicode/uloc.h"
39
Frank Tangf2223962020-04-27 18:25:29 -070040#include "bytesinkutil.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000041#include "putilimp.h"
42#include "ustr_imp.h"
43#include "ulocimp.h"
44#include "umutex.h"
45#include "cstring.h"
46#include "cmemory.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000047#include "locmap.h"
48#include "uarrsort.h"
49#include "uenumimp.h"
50#include "uassert.h"
Jungshik Shin87232d82017-05-13 21:10:13 -070051#include "charstr.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000052
Jungshik Shin87232d82017-05-13 21:10:13 -070053U_NAMESPACE_USE
Jungshik Shin5feb9ad2016-10-21 12:52:48 -070054
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000055/* ### Declarations **************************************************/
56
57/* Locale stuff from locid.cpp */
58U_CFUNC void locale_set_default(const char *id);
59U_CFUNC const char *locale_get_default(void);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000060
61/* ### Data tables **************************************************/
62
/**
 * Language code table: 2-letter codes where one exists, otherwise the
 * 3-letter code.
 *
 * Invariants:
 *  - The first list is in sorted order; it is returned directly to the
 *    user by some API.
 *  - It is kept in sync with LANGUAGES_3: entry i of each table refers
 *    to the same language.
 *  - Layout is: first list, NULL, second list, NULL. The first list is
 *    what user code sees when the array is returned by API; the second
 *    list holds codes that are supported but not exposed through user API.
 *
 * Notes:
 *  - Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
 *    include the revisions up to 2001/7/27 *CWB*
 *  - The 3-character codes are the terminology codes like RFC 3066, which
 *    is compatible with prior ICU codes.
 *  - "in", "iw", "ji", "jw" and "sh" have been withdrawn. They are kept,
 *    but moved to the second list because their 3-character codes are
 *    duplicates; this avoids bad searches going from 3- to 2-character
 *    codes.
 *  - The range qaa-qtz is reserved for local use.
 */
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */
/* Subsequent hand addition of selected languages */
static const char * const LANGUAGES[] = {
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "aeb",
    "af",  "afh", "agq", "ain", "ak",  "akk", "akz", "ale",
    "aln", "alt", "am",  "an",  "ang", "anp", "ar",  "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
    "asa", "ase", "ast", "av",  "avk", "awa", "ay",  "az",
    "ba",  "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "be",  "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
    "bgn", "bho", "bi",  "bik", "bin", "bjn", "bkm", "bla",
    "bm",  "bn",  "bo",  "bpy", "bqi", "br",  "bra", "brh",
    "brx", "bs",  "bss", "bua", "bug", "bum", "byn", "byv",
    "ca",  "cad", "car", "cay", "cch", "ccp", "ce",  "ceb", "cgg",
    "ch",  "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "co",  "cop", "cps", "cr",  "crh",
    "cs",  "csb", "cu",  "cv",  "cy",
    "da",  "dak", "dar", "dav", "de",  "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
    "dyo", "dyu", "dz",  "dzg",
    "ebu", "ee",  "efi", "egl", "egy", "eka", "el",  "elx",
    "en",  "enm", "eo",  "es",  "esu", "et",  "eu",  "ewo",
    "ext",
    "fa",  "fan", "fat", "ff",  "fi",  "fil", "fit", "fj",
    "fo",  "fon", "fr",  "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fy",
    "ga",  "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
    "gez", "gil", "gl",  "glk", "gmh", "gn",  "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "guc",
    "gur", "guz", "gv",  "gwi",
    "ha",  "hai", "hak", "haw", "he",  "hi",  "hif", "hil",
    "hit", "hmn", "ho",  "hr",  "hsb", "hsn", "ht",  "hu",
    "hup", "hy",  "hz",
    "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ik",
    "ilo", "inh", "io",  "is",  "it",  "iu",  "izh",
    "ja",  "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jv",
    "ka",  "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg",  "kgp",
    "kha", "kho", "khq", "khw", "ki",  "kiu", "kj",  "kk",
    "kkj", "kl",  "kln", "km",  "kmb", "kn",  "ko",  "koi",
    "kok", "kos", "kpe", "kr",  "krc", "kri", "krj", "krl",
    "kru", "ks",  "ksb", "ksf", "ksh", "ku",  "kum", "kut",
    "kv",  "kw",  "ky",
    "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lfn",
    "lg",  "li",  "lij", "liv", "lkt", "lmo", "ln",  "lo",
    "lol", "loz", "lrc", "lt",  "ltg", "lu",  "lua", "lui",
    "lun", "luo", "lus", "luy", "lv",  "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg",  "mga",
    "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
    "ml",  "mn",  "mnc", "mni", "mo",
    "moh", "mos", "mr",  "mrj",
    "ms",  "mt",  "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "my",  "mye", "myv", "mzn",
    "na",  "nan", "nap", "naq", "nb",  "nd",  "nds", "ne",
    "new", "ng",  "nia", "niu", "njo", "nl",  "nmg", "nn",
    "nnh", "no",  "nog", "non", "nov", "nqo", "nr",  "nso",
    "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi",
    "oc",  "oj",  "om",  "or",  "os",  "osa", "ota",
    "pa",  "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
    "pdt", "peo", "pfl", "phn", "pi",  "pl",  "pms", "pnt",
    "pon", "prg", "pro", "ps",  "pt",
    "qu",  "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "rm",  "rn",  "ro",
    "rof", "rom", "rtm", "ru",  "rue", "rug", "rup",
    "rw",  "rwk",
    "sa",  "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "sc",  "scn", "sco", "sd",  "sdc", "sdh",
    "se",  "see", "seh", "sei", "sel", "ses", "sg",  "sga",
    "sgs", "shi", "shn", "shu", "si",  "sid", "sk",
    "sl",  "sli", "sly", "sm",  "sma", "smj", "smn", "sms",
    "sn",  "snk", "so",  "sog", "sq",  "sr",  "srn", "srr",
    "ss",  "ssy", "st",  "stq", "su",  "suk", "sus", "sux",
    "sv",  "sw",  "swb", "swc", "syc", "syr", "szl",
    "ta",  "tcy", "te",  "tem", "teo", "ter", "tet", "tg",
    "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tkr", "tl",
    "tlh", "tli", "tly", "tmh", "tn",  "to",  "tog", "tpi",
    "tr",  "tru", "trv", "ts",  "tsd", "tsi", "tt",  "ttt",
    "tum", "tvl", "tw",  "twq", "ty",  "tyv", "tzm",
    "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
    "vai", "ve",  "vec", "vep", "vi",  "vls", "vmf", "vo",
    "vot", "vro", "vun",
    "wa",  "wae", "wal", "war", "was", "wbp", "wo",  "wuu",
    "xal", "xh",  "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yi",  "yo",  "yrl", "yue",
    "za",  "zap", "zbl", "zea", "zen", "zgh", "zh",  "zu",
    "zun", "zxx", "zza",
    NULL,
    "in",  "iw",  "ji",  "jw",  "sh",   /* obsolete language codes */
    NULL
};
187
/*
 * Withdrawn language codes, terminated by two NULL entries.
 * Laid out in parallel with REPLACEMENT_LANGUAGES (same number of entries,
 * same order) -- presumably entry i there is the replacement for entry i
 * here; confirm against the lookup code.
 */
static const char* const DEPRECATED_LANGUAGES[]={
    "in", "iw", "ji", "jw", NULL, NULL
};
/*
 * Current language codes, terminated by two NULL entries.
 * Laid out in parallel with DEPRECATED_LANGUAGES (same number of entries,
 * same order) -- presumably entry i here replaces entry i there; confirm
 * against the lookup code.
 */
static const char* const REPLACEMENT_LANGUAGES[]={
    "id", "he", "yi", "jv", NULL, NULL
};
194
/**
 * 3-letter language code table.
 *
 * Lookup table used to convert 3-letter language codes to their 2-letter
 * equivalent, where possible. It must be kept in sync with LANGUAGES: for
 * all valid i, LANGUAGES[i] must refer to the same language as
 * LANGUAGES_3[i]. Where a 3-letter code has no 2-letter equivalent, the
 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
 *
 * Layout is: first list, NULL, second list, NULL. The two lists correspond
 * to the two lists in LANGUAGES. The commented-out line ahead of the second
 * list is copied from LANGUAGES to make eyeballing the correspondence easier.
 */
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */
/* Subsequent hand addition of selected languages */
static const char * const LANGUAGES_3[] = {
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
    "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
    "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
    "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
    "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
    "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
    "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
    "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
    "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
    "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
    "ces", "csb", "chu", "chv", "cym",
    "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
    "dyo", "dyu", "dzo", "dzg",
    "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
    "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
    "ext",
    "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
    "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fry",
    "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
    "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
    "gur", "guz", "glv", "gwi",
    "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
    "hup", "hye", "her",
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
    "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
    "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jav",
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
    "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
    "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
    "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
    "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
    "kom", "cor", "kir",
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
    "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
    "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
    "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
    "mal", "mon", "mnc", "mni", "mol",
    "moh", "mos", "mar", "mrj",
    "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "mya", "mye", "myv", "mzn",
    "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
    "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
    "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
    "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
    "oci", "oji", "orm", "ori", "oss", "osa", "ota",
    "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
    "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
    "pon", "prg", "pro", "pus", "por",
    "que", "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
    "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
    "kin", "rwk",
    "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
    "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
    "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
    "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
    "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
    "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
    "swe", "swa", "swb", "swc", "syc", "syr", "szl",
    "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
    "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
    "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
    "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
    "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
    "vot", "vro", "vun",
    "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
    "xal", "xho", "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
    "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
    "zun", "zxx", "zza",
    NULL,
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    "ind", "heb", "yid", "jaw", "srp",
    NULL
};
305
/**
 * 2-letter country code table.
 *
 * Invariants:
 *  - The first list is in sorted order; it is returned directly to the
 *    user by some API.
 *  - It is kept in sync with COUNTRIES_3, with corresponding entries
 *    matched.
 *  - Layout is: first list, NULL, second list, NULL. The first list is
 *    what user code sees when the array is returned by API; the second
 *    list holds codes that are supported but not exposed through user API.
 *
 * Notes:
 *
 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
 * new codes keeping the old ones for compatibility updated to include
 * 1999/12/03 revisions *CWB*
 *
 * RO(ROM) is now RO(ROU) according to
 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
 */
static const char * const COUNTRIES[] = {
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
    "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
    "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
    "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",
    "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
    "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
    NULL,
    "AN",  "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR",   /* obsolete country codes */
    NULL
};
365
/*
 * Deprecated country codes, terminated by two NULL entries.
 * Positions line up with REPLACEMENT_COUNTRIES, which repeats this list in
 * a comment above its own entries.
 */
static const char* const DEPRECATED_COUNTRIES[] = {
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH",
    "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR",
    NULL, NULL /* deprecated country list */
};
/*
 * Current country codes standing in for the deprecated ones, terminated by
 * two NULL entries. The commented rows show the deprecated code that sits at
 * the same position in DEPRECATED_COUNTRIES.
 */
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", */
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU",
/*  "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR"  */
    "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD",
    NULL, NULL /* replacement country codes */
};
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700373
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000374/**
375 * Table of 3-letter country codes.
376 *
377 * This is a lookup table used to convert 3-letter country codes to
378 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
379 * For all valid i, COUNTRIES[i] must refer to the same country as
380 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
381 * to make eyeballing this baby easier.
382 *
383 * This table should be terminated with a NULL entry, followed by a
384 * second list, and another NULL entry. The two lists correspond to
385 * the two lists in COUNTRIES.
386 */
387static const char * const COUNTRIES_3[] = {
388/* "AD", "AE", "AF", "AG", "AI", "AL", "AM", */
389 "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
390/* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
391 "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
392/* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
393 "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
394/* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */
395 "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
396/* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
397 "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
398/* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */
399 "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
400/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */
401 "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
402/* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */
403 "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
404/* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
405 "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
406/* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
407 "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
408/* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
409 "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
410/* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
411 "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
412/* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
413 "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
414/* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
415 "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
416/* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
417 "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
418/* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
419 "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
420/* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
421 "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
422/* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
423 "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
424/* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
425 "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
426/* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
427 "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
428/* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
429 "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
430/* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
431 "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
432/* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
433 "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
434/* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
435 "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
436/* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */
437 "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
438/* "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */
439 "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
440/* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
441 "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
442/* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
443 "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
444/* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
445 "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
446/* "WS", "YE", "YT", "ZA", "ZM", "ZW", */
447 "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
448NULL,
449/* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
450 "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
451NULL
452};
453
/* One row of a locale-ID canonicalization table: an input ID and its canonical form. */
typedef struct CanonicalizationMap {
    /* input ID */
    const char *id;
    /* canonicalized output ID */
    const char *canonicalID;
} CanonicalizationMap;
458
459/**
460 * A map to canonicalize locale IDs. This handles a variety of
461 * different semantic kinds of transformations.
462 */
463static const CanonicalizationMap CANONICALIZE_MAP[] = {
Frank Tangf2223962020-04-27 18:25:29 -0700464 { "art__LOJBAN", "jbo" }, /* registered name */
Frank Tang960f1952019-02-15 16:46:49 -0800465 { "hy__AREVELA", "hy" }, /* Registered IANA variant */
466 { "hy__AREVMDA", "hyw" }, /* Registered IANA variant */
Frank Tangf2223962020-04-27 18:25:29 -0700467 { "zh__GUOYU", "zh" }, /* registered name */
468 { "zh__HAKKA", "hak" }, /* registered name */
469 { "zh__XIANG", "hsn" }, /* registered name */
470 // subtags with 3 chars won't be treated as variants.
Frank Tang960f1952019-02-15 16:46:49 -0800471 { "zh_GAN", "gan" }, /* registered name */
Frank Tang960f1952019-02-15 16:46:49 -0800472 { "zh_MIN_NAN", "nan" }, /* registered name */
473 { "zh_WUU", "wuu" }, /* registered name */
Frank Tang960f1952019-02-15 16:46:49 -0800474 { "zh_YUE", "yue" }, /* registered name */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000475};
476
477/* ### BCP47 Conversion *******************************************/
/*
 * Test whether a locale id looks like a BCP47 tag carrying an extension:
 * it is non-NULL, contains no '@', and getShortestSubtagLength() reports a
 * one-character subtag (a singleton).
 *
 * The argument is evaluated more than once, so pass a side-effect-free
 * expression. The macro now operates on its own `id` argument; it used to
 * read a variable named `localeID` from the caller's scope instead.
 */
#define _hasBCP47Extension(id) ((id) != NULL && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
/*
 * Converts the BCP47 id to a Unicode locale id via uloc_forLanguageTag().
 * On success finalID is set to buffer. If the call returns <= 0, reports a
 * failure, or leaves the result unterminated, finalID falls back to the
 * untouched id; the unterminated warning is promoted to
 * U_BUFFER_OVERFLOW_ERROR.
 */
#define _ConvertBCP47(finalID, id, buffer, length,err) UPRV_BLOCK_MACRO_BEGIN { \
    if (uloc_forLanguageTag(id, buffer, length, NULL, err) > 0 && \
            !U_FAILURE(*err) && *err != U_STRING_NOT_TERMINATED_WARNING) { \
        finalID=buffer; \
    } else { \
        finalID=id; \
        if (*err == U_STRING_NOT_TERMINATED_WARNING) { \
            *err = U_BUFFER_OVERFLOW_ERROR; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000490/* Gets the size of the shortest subtag in the given localeID. */
491static int32_t getShortestSubtagLength(const char *localeID) {
Jungshik Shinb3189662017-11-07 11:18:34 -0800492 int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000493 int32_t length = localeIDLength;
494 int32_t tmpLength = 0;
495 int32_t i;
496 UBool reset = TRUE;
497
498 for (i = 0; i < localeIDLength; i++) {
499 if (localeID[i] != '_' && localeID[i] != '-') {
500 if (reset) {
501 tmpLength = 0;
502 reset = FALSE;
503 }
504 tmpLength++;
505 } else {
506 if (tmpLength != 0 && tmpLength < length) {
507 length = tmpLength;
508 }
509 reset = TRUE;
510 }
511 }
512
513 return length;
514}
515
516/* ### Keywords **************************************************/
/* True when c is an ASCII decimal digit. Evaluates c twice. */
#define UPRV_ISDIGIT(c) (('0' <= (c)) && ((c) <= '9'))
/* True when c is an ASCII letter or an ASCII decimal digit. Evaluates c more than once. */
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c))
/* Punctuation/symbols allowed in legacy key values */
#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')

/* Size, including the terminating NUL, of the buffer that holds one keyword name. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Maximum number of keywords collected from a single locale ID. */
#define ULOC_MAX_NO_KEYWORDS 25
524
525U_CAPI const char * U_EXPORT2
526locale_getKeywordsStart(const char *localeID) {
527 const char *result = NULL;
528 if((result = uprv_strchr(localeID, '@')) != NULL) {
529 return result;
530 }
531#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
532 else {
533 /* We do this because the @ sign is variant, and the @ sign used on one
534 EBCDIC machine won't be compiled the same way on other EBCDIC based
535 machines. */
536 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
537 const uint8_t *charToFind = ebcdicSigns;
538 while(*charToFind) {
539 if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
540 return result;
541 }
542 charToFind++;
543 }
544 }
545#endif
546 return NULL;
547}
548
549/**
550 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
551 * @param keywordName incoming name to be canonicalized
552 * @param status return status (keyword too long)
553 * @return length of the keyword name
554 */
555static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
556{
Jungshik Shin87232d82017-05-13 21:10:13 -0700557 int32_t keywordNameLen = 0;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700558
Jungshik Shin87232d82017-05-13 21:10:13 -0700559 for (; *keywordName != 0; keywordName++) {
560 if (!UPRV_ISALPHANUM(*keywordName)) {
561 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
562 return 0;
563 }
564 if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
565 buf[keywordNameLen++] = uprv_tolower(*keywordName);
566 } else {
567 /* keyword name too long for internal buffer */
568 *status = U_INTERNAL_PROGRAM_ERROR;
569 return 0;
570 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000571 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700572 if (keywordNameLen == 0) {
573 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
574 return 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000575 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700576 buf[keywordNameLen] = 0; /* terminate */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700577
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000578 return keywordNameLen;
579}
580
581typedef struct {
582 char keyword[ULOC_KEYWORD_BUFFER_LEN];
583 int32_t keywordLen;
584 const char *valueStart;
585 int32_t valueLen;
586} KeywordStruct;
587
588static int32_t U_CALLCONV
589compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
590 const char* leftString = ((const KeywordStruct *)left)->keyword;
591 const char* rightString = ((const KeywordStruct *)right)->keyword;
592 return uprv_strcmp(leftString, rightString);
593}
594
Frank Tangf90543d2020-10-30 19:02:04 -0700595U_CFUNC void
596ulocimp_getKeywords(const char *localeID,
597 char prev,
598 ByteSink& sink,
599 UBool valuesToo,
600 UErrorCode *status)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000601{
602 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700603
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000604 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
605 int32_t numKeywords = 0;
606 const char* pos = localeID;
607 const char* equalSign = NULL;
608 const char* semicolon = NULL;
609 int32_t i = 0, j, n;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000610
611 if(prev == '@') { /* start of keyword definition */
612 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
613 do {
614 UBool duplicate = FALSE;
615 /* skip leading spaces */
616 while(*pos == ' ') {
617 pos++;
618 }
619 if (!*pos) { /* handle trailing "; " */
620 break;
621 }
622 if(numKeywords == maxKeywords) {
623 *status = U_INTERNAL_PROGRAM_ERROR;
Frank Tangf2223962020-04-27 18:25:29 -0700624 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000625 }
626 equalSign = uprv_strchr(pos, '=');
627 semicolon = uprv_strchr(pos, ';');
628 /* lack of '=' [foo@currency] is illegal */
629 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
630 if(!equalSign || (semicolon && semicolon<equalSign)) {
631 *status = U_INVALID_FORMAT_ERROR;
Frank Tangf2223962020-04-27 18:25:29 -0700632 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000633 }
634 /* need to normalize both keyword and keyword name */
635 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
636 /* keyword name too long for internal buffer */
637 *status = U_INTERNAL_PROGRAM_ERROR;
Frank Tangf2223962020-04-27 18:25:29 -0700638 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000639 }
640 for(i = 0, n = 0; i < equalSign - pos; ++i) {
641 if (pos[i] != ' ') {
642 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
643 }
644 }
645
646 /* zero-length keyword is an error. */
647 if (n == 0) {
648 *status = U_INVALID_FORMAT_ERROR;
Frank Tangf2223962020-04-27 18:25:29 -0700649 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000650 }
651
652 keywordList[numKeywords].keyword[n] = 0;
653 keywordList[numKeywords].keywordLen = n;
654 /* now grab the value part. First we skip the '=' */
655 equalSign++;
            /* then we skip leading spaces */
657 while(*equalSign == ' ') {
658 equalSign++;
659 }
660
661 /* Premature end or zero-length value */
Jungshik Shin (jungshik at google)46be5162015-03-26 11:46:43 -0700662 if (!*equalSign || equalSign == semicolon) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000663 *status = U_INVALID_FORMAT_ERROR;
Frank Tangf2223962020-04-27 18:25:29 -0700664 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000665 }
666
667 keywordList[numKeywords].valueStart = equalSign;
668
669 pos = semicolon;
670 i = 0;
671 if(pos) {
672 while(*(pos - i - 1) == ' ') {
673 i++;
674 }
675 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
676 pos++;
677 } else {
678 i = (int32_t)uprv_strlen(equalSign);
679 while(i && equalSign[i-1] == ' ') {
680 i--;
681 }
682 keywordList[numKeywords].valueLen = i;
683 }
684 /* If this is a duplicate keyword, then ignore it */
685 for (j=0; j<numKeywords; ++j) {
686 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
687 duplicate = TRUE;
688 break;
689 }
690 }
691 if (!duplicate) {
692 ++numKeywords;
693 }
694 } while(pos);
695
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000696 /* now we have a list of keywords */
697 /* we need to sort it */
698 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700699
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000700 /* Now construct the keyword part */
701 for(i = 0; i < numKeywords; i++) {
Frank Tangf2223962020-04-27 18:25:29 -0700702 sink.Append(keywordList[i].keyword, keywordList[i].keywordLen);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000703 if(valuesToo) {
Frank Tangf2223962020-04-27 18:25:29 -0700704 sink.Append("=", 1);
705 sink.Append(keywordList[i].valueStart, keywordList[i].valueLen);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000706 if(i < numKeywords - 1) {
Frank Tangf2223962020-04-27 18:25:29 -0700707 sink.Append(";", 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000708 }
Frank Tangf2223962020-04-27 18:25:29 -0700709 } else {
710 sink.Append("\0", 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000711 }
712 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000713 }
714}
715
Frank Tangf90543d2020-10-30 19:02:04 -0700716U_CAPI int32_t U_EXPORT2
717uloc_getKeywordValue(const char* localeID,
718 const char* keywordName,
719 char* buffer, int32_t bufferCapacity,
720 UErrorCode* status)
721{
Frank Tangf2223962020-04-27 18:25:29 -0700722 if (U_FAILURE(*status)) {
723 return 0;
724 }
725
Frank Tangf90543d2020-10-30 19:02:04 -0700726 CheckedArrayByteSink sink(buffer, bufferCapacity);
727 ulocimp_getKeywordValue(localeID, keywordName, sink, status);
Frank Tangf2223962020-04-27 18:25:29 -0700728
729 int32_t reslen = sink.NumberOfBytesAppended();
730
731 if (U_FAILURE(*status)) {
732 return reslen;
733 }
734
735 if (sink.Overflowed()) {
736 *status = U_BUFFER_OVERFLOW_ERROR;
737 } else {
Frank Tangf90543d2020-10-30 19:02:04 -0700738 u_terminateChars(buffer, bufferCapacity, reslen, status);
Frank Tangf2223962020-04-27 18:25:29 -0700739 }
740
741 return reslen;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000742}
743
Frank Tangf90543d2020-10-30 19:02:04 -0700744U_CAPI void U_EXPORT2
745ulocimp_getKeywordValue(const char* localeID,
746 const char* keywordName,
747 icu::ByteSink& sink,
748 UErrorCode* status)
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700749{
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000750 const char* startSearchHere = NULL;
751 const char* nextSeparator = NULL;
752 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
753 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000754
755 if(status && U_SUCCESS(*status) && localeID) {
756 char tempBuffer[ULOC_FULLNAME_CAPACITY];
757 const char* tmpLocaleID;
758
Jungshik Shin87232d82017-05-13 21:10:13 -0700759 if (keywordName == NULL || keywordName[0] == 0) {
760 *status = U_ILLEGAL_ARGUMENT_ERROR;
Frank Tangf90543d2020-10-30 19:02:04 -0700761 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000762 }
763
764 locale_canonKeywordName(keywordNameBuffer, keywordName, status);
765 if(U_FAILURE(*status)) {
Frank Tangf90543d2020-10-30 19:02:04 -0700766 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000767 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700768
Jungshik Shin87232d82017-05-13 21:10:13 -0700769 if (_hasBCP47Extension(localeID)) {
770 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
771 } else {
772 tmpLocaleID=localeID;
773 }
774
775 startSearchHere = locale_getKeywordsStart(tmpLocaleID);
776 if(startSearchHere == NULL) {
777 /* no keywords, return at once */
Frank Tangf90543d2020-10-30 19:02:04 -0700778 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700779 }
780
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000781 /* find the first keyword */
782 while(startSearchHere) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700783 const char* keyValueTail;
784 int32_t keyValueLen;
785
786 startSearchHere++; /* skip @ or ; */
787 nextSeparator = uprv_strchr(startSearchHere, '=');
788 if(!nextSeparator) {
789 *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
Frank Tangf90543d2020-10-30 19:02:04 -0700790 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700791 }
792 /* strip leading & trailing spaces (TC decided to tolerate these) */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000793 while(*startSearchHere == ' ') {
794 startSearchHere++;
795 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700796 keyValueTail = nextSeparator;
797 while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
798 keyValueTail--;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000799 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700800 /* now keyValueTail points to first char after the keyName */
801 /* copy & normalize keyName from locale */
802 if (startSearchHere == keyValueTail) {
803 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
Frank Tangf90543d2020-10-30 19:02:04 -0700804 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700805 }
806 keyValueLen = 0;
807 while (startSearchHere < keyValueTail) {
808 if (!UPRV_ISALPHANUM(*startSearchHere)) {
809 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
Frank Tangf90543d2020-10-30 19:02:04 -0700810 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700811 }
812 if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
813 localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++);
814 } else {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000815 /* keyword name too long for internal buffer */
816 *status = U_INTERNAL_PROGRAM_ERROR;
Frank Tangf90543d2020-10-30 19:02:04 -0700817 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700818 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000819 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700820 localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700821
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000822 startSearchHere = uprv_strchr(nextSeparator, ';');
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700823
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000824 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700825 /* current entry matches the keyword. */
826 nextSeparator++; /* skip '=' */
827 /* First strip leading & trailing spaces (TC decided to tolerate these) */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000828 while(*nextSeparator == ' ') {
Jungshik Shin87232d82017-05-13 21:10:13 -0700829 nextSeparator++;
830 }
831 keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
832 while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
833 keyValueTail--;
834 }
835 /* Now copy the value, but check well-formedness */
836 if (nextSeparator == keyValueTail) {
837 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
Frank Tangf90543d2020-10-30 19:02:04 -0700838 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700839 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700840 while (nextSeparator < keyValueTail) {
841 if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) {
842 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
Frank Tangf90543d2020-10-30 19:02:04 -0700843 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700844 }
Frank Tangf90543d2020-10-30 19:02:04 -0700845 /* Should we lowercase value to return here? Tests expect as-is. */
846 sink.Append(nextSeparator++, 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000847 }
Frank Tangf90543d2020-10-30 19:02:04 -0700848 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000849 }
850 }
851 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000852}
853
854U_CAPI int32_t U_EXPORT2
855uloc_setKeywordValue(const char* keywordName,
856 const char* keywordValue,
857 char* buffer, int32_t bufferCapacity,
858 UErrorCode* status)
859{
860 /* TODO: sorting. removal. */
861 int32_t keywordNameLen;
862 int32_t keywordValueLen;
863 int32_t bufLen;
864 int32_t needLen = 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000865 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
Jungshik Shin87232d82017-05-13 21:10:13 -0700866 char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000867 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000868 int32_t rc;
869 char* nextSeparator = NULL;
870 char* nextEqualsign = NULL;
871 char* startSearchHere = NULL;
872 char* keywordStart = NULL;
Jungshik Shin87232d82017-05-13 21:10:13 -0700873 CharString updatedKeysAndValues;
Jungshik Shin87232d82017-05-13 21:10:13 -0700874 UBool handledInputKeyAndValue = FALSE;
875 char keyValuePrefix = '@';
876
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700877 if(U_FAILURE(*status)) {
878 return -1;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000879 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700880 if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000881 *status = U_ILLEGAL_ARGUMENT_ERROR;
882 return 0;
883 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700884 bufLen = (int32_t)uprv_strlen(buffer);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000885 if(bufferCapacity<bufLen) {
886 /* The capacity is less than the length?! Is this NULL terminated? */
887 *status = U_ILLEGAL_ARGUMENT_ERROR;
888 return 0;
889 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000890 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
891 if(U_FAILURE(*status)) {
892 return 0;
893 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700894
895 keywordValueLen = 0;
896 if(keywordValue) {
897 while (*keywordValue != 0) {
898 if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) {
899 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
900 return 0;
901 }
902 if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {
903 /* Should we force lowercase in value to set? */
904 keywordValueBuffer[keywordValueLen++] = *keywordValue++;
905 } else {
906 /* keywordValue too long for internal buffer */
907 *status = U_INTERNAL_PROGRAM_ERROR;
908 return 0;
909 }
910 }
911 }
912 keywordValueBuffer[keywordValueLen] = 0; /* terminate */
913
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000914 startSearchHere = (char*)locale_getKeywordsStart(buffer);
915 if(startSearchHere == NULL || (startSearchHere[1]==0)) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700916 if(keywordValueLen == 0) { /* no keywords = nothing to remove */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700917 return bufLen;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000918 }
919
920 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700921 if(startSearchHere) { /* had a single @ */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000922 needLen--; /* already had the @ */
923 /* startSearchHere points at the @ */
924 } else {
925 startSearchHere=buffer+bufLen;
926 }
927 if(needLen >= bufferCapacity) {
928 *status = U_BUFFER_OVERFLOW_ERROR;
929 return needLen; /* no change */
930 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700931 *startSearchHere++ = '@';
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000932 uprv_strcpy(startSearchHere, keywordNameBuffer);
933 startSearchHere += keywordNameLen;
Jungshik Shin87232d82017-05-13 21:10:13 -0700934 *startSearchHere++ = '=';
935 uprv_strcpy(startSearchHere, keywordValueBuffer);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000936 return needLen;
937 } /* end shortcut - no @ */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700938
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000939 keywordStart = startSearchHere;
940 /* search for keyword */
941 while(keywordStart) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700942 const char* keyValueTail;
943 int32_t keyValueLen;
944
945 keywordStart++; /* skip @ or ; */
946 nextEqualsign = uprv_strchr(keywordStart, '=');
947 if (!nextEqualsign) {
948 *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
949 return 0;
950 }
951 /* strip leading & trailing spaces (TC decided to tolerate these) */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000952 while(*keywordStart == ' ') {
953 keywordStart++;
954 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700955 keyValueTail = nextEqualsign;
956 while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') {
957 keyValueTail--;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000958 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700959 /* now keyValueTail points to first char after the keyName */
960 /* copy & normalize keyName from locale */
961 if (keywordStart == keyValueTail) {
962 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000963 return 0;
964 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700965 keyValueLen = 0;
966 while (keywordStart < keyValueTail) {
967 if (!UPRV_ISALPHANUM(*keywordStart)) {
968 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
969 return 0;
970 }
971 if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
972 localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);
973 } else {
974 /* keyword name too long for internal buffer */
975 *status = U_INTERNAL_PROGRAM_ERROR;
976 return 0;
977 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000978 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700979 localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000980
981 nextSeparator = uprv_strchr(nextEqualsign, ';');
Jungshik Shin87232d82017-05-13 21:10:13 -0700982
983 /* start processing the value part */
984 nextEqualsign++; /* skip '=' */
985 /* First strip leading & trailing spaces (TC decided to tolerate these) */
986 while(*nextEqualsign == ' ') {
987 nextEqualsign++;
988 }
989 keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);
990 while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') {
991 keyValueTail--;
992 }
993 if (nextEqualsign == keyValueTail) {
994 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
995 return 0;
996 }
997
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000998 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
999 if(rc == 0) {
Jungshik Shin87232d82017-05-13 21:10:13 -07001000 /* Current entry matches the input keyword. Update the entry */
1001 if(keywordValueLen > 0) { /* updating a value */
1002 updatedKeysAndValues.append(keyValuePrefix, *status);
1003 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1004 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1005 updatedKeysAndValues.append('=', *status);
1006 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1007 } /* else removing this entry, don't emit anything */
1008 handledInputKeyAndValue = TRUE;
1009 } else {
1010 /* input keyword sorts earlier than current entry, add before current entry */
1011 if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
1012 /* insert new entry at this location */
1013 updatedKeysAndValues.append(keyValuePrefix, *status);
1014 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1015 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1016 updatedKeysAndValues.append('=', *status);
1017 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1018 handledInputKeyAndValue = TRUE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001019 }
Jungshik Shin87232d82017-05-13 21:10:13 -07001020 /* copy the current entry */
1021 updatedKeysAndValues.append(keyValuePrefix, *status);
1022 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1023 updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
1024 updatedKeysAndValues.append('=', *status);
Jungshik Shin42d50272018-10-24 01:22:09 -07001025 updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), *status);
Jungshik Shin87232d82017-05-13 21:10:13 -07001026 }
1027 if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
1028 /* append new entry at the end, it sorts later than existing entries */
1029 updatedKeysAndValues.append(keyValuePrefix, *status);
1030 /* skip keyValuePrefix update, no subsequent key-value pair */
1031 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1032 updatedKeysAndValues.append('=', *status);
1033 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1034 handledInputKeyAndValue = TRUE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001035 }
1036 keywordStart = nextSeparator;
1037 } /* end loop searching */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001038
Jungshik Shin87232d82017-05-13 21:10:13 -07001039 /* Any error from updatedKeysAndValues.append above would be internal and not due to
1040 * problems with the passed-in locale. So if we did encounter problems with the
1041 * passed-in locale above, those errors took precedence and overrode any error
1042 * status from updatedKeysAndValues.append, and also caused a return of 0. If there
1043 * are errors here they are from updatedKeysAndValues.append; they do cause an
1044 * error return but the passed-in locale is unmodified and the original bufLen is
1045 * returned.
1046 */
1047 if (!handledInputKeyAndValue || U_FAILURE(*status)) {
1048 /* if input key/value specified removal of a keyword not present in locale, or
1049 * there was an error in CharString.append, leave original locale alone. */
1050 return bufLen;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001051 }
1052
Frank Tangf90543d2020-10-30 19:02:04 -07001053 // needLen = length of the part before '@'
1054 needLen = (int32_t)(startSearchHere - buffer);
1055 return needLen + updatedKeysAndValues.extract(
1056 startSearchHere, bufferCapacity - needLen, *status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001057}
1058
1059/* ### ID parsing implementation **************************************************/
1060
/* TRUE if 'a' is one of the letters that may start a special prefix.
 * The argument is parenthesized so that expression arguments (e.g. c & 0x7f)
 * are compared as a whole; note that it is still evaluated more than once. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
1071
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001072/**
1073 * Lookup 'key' in the array 'list'. The array 'list' should contain
1074 * a NULL entry, followed by more entries, and a second NULL entry.
1075 *
1076 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1077 * COUNTRIES_3.
1078 */
1079static int16_t _findIndex(const char* const* list, const char* key)
1080{
1081 const char* const* anchor = list;
1082 int32_t pass = 0;
1083
1084 /* Make two passes through two NULL-terminated arrays at 'list' */
1085 while (pass++ < 2) {
1086 while (*list) {
1087 if (uprv_strcmp(key, *list) == 0) {
1088 return (int16_t)(list - anchor);
1089 }
1090 list++;
1091 }
1092 ++list; /* skip final NULL *CWB*/
1093 }
1094 return -1;
1095}
1096
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001097U_CFUNC const char*
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001098uloc_getCurrentCountryID(const char* oldID){
1099 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1100 if (offset >= 0) {
1101 return REPLACEMENT_COUNTRIES[offset];
1102 }
1103 return oldID;
1104}
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001105U_CFUNC const char*
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001106uloc_getCurrentLanguageID(const char* oldID){
1107 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1108 if (offset >= 0) {
1109 return REPLACEMENT_LANGUAGES[offset];
1110 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001111 return oldID;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001112}
1113/*
1114 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1115 * avoid duplicating code to handle the earlier locale ID pieces
1116 * in the functions for the later ones by
1117 * setting the *pEnd pointer to where they stopped parsing
1118 *
1119 * TODO try to use this in Locale
1120 */
Frank Tangf90543d2020-10-30 19:02:04 -07001121CharString U_EXPORT2
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001122ulocimp_getLanguage(const char *localeID,
Frank Tangf2223962020-04-27 18:25:29 -07001123 const char **pEnd,
1124 UErrorCode &status) {
1125 CharString result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001126
Frank Tang69c72a62019-04-03 21:41:21 -07001127 if (uprv_stricmp(localeID, "root") == 0) {
1128 localeID += 4;
1129 } else if (uprv_strnicmp(localeID, "und", 3) == 0 &&
1130 (localeID[3] == '\0' ||
1131 localeID[3] == '-' ||
1132 localeID[3] == '_' ||
1133 localeID[3] == '@')) {
1134 localeID += 3;
1135 }
1136
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001137 /* if it starts with i- or x- then copy that prefix */
1138 if(_isIDPrefix(localeID)) {
Frank Tangf2223962020-04-27 18:25:29 -07001139 result.append((char)uprv_tolower(*localeID), status);
1140 result.append('-', status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001141 localeID+=2;
1142 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001143
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001144 /* copy the language as far as possible and count its length */
1145 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
Frank Tangf2223962020-04-27 18:25:29 -07001146 result.append((char)uprv_tolower(*localeID), status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001147 localeID++;
1148 }
1149
Frank Tangf2223962020-04-27 18:25:29 -07001150 if(result.length()==3) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001151 /* convert 3 character code to 2 character code if possible *CWB*/
Frank Tangf2223962020-04-27 18:25:29 -07001152 int32_t offset = _findIndex(LANGUAGES_3, result.data());
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001153 if(offset>=0) {
Frank Tangf2223962020-04-27 18:25:29 -07001154 result.clear();
1155 result.append(LANGUAGES[offset], status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001156 }
1157 }
1158
1159 if(pEnd!=NULL) {
1160 *pEnd=localeID;
1161 }
Frank Tangf2223962020-04-27 18:25:29 -07001162
1163 return result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001164}
1165
Frank Tangf90543d2020-10-30 19:02:04 -07001166CharString U_EXPORT2
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001167ulocimp_getScript(const char *localeID,
Frank Tangf2223962020-04-27 18:25:29 -07001168 const char **pEnd,
1169 UErrorCode &status) {
1170 CharString result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001171 int32_t idLen = 0;
1172
1173 if (pEnd != NULL) {
1174 *pEnd = localeID;
1175 }
1176
1177 /* copy the second item as far as possible and count its length */
1178 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
1179 && uprv_isASCIILetter(localeID[idLen])) {
1180 idLen++;
1181 }
1182
1183 /* If it's exactly 4 characters long, then it's a script and not a country. */
1184 if (idLen == 4) {
1185 int32_t i;
1186 if (pEnd != NULL) {
1187 *pEnd = localeID+idLen;
1188 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001189 if (idLen >= 1) {
Frank Tangf2223962020-04-27 18:25:29 -07001190 result.append((char)uprv_toupper(*(localeID++)), status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001191 }
1192 for (i = 1; i < idLen; i++) {
Frank Tangf2223962020-04-27 18:25:29 -07001193 result.append((char)uprv_tolower(*(localeID++)), status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001194 }
1195 }
Frank Tangf2223962020-04-27 18:25:29 -07001196
1197 return result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001198}
1199
Frank Tangf90543d2020-10-30 19:02:04 -07001200CharString U_EXPORT2
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001201ulocimp_getCountry(const char *localeID,
Frank Tangf2223962020-04-27 18:25:29 -07001202 const char **pEnd,
1203 UErrorCode &status) {
1204 CharString result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001205 int32_t idLen=0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001206
1207 /* copy the country as far as possible and count its length */
1208 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
Frank Tangf2223962020-04-27 18:25:29 -07001209 result.append((char)uprv_toupper(localeID[idLen]), status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001210 idLen++;
1211 }
1212
1213 /* the country should be either length 2 or 3 */
1214 if (idLen == 2 || idLen == 3) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001215 /* convert 3 character code to 2 character code if possible *CWB*/
1216 if(idLen==3) {
Frank Tangf2223962020-04-27 18:25:29 -07001217 int32_t offset = _findIndex(COUNTRIES_3, result.data());
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001218 if(offset>=0) {
Frank Tangf2223962020-04-27 18:25:29 -07001219 result.clear();
1220 result.append(COUNTRIES[offset], status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001221 }
1222 }
1223 localeID+=idLen;
1224 } else {
Frank Tangf2223962020-04-27 18:25:29 -07001225 result.clear();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001226 }
1227
1228 if(pEnd!=NULL) {
1229 *pEnd=localeID;
1230 }
1231
Frank Tangf2223962020-04-27 18:25:29 -07001232 return result;
1233}
1234
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001235/**
1236 * @param needSeparator if true, then add leading '_' if any variants
1237 * are added to 'variant'
1238 */
Frank Tangf2223962020-04-27 18:25:29 -07001239static void
Frank Tangf90543d2020-10-30 19:02:04 -07001240_getVariant(const char *localeID,
1241 char prev,
1242 ByteSink& sink,
1243 UBool needSeparator) {
Frank Tangf2223962020-04-27 18:25:29 -07001244 UBool hasVariant = FALSE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001245
1246 /* get one or more variant tags and separate them with '_' */
1247 if(_isIDSeparator(prev)) {
1248 /* get a variant string after a '-' or '_' */
1249 while(!_isTerminator(*localeID)) {
1250 if (needSeparator) {
Frank Tangf2223962020-04-27 18:25:29 -07001251 sink.Append("_", 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001252 needSeparator = FALSE;
1253 }
Frank Tangf2223962020-04-27 18:25:29 -07001254 char c = (char)uprv_toupper(*localeID);
1255 if (c == '-') c = '_';
1256 sink.Append(&c, 1);
1257 hasVariant = TRUE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001258 localeID++;
1259 }
1260 }
1261
1262 /* if there is no variant tag after a '-' or '_' then look for '@' */
Frank Tangf2223962020-04-27 18:25:29 -07001263 if(!hasVariant) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001264 if(prev=='@') {
1265 /* keep localeID */
1266 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1267 ++localeID; /* point after the '@' */
1268 } else {
Frank Tangf2223962020-04-27 18:25:29 -07001269 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001270 }
1271 while(!_isTerminator(*localeID)) {
1272 if (needSeparator) {
Frank Tangf2223962020-04-27 18:25:29 -07001273 sink.Append("_", 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001274 needSeparator = FALSE;
1275 }
Frank Tangf2223962020-04-27 18:25:29 -07001276 char c = (char)uprv_toupper(*localeID);
1277 if (c == '-' || c == ',') c = '_';
1278 sink.Append(&c, 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001279 localeID++;
1280 }
1281 }
Frank Tangf2223962020-04-27 18:25:29 -07001282}
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001283
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001284/* Keyword enumeration */
1285
/* State of one keyword enumeration. */
typedef struct UKeywordsContext {
    /* Start of the keyword list: NUL-terminated keywords stored back to back,
     * with an empty string marking the end. Released with uprv_free() in
     * uloc_kw_closeKeywords(). */
    char* keywords;
    /* Position of the next keyword uloc_kw_nextKeyword() will return;
     * uloc_kw_resetKeywords() moves it back to 'keywords'. */
    char* current;
} UKeywordsContext;
1290
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001291U_CDECL_BEGIN
1292
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001293static void U_CALLCONV
1294uloc_kw_closeKeywords(UEnumeration *enumerator) {
1295 uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1296 uprv_free(enumerator->context);
1297 uprv_free(enumerator);
1298}
1299
1300static int32_t U_CALLCONV
1301uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
1302 char *kw = ((UKeywordsContext *)en->context)->keywords;
1303 int32_t result = 0;
1304 while(*kw) {
1305 result++;
1306 kw += uprv_strlen(kw)+1;
1307 }
1308 return result;
1309}
1310
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001311static const char * U_CALLCONV
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001312uloc_kw_nextKeyword(UEnumeration* en,
1313 int32_t* resultLength,
1314 UErrorCode* /*status*/) {
1315 const char* result = ((UKeywordsContext *)en->context)->current;
1316 int32_t len = 0;
1317 if(*result) {
1318 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1319 ((UKeywordsContext *)en->context)->current += len+1;
1320 } else {
1321 result = NULL;
1322 }
1323 if (resultLength) {
1324 *resultLength = len;
1325 }
1326 return result;
1327}
1328
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001329static void U_CALLCONV
1330uloc_kw_resetKeywords(UEnumeration* en,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001331 UErrorCode* /*status*/) {
1332 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1333}
1334
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001335U_CDECL_END
1336
1337
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001338static const UEnumeration gKeywordsEnum = {
1339 NULL,
1340 NULL,
1341 uloc_kw_closeKeywords,
1342 uloc_kw_countKeywords,
1343 uenum_unextDefault,
1344 uloc_kw_nextKeyword,
1345 uloc_kw_resetKeywords
1346};
1347
1348U_CAPI UEnumeration* U_EXPORT2
1349uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1350{
Frank Tangb8696612019-10-25 14:58:21 -07001351 LocalMemory<UKeywordsContext> myContext;
1352 LocalMemory<UEnumeration> result;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001353
Frank Tangb8696612019-10-25 14:58:21 -07001354 if (U_FAILURE(*status)) {
1355 return nullptr;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001356 }
Frank Tangb8696612019-10-25 14:58:21 -07001357 myContext.adoptInstead(static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))));
1358 result.adoptInstead(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))));
1359 if (myContext.isNull() || result.isNull()) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001360 *status = U_MEMORY_ALLOCATION_ERROR;
Frank Tangb8696612019-10-25 14:58:21 -07001361 return nullptr;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001362 }
Frank Tangb8696612019-10-25 14:58:21 -07001363 uprv_memcpy(result.getAlias(), &gKeywordsEnum, sizeof(UEnumeration));
1364 myContext->keywords = static_cast<char *>(uprv_malloc(keywordListSize+1));
1365 if (myContext->keywords == nullptr) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001366 *status = U_MEMORY_ALLOCATION_ERROR;
Frank Tangb8696612019-10-25 14:58:21 -07001367 return nullptr;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001368 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001369 uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1370 myContext->keywords[keywordListSize] = 0;
1371 myContext->current = myContext->keywords;
Frank Tangb8696612019-10-25 14:58:21 -07001372 result->context = myContext.orphan();
1373 return result.orphan();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001374}
1375
1376U_CAPI UEnumeration* U_EXPORT2
1377uloc_openKeywords(const char* localeID,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001378 UErrorCode* status)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001379{
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001380 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1381 const char* tmpLocaleID;
1382
1383 if(status==NULL || U_FAILURE(*status)) {
1384 return 0;
1385 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001386
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001387 if (_hasBCP47Extension(localeID)) {
1388 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1389 } else {
1390 if (localeID==NULL) {
1391 localeID=uloc_getDefault();
1392 }
1393 tmpLocaleID=localeID;
1394 }
1395
1396 /* Skip the language */
Frank Tangf90543d2020-10-30 19:02:04 -07001397 ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *status);
1398 if (U_FAILURE(*status)) {
1399 return 0;
1400 }
1401
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001402 if(_isIDSeparator(*tmpLocaleID)) {
1403 const char *scriptID;
1404 /* Skip the script if available */
Frank Tangf90543d2020-10-30 19:02:04 -07001405 ulocimp_getScript(tmpLocaleID+1, &scriptID, *status);
1406 if (U_FAILURE(*status)) {
1407 return 0;
1408 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001409 if(scriptID != tmpLocaleID+1) {
1410 /* Found optional script */
1411 tmpLocaleID = scriptID;
1412 }
1413 /* Skip the Country */
1414 if (_isIDSeparator(*tmpLocaleID)) {
Frank Tangf90543d2020-10-30 19:02:04 -07001415 ulocimp_getCountry(tmpLocaleID+1, &tmpLocaleID, *status);
1416 if (U_FAILURE(*status)) {
1417 return 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001418 }
1419 }
1420 }
1421
1422 /* keywords are located after '@' */
1423 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
Frank Tangf90543d2020-10-30 19:02:04 -07001424 CharString keywords;
1425 CharStringByteSink sink(&keywords);
1426 ulocimp_getKeywords(tmpLocaleID+1, '@', sink, FALSE, status);
1427 if (U_FAILURE(*status)) {
1428 return NULL;
1429 }
1430 return uloc_openKeywordList(keywords.data(), keywords.length(), status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001431 }
Frank Tangf90543d2020-10-30 19:02:04 -07001432 return NULL;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001433}
1434
1435
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/*
 * True when at least one bit of `mask` is set in `options`.
 * Both arguments are parenthesized so that compound expressions such as
 * OPTION_SET(a | b, m) are evaluated as a whole before the bit test.
 */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/*
 * The characters of "i-default" WITHOUT a terminating NUL; always pair this
 * array with I_DEFAULT_LENGTH when comparing.
 */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001444
1445/**
1446 * Canonicalize the given localeID, to level 1 or to level 2,
1447 * depending on the options. To specify level 1, pass in options=0.
1448 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1449 *
1450 * This is the code underlying uloc_getName and uloc_canonicalize.
1451 */
Frank Tangf2223962020-04-27 18:25:29 -07001452static void
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001453_canonicalize(const char* localeID,
Frank Tangf2223962020-04-27 18:25:29 -07001454 ByteSink& sink,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001455 uint32_t options,
1456 UErrorCode* err) {
Frank Tangf2223962020-04-27 18:25:29 -07001457 int32_t j, fieldCount=0, scriptSize=0, variantSize=0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001458 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1459 const char* origLocaleID;
1460 const char* tmpLocaleID;
1461 const char* keywordAssign = NULL;
1462 const char* separatorIndicator = NULL;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001463
1464 if (U_FAILURE(*err)) {
Frank Tangf2223962020-04-27 18:25:29 -07001465 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001466 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001467
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001468 if (_hasBCP47Extension(localeID)) {
1469 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1470 } else {
1471 if (localeID==NULL) {
1472 localeID=uloc_getDefault();
1473 }
1474 tmpLocaleID=localeID;
1475 }
1476
1477 origLocaleID=tmpLocaleID;
1478
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001479 /* get all pieces, one after another, and separate with '_' */
Frank Tangf2223962020-04-27 18:25:29 -07001480 CharString tag = ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001481
Frank Tangf2223962020-04-27 18:25:29 -07001482 if (tag.length() == I_DEFAULT_LENGTH &&
1483 uprv_strncmp(origLocaleID, i_default, I_DEFAULT_LENGTH) == 0) {
1484 tag.clear();
1485 tag.append(uloc_getDefault(), *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001486 } else if(_isIDSeparator(*tmpLocaleID)) {
1487 const char *scriptID;
1488
1489 ++fieldCount;
Frank Tangf2223962020-04-27 18:25:29 -07001490 tag.append('_', *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001491
Frank Tangf2223962020-04-27 18:25:29 -07001492 CharString script = ulocimp_getScript(tmpLocaleID+1, &scriptID, *err);
1493 tag.append(script, *err);
1494 scriptSize = script.length();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001495 if(scriptSize > 0) {
1496 /* Found optional script */
1497 tmpLocaleID = scriptID;
1498 ++fieldCount;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001499 if (_isIDSeparator(*tmpLocaleID)) {
1500 /* If there is something else, then we add the _ */
Frank Tangf2223962020-04-27 18:25:29 -07001501 tag.append('_', *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001502 }
1503 }
1504
1505 if (_isIDSeparator(*tmpLocaleID)) {
1506 const char *cntryID;
Frank Tangf2223962020-04-27 18:25:29 -07001507
1508 CharString country = ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err);
1509 tag.append(country, *err);
1510 if (!country.isEmpty()) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001511 /* Found optional country */
1512 tmpLocaleID = cntryID;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001513 }
1514 if(_isIDSeparator(*tmpLocaleID)) {
1515 /* If there is something else, then we add the _ if we found country before. */
Frank Tangf2223962020-04-27 18:25:29 -07001516 if (!_isIDSeparator(*(tmpLocaleID+1))) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001517 ++fieldCount;
Frank Tangf2223962020-04-27 18:25:29 -07001518 tag.append('_', *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001519 }
1520
Frank Tangf2223962020-04-27 18:25:29 -07001521 variantSize = -tag.length();
1522 {
1523 CharStringByteSink s(&tag);
Frank Tangf90543d2020-10-30 19:02:04 -07001524 _getVariant(tmpLocaleID+1, *tmpLocaleID, s, FALSE);
Frank Tangf2223962020-04-27 18:25:29 -07001525 }
1526 variantSize += tag.length();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001527 if (variantSize > 0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001528 tmpLocaleID += variantSize + 1; /* skip '_' and variant */
1529 }
1530 }
1531 }
1532 }
1533
1534 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1535 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
1536 UBool done = FALSE;
1537 do {
1538 char c = *tmpLocaleID;
1539 switch (c) {
1540 case 0:
1541 case '@':
1542 done = TRUE;
1543 break;
1544 default:
Frank Tangf2223962020-04-27 18:25:29 -07001545 tag.append(c, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001546 ++tmpLocaleID;
1547 break;
1548 }
1549 } while (!done);
1550 }
1551
1552 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1553 After this, tmpLocaleID either points to '@' or is NULL */
1554 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1555 keywordAssign = uprv_strchr(tmpLocaleID, '=');
1556 separatorIndicator = uprv_strchr(tmpLocaleID, ';');
1557 }
1558
1559 /* Copy POSIX-style variant, if any [mr@FOO] */
1560 if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1561 tmpLocaleID != NULL && keywordAssign == NULL) {
1562 for (;;) {
1563 char c = *tmpLocaleID;
1564 if (c == 0) {
1565 break;
1566 }
Frank Tangf2223962020-04-27 18:25:29 -07001567 tag.append(c, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001568 ++tmpLocaleID;
1569 }
1570 }
1571
1572 if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1573 /* Handle @FOO variant if @ is present and not followed by = */
1574 if (tmpLocaleID!=NULL && keywordAssign==NULL) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001575 /* Add missing '_' if needed */
1576 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1577 do {
Frank Tangf2223962020-04-27 18:25:29 -07001578 tag.append('_', *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001579 ++fieldCount;
1580 } while(fieldCount<2);
1581 }
Frank Tangf2223962020-04-27 18:25:29 -07001582
1583 int32_t posixVariantSize = -tag.length();
1584 {
1585 CharStringByteSink s(&tag);
Frank Tangf90543d2020-10-30 19:02:04 -07001586 _getVariant(tmpLocaleID+1, '@', s, (UBool)(variantSize > 0));
Frank Tangf2223962020-04-27 18:25:29 -07001587 }
1588 posixVariantSize += tag.length();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001589 if (posixVariantSize > 0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001590 variantSize += posixVariantSize;
1591 }
1592 }
1593
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001594 /* Look up the ID in the canonicalization map */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001595 for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
Frank Tangf2223962020-04-27 18:25:29 -07001596 StringPiece id(CANONICALIZE_MAP[j].id);
1597 if (tag == id) {
1598 if (id.empty() && tmpLocaleID != NULL) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001599 break; /* Don't remap "" if keywords present */
1600 }
Frank Tangf2223962020-04-27 18:25:29 -07001601 tag.clear();
1602 tag.append(CANONICALIZE_MAP[j].canonicalID, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001603 break;
1604 }
1605 }
1606 }
1607
Frank Tangf2223962020-04-27 18:25:29 -07001608 sink.Append(tag.data(), tag.length());
1609
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001610 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1611 if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1612 (!separatorIndicator || separatorIndicator > keywordAssign)) {
Frank Tangf2223962020-04-27 18:25:29 -07001613 sink.Append("@", 1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001614 ++fieldCount;
Frank Tangf90543d2020-10-30 19:02:04 -07001615 ulocimp_getKeywords(tmpLocaleID+1, '@', sink, TRUE, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001616 }
1617 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001618}
1619
1620/* ### ID parsing API **************************************************/
1621
1622U_CAPI int32_t U_EXPORT2
1623uloc_getParent(const char* localeID,
1624 char* parent,
1625 int32_t parentCapacity,
1626 UErrorCode* err)
1627{
1628 const char *lastUnderscore;
1629 int32_t i;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001630
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001631 if (U_FAILURE(*err))
1632 return 0;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001633
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001634 if (localeID == NULL)
1635 localeID = uloc_getDefault();
1636
1637 lastUnderscore=uprv_strrchr(localeID, '_');
1638 if(lastUnderscore!=NULL) {
1639 i=(int32_t)(lastUnderscore-localeID);
1640 } else {
1641 i=0;
1642 }
1643
Frank Tang69c72a62019-04-03 21:41:21 -07001644 if (i > 0) {
1645 if (uprv_strnicmp(localeID, "und_", 4) == 0) {
1646 localeID += 3;
1647 i -= 3;
1648 uprv_memmove(parent, localeID, uprv_min(i, parentCapacity));
1649 } else if (parent != localeID) {
1650 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1651 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001652 }
Frank Tang69c72a62019-04-03 21:41:21 -07001653
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001654 return u_terminateChars(parent, parentCapacity, i, err);
1655}
1656
1657U_CAPI int32_t U_EXPORT2
1658uloc_getLanguage(const char* localeID,
1659 char* language,
1660 int32_t languageCapacity,
1661 UErrorCode* err)
1662{
1663 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001664
1665 if (err==NULL || U_FAILURE(*err)) {
1666 return 0;
1667 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001668
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001669 if(localeID==NULL) {
1670 localeID=uloc_getDefault();
1671 }
1672
Frank Tangf90543d2020-10-30 19:02:04 -07001673 return ulocimp_getLanguage(localeID, NULL, *err).extract(language, languageCapacity, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001674}
1675
1676U_CAPI int32_t U_EXPORT2
1677uloc_getScript(const char* localeID,
1678 char* script,
1679 int32_t scriptCapacity,
1680 UErrorCode* err)
1681{
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001682 if(err==NULL || U_FAILURE(*err)) {
1683 return 0;
1684 }
1685
1686 if(localeID==NULL) {
1687 localeID=uloc_getDefault();
1688 }
1689
1690 /* skip the language */
Frank Tangf90543d2020-10-30 19:02:04 -07001691 ulocimp_getLanguage(localeID, &localeID, *err);
1692 if (U_FAILURE(*err)) {
1693 return 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001694 }
Frank Tangf90543d2020-10-30 19:02:04 -07001695
1696 if(_isIDSeparator(*localeID)) {
1697 return ulocimp_getScript(localeID+1, NULL, *err).extract(script, scriptCapacity, *err);
1698 }
1699 return u_terminateChars(script, scriptCapacity, 0, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001700}
1701
1702U_CAPI int32_t U_EXPORT2
1703uloc_getCountry(const char* localeID,
1704 char* country,
1705 int32_t countryCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001706 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001707{
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001708 if(err==NULL || U_FAILURE(*err)) {
1709 return 0;
1710 }
1711
1712 if(localeID==NULL) {
1713 localeID=uloc_getDefault();
1714 }
1715
1716 /* Skip the language */
Frank Tangf90543d2020-10-30 19:02:04 -07001717 ulocimp_getLanguage(localeID, &localeID, *err);
1718 if (U_FAILURE(*err)) {
1719 return 0;
1720 }
1721
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001722 if(_isIDSeparator(*localeID)) {
1723 const char *scriptID;
1724 /* Skip the script if available */
Frank Tangf90543d2020-10-30 19:02:04 -07001725 ulocimp_getScript(localeID+1, &scriptID, *err);
1726 if (U_FAILURE(*err)) {
1727 return 0;
1728 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001729 if(scriptID != localeID+1) {
1730 /* Found optional script */
1731 localeID = scriptID;
1732 }
1733 if(_isIDSeparator(*localeID)) {
Frank Tangf90543d2020-10-30 19:02:04 -07001734 return ulocimp_getCountry(localeID+1, NULL, *err).extract(country, countryCapacity, *err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001735 }
1736 }
Frank Tangf90543d2020-10-30 19:02:04 -07001737 return u_terminateChars(country, countryCapacity, 0, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001738}
1739
1740U_CAPI int32_t U_EXPORT2
1741uloc_getVariant(const char* localeID,
1742 char* variant,
1743 int32_t variantCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001744 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001745{
1746 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1747 const char* tmpLocaleID;
1748 int32_t i=0;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001749
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001750 if(err==NULL || U_FAILURE(*err)) {
1751 return 0;
1752 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001753
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001754 if (_hasBCP47Extension(localeID)) {
1755 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1756 } else {
1757 if (localeID==NULL) {
1758 localeID=uloc_getDefault();
1759 }
1760 tmpLocaleID=localeID;
1761 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001762
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001763 /* Skip the language */
Frank Tangf90543d2020-10-30 19:02:04 -07001764 ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);
1765 if (U_FAILURE(*err)) {
1766 return 0;
1767 }
1768
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001769 if(_isIDSeparator(*tmpLocaleID)) {
1770 const char *scriptID;
1771 /* Skip the script if available */
Frank Tangf90543d2020-10-30 19:02:04 -07001772 ulocimp_getScript(tmpLocaleID+1, &scriptID, *err);
1773 if (U_FAILURE(*err)) {
1774 return 0;
1775 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001776 if(scriptID != tmpLocaleID+1) {
1777 /* Found optional script */
1778 tmpLocaleID = scriptID;
1779 }
1780 /* Skip the Country */
1781 if (_isIDSeparator(*tmpLocaleID)) {
1782 const char *cntryID;
Frank Tangf90543d2020-10-30 19:02:04 -07001783 ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err);
1784 if (U_FAILURE(*err)) {
1785 return 0;
1786 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001787 if (cntryID != tmpLocaleID+1) {
1788 /* Found optional country */
1789 tmpLocaleID = cntryID;
1790 }
1791 if(_isIDSeparator(*tmpLocaleID)) {
1792 /* If there was no country ID, skip a possible extra IDSeparator */
1793 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
1794 tmpLocaleID++;
1795 }
Frank Tangf90543d2020-10-30 19:02:04 -07001796
1797 CheckedArrayByteSink sink(variant, variantCapacity);
1798 _getVariant(tmpLocaleID+1, *tmpLocaleID, sink, FALSE);
1799
1800 i = sink.NumberOfBytesAppended();
1801
1802 if (U_FAILURE(*err)) {
1803 return i;
1804 }
1805
1806 if (sink.Overflowed()) {
1807 *err = U_BUFFER_OVERFLOW_ERROR;
1808 return i;
1809 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001810 }
1811 }
1812 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001813
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001814 return u_terminateChars(variant, variantCapacity, i, err);
1815}
1816
1817U_CAPI int32_t U_EXPORT2
1818uloc_getName(const char* localeID,
1819 char* name,
1820 int32_t nameCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001821 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001822{
Frank Tangf2223962020-04-27 18:25:29 -07001823 if (U_FAILURE(*err)) {
1824 return 0;
1825 }
1826
1827 CheckedArrayByteSink sink(name, nameCapacity);
1828 ulocimp_getName(localeID, sink, err);
1829
1830 int32_t reslen = sink.NumberOfBytesAppended();
1831
1832 if (U_FAILURE(*err)) {
1833 return reslen;
1834 }
1835
1836 if (sink.Overflowed()) {
1837 *err = U_BUFFER_OVERFLOW_ERROR;
1838 } else {
1839 u_terminateChars(name, nameCapacity, reslen, err);
1840 }
1841
1842 return reslen;
1843}
1844
Frank Tangf90543d2020-10-30 19:02:04 -07001845U_CAPI void U_EXPORT2
Frank Tangf2223962020-04-27 18:25:29 -07001846ulocimp_getName(const char* localeID,
1847 ByteSink& sink,
1848 UErrorCode* err)
1849{
1850 _canonicalize(localeID, sink, 0, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001851}
1852
1853U_CAPI int32_t U_EXPORT2
1854uloc_getBaseName(const char* localeID,
1855 char* name,
1856 int32_t nameCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001857 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001858{
Frank Tangf2223962020-04-27 18:25:29 -07001859 if (U_FAILURE(*err)) {
1860 return 0;
1861 }
1862
1863 CheckedArrayByteSink sink(name, nameCapacity);
1864 ulocimp_getBaseName(localeID, sink, err);
1865
1866 int32_t reslen = sink.NumberOfBytesAppended();
1867
1868 if (U_FAILURE(*err)) {
1869 return reslen;
1870 }
1871
1872 if (sink.Overflowed()) {
1873 *err = U_BUFFER_OVERFLOW_ERROR;
1874 } else {
1875 u_terminateChars(name, nameCapacity, reslen, err);
1876 }
1877
1878 return reslen;
1879}
1880
Frank Tangf90543d2020-10-30 19:02:04 -07001881U_CAPI void U_EXPORT2
Frank Tangf2223962020-04-27 18:25:29 -07001882ulocimp_getBaseName(const char* localeID,
1883 ByteSink& sink,
1884 UErrorCode* err)
1885{
1886 _canonicalize(localeID, sink, _ULOC_STRIP_KEYWORDS, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001887}
1888
1889U_CAPI int32_t U_EXPORT2
1890uloc_canonicalize(const char* localeID,
1891 char* name,
1892 int32_t nameCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001893 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001894{
Frank Tangf2223962020-04-27 18:25:29 -07001895 if (U_FAILURE(*err)) {
1896 return 0;
1897 }
1898
1899 CheckedArrayByteSink sink(name, nameCapacity);
1900 ulocimp_canonicalize(localeID, sink, err);
1901
1902 int32_t reslen = sink.NumberOfBytesAppended();
1903
1904 if (U_FAILURE(*err)) {
1905 return reslen;
1906 }
1907
1908 if (sink.Overflowed()) {
1909 *err = U_BUFFER_OVERFLOW_ERROR;
1910 } else {
1911 u_terminateChars(name, nameCapacity, reslen, err);
1912 }
1913
1914 return reslen;
1915}
1916
Frank Tangf90543d2020-10-30 19:02:04 -07001917U_CAPI void U_EXPORT2
Frank Tangf2223962020-04-27 18:25:29 -07001918ulocimp_canonicalize(const char* localeID,
1919 ByteSink& sink,
1920 UErrorCode* err)
1921{
1922 _canonicalize(localeID, sink, _ULOC_CANONICALIZE, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001923}
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001924
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001925U_CAPI const char* U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001926uloc_getISO3Language(const char* localeID)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001927{
1928 int16_t offset;
1929 char lang[ULOC_LANG_CAPACITY];
1930 UErrorCode err = U_ZERO_ERROR;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001931
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001932 if (localeID == NULL)
1933 {
1934 localeID = uloc_getDefault();
1935 }
1936 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
1937 if (U_FAILURE(err))
1938 return "";
1939 offset = _findIndex(LANGUAGES, lang);
1940 if (offset < 0)
1941 return "";
1942 return LANGUAGES_3[offset];
1943}
1944
1945U_CAPI const char* U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001946uloc_getISO3Country(const char* localeID)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001947{
1948 int16_t offset;
1949 char cntry[ULOC_LANG_CAPACITY];
1950 UErrorCode err = U_ZERO_ERROR;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001951
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001952 if (localeID == NULL)
1953 {
1954 localeID = uloc_getDefault();
1955 }
1956 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
1957 if (U_FAILURE(err))
1958 return "";
1959 offset = _findIndex(COUNTRIES, cntry);
1960 if (offset < 0)
1961 return "";
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001962
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001963 return COUNTRIES_3[offset];
1964}
1965
1966U_CAPI uint32_t U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001967uloc_getLCID(const char* localeID)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001968{
1969 UErrorCode status = U_ZERO_ERROR;
1970 char langID[ULOC_FULLNAME_CAPACITY];
Jungshik Shin87232d82017-05-13 21:10:13 -07001971 uint32_t lcid = 0;
1972
1973 /* Check for incomplete id. */
1974 if (!localeID || uprv_strlen(localeID) < 2) {
1975 return 0;
1976 }
1977
Frank Tang69c72a62019-04-03 21:41:21 -07001978 // First, attempt Windows platform lookup if available, but fall
1979 // through to catch any special cases (ICU vs Windows name differences).
1980 lcid = uprv_convertToLCIDPlatform(localeID, &status);
1981 if (U_FAILURE(status)) {
1982 return 0;
1983 }
1984 if (lcid > 0) {
Jungshik Shin87232d82017-05-13 21:10:13 -07001985 // Windows found an LCID, return that
1986 return lcid;
1987 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001988
1989 uloc_getLanguage(localeID, langID, sizeof(langID), &status);
Frank Tang69c72a62019-04-03 21:41:21 -07001990 if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001991 return 0;
1992 }
1993
1994 if (uprv_strchr(localeID, '@')) {
1995 // uprv_convertToLCID does not support keywords other than collation.
1996 // Remove all keywords except collation.
1997 int32_t len;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001998 char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
1999
Frank Tangf90543d2020-10-30 19:02:04 -07002000 CharString collVal;
2001 {
2002 CharStringByteSink sink(&collVal);
2003 ulocimp_getKeywordValue(localeID, "collation", sink, &status);
2004 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002005
Frank Tangf90543d2020-10-30 19:02:04 -07002006 if (U_SUCCESS(status) && !collVal.isEmpty()) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002007 len = uloc_getBaseName(localeID, tmpLocaleID,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002008 UPRV_LENGTHOF(tmpLocaleID) - 1, &status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002009
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002010 if (U_SUCCESS(status) && len > 0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002011 tmpLocaleID[len] = 0;
2012
Frank Tangf90543d2020-10-30 19:02:04 -07002013 len = uloc_setKeywordValue("collation", collVal.data(), tmpLocaleID,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002014 UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002015
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002016 if (U_SUCCESS(status) && len > 0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002017 tmpLocaleID[len] = 0;
2018 return uprv_convertToLCID(langID, tmpLocaleID, &status);
2019 }
2020 }
2021 }
2022
2023 // fall through - all keywords are simply ignored
2024 status = U_ZERO_ERROR;
2025 }
2026
2027 return uprv_convertToLCID(langID, localeID, &status);
2028}
2029
2030U_CAPI int32_t U_EXPORT2
2031uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2032 UErrorCode *status)
2033{
2034 return uprv_convertToPosix(hostid, locale, localeCapacity, status);
2035}
2036
2037/* ### Default locale **************************************************/
2038
2039U_CAPI const char* U_EXPORT2
2040uloc_getDefault()
2041{
2042 return locale_get_default();
2043}
2044
2045U_CAPI void U_EXPORT2
2046uloc_setDefault(const char* newDefaultLocale,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002047 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002048{
2049 if (U_FAILURE(*err))
2050 return;
2051 /* the error code isn't currently used for anything by this function*/
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002052
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002053 /* propagate change to C++ */
2054 locale_set_default(newDefaultLocale);
2055}
2056
2057/**
2058 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
2059 * to an array of pointers to arrays of char. All of these pointers are owned
2060 * by ICU-- do not delete them, and do not write through them. The array is
2061 * terminated with a null pointer.
2062 */
2063U_CAPI const char* const* U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002064uloc_getISOLanguages()
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002065{
2066 return LANGUAGES;
2067}
2068
2069/**
2070 * Returns a list of all 2-letter country codes defined in ISO 639. This is a
2071 * pointer to an array of pointers to arrays of char. All of these pointers are
2072 * owned by ICU-- do not delete them, and do not write through them. The array is
2073 * terminated with a null pointer.
2074 */
2075U_CAPI const char* const* U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002076uloc_getISOCountries()
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002077{
2078 return COUNTRIES;
2079}
2080
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002081U_CAPI const char* U_EXPORT2
2082uloc_toUnicodeLocaleKey(const char* keyword)
2083{
2084 const char* bcpKey = ulocimp_toBcpKey(keyword);
2085 if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {
2086 // unknown keyword, but syntax is fine..
2087 return keyword;
2088 }
2089 return bcpKey;
2090}
2091
2092U_CAPI const char* U_EXPORT2
2093uloc_toUnicodeLocaleType(const char* keyword, const char* value)
2094{
2095 const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
2096 if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {
2097 // unknown keyword, but syntax is fine..
2098 return value;
2099 }
2100 return bcpType;
2101}
2102
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002103static UBool
2104isWellFormedLegacyKey(const char* legacyKey)
2105{
2106 const char* p = legacyKey;
2107 while (*p) {
2108 if (!UPRV_ISALPHANUM(*p)) {
2109 return FALSE;
2110 }
2111 p++;
2112 }
2113 return TRUE;
2114}
2115
2116static UBool
2117isWellFormedLegacyType(const char* legacyType)
2118{
2119 const char* p = legacyType;
2120 int32_t alphaNumLen = 0;
2121 while (*p) {
2122 if (*p == '_' || *p == '/' || *p == '-') {
2123 if (alphaNumLen == 0) {
2124 return FALSE;
2125 }
2126 alphaNumLen = 0;
2127 } else if (UPRV_ISALPHANUM(*p)) {
2128 alphaNumLen++;
2129 } else {
2130 return FALSE;
2131 }
2132 p++;
2133 }
2134 return (alphaNumLen != 0);
2135}
2136
2137U_CAPI const char* U_EXPORT2
2138uloc_toLegacyKey(const char* keyword)
2139{
2140 const char* legacyKey = ulocimp_toLegacyKey(keyword);
2141 if (legacyKey == NULL) {
2142 // Checks if the specified locale key is well-formed with the legacy locale syntax.
2143 //
2144 // Note:
Jungshik Shin87232d82017-05-13 21:10:13 -07002145 // LDML/CLDR provides some definition of keyword syntax in
2146 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2147 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2148 // Keys can only consist of [0-9a-zA-Z].
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002149 if (isWellFormedLegacyKey(keyword)) {
2150 return keyword;
2151 }
2152 }
2153 return legacyKey;
2154}
2155
2156U_CAPI const char* U_EXPORT2
2157uloc_toLegacyType(const char* keyword, const char* value)
2158{
2159 const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
2160 if (legacyType == NULL) {
2161 // Checks if the specified locale type is well-formed with the legacy locale syntax.
2162 //
2163 // Note:
Jungshik Shin87232d82017-05-13 21:10:13 -07002164 // LDML/CLDR provides some definition of keyword syntax in
2165 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2166 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2167 // Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
2168 // we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002169 if (isWellFormedLegacyType(value)) {
2170 return value;
2171 }
2172 }
2173 return legacyType;
2174}
2175
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002176/*eof*/