blob: 81b6e0f68ab88be97d59dcec345e8d612ebd45e4 [file] [log] [blame]
Jungshik Shin87232d82017-05-13 21:10:13 -07001// © 2016 and later: Unicode, Inc. and others.
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002// License & terms of use: http://www.unicode.org/copyright.html
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00003/*
4**********************************************************************
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07005* Copyright (C) 1997-2016, International Business Machines
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00006* Corporation and others. All Rights Reserved.
7**********************************************************************
8*
9* File ULOC.CPP
10*
11* Modification History:
12*
13* Date Name Description
14* 04/01/97 aliu Creation.
15* 08/21/98 stephen JDK 1.2 sync
16* 12/08/98 rtg New Locale implementation and C API
17* 03/15/99 damiba overhaul.
18* 04/06/99 stephen changed setDefault() to realloc and copy
19* 06/14/99 stephen Changed calls to ures_open for new params
20* 07/21/99 stephen Modified setDefault() to propagate to C++
21* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
22* brought canonicalization code into line with spec
23*****************************************************************************/
24
25/*
26 POSIX's locale format, from putil.c: [no spaces]
27
28 ll [ _CC ] [ . MM ] [ @ VV]
29
30 l = lang, C = ctry, M = charmap, V = variant
31*/
32
33#include "unicode/utypes.h"
34#include "unicode/ustring.h"
35#include "unicode/uloc.h"
36
37#include "putilimp.h"
38#include "ustr_imp.h"
39#include "ulocimp.h"
40#include "umutex.h"
41#include "cstring.h"
42#include "cmemory.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000043#include "locmap.h"
44#include "uarrsort.h"
45#include "uenumimp.h"
46#include "uassert.h"
Jungshik Shin87232d82017-05-13 21:10:13 -070047#include "charstr.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000048
49#include <stdio.h> /* for sprintf */
50
Jungshik Shin87232d82017-05-13 21:10:13 -070051U_NAMESPACE_USE
Jungshik Shin5feb9ad2016-10-21 12:52:48 -070052
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000053/* ### Declarations **************************************************/
54
55/* Locale stuff from locid.cpp */
/* Setter/getter for the default locale ID; only declared here. */
U_CFUNC void locale_set_default(const char *id);
U_CFUNC const char *locale_get_default(void);
/*
 * Keyword extraction entry point. Its parameter list matches the static
 * _getKeywords() helper later in this file, minus that helper's
 * addKeyword/addValue arguments.
 * NOTE(review): presumably a thin wrapper around _getKeywords(); the
 * definition is not in the visible part of the file -- confirm.
 */
U_CFUNC int32_t
locale_getKeywords(const char *localeID,
                   char prev,
                   char *keywords, int32_t keywordCapacity,
                   char *values, int32_t valuesCapacity, int32_t *valLen,
                   UBool valuesToo,
                   UErrorCode *status);
65
66/* ### Data tables **************************************************/
67
68/**
69 * Table of language codes, both 2- and 3-letter, with preference
70 * given to 2-letter codes where possible. Includes 3-letter codes
71 * that lack a 2-letter equivalent.
72 *
73 * This list must be in sorted order. This list is returned directly
74 * to the user by some API.
75 *
76 * This list must be kept in sync with LANGUAGES_3, with corresponding
77 * entries matched.
78 *
79 * This table should be terminated with a NULL entry, followed by a
80 * second list, and another NULL entry. The first list is visible to
81 * user code when this array is returned by API. The second list
82 * contains codes we support, but do not expose through user API.
83 *
84 * Notes
85 *
86 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
87 * include the revisions up to 2001/7/27 *CWB*
88 *
89 * The 3 character codes are the terminology codes like RFC 3066. This
90 * is compatible with prior ICU codes
91 *
92 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
93 * table but now at the end of the table because 3 character codes are
94 * duplicates. This avoids bad searches going from 3 to 2 character
95 * codes.
96 *
97 * The range qaa-qtz is reserved for local use
98 */
99/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
Jungshik Shin70f82502016-01-29 00:32:36 -0800100/* ISO639 table version is 20150505 */
Jungshik Shinb3189662017-11-07 11:18:34 -0800101/* Subsequent hand addition of selected languages */
static const char * const LANGUAGES[] = {
    /* First list: codes exposed to user code. Index-aligned with LANGUAGES_3. */
    "aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb",
    "af", "afh", "agq", "ain", "ak", "akk", "akz", "ale",
    "aln", "alt", "am", "an", "ang", "anp", "ar", "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
    "asa", "ase", "ast", "av", "avk", "awa", "ay", "az",
    "ba", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "be", "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
    "bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla",
    "bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh",
    "brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv",
    "ca", "cad", "car", "cay", "cch", "ccp", "ce", "ceb", "cgg",
    "ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh",
    "cs", "csb", "cu", "cv", "cy",
    "da", "dak", "dar", "dav", "de", "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
    "dyo", "dyu", "dz", "dzg",
    "ebu", "ee", "efi", "egl", "egy", "eka", "el", "elx",
    "en", "enm", "eo", "es", "esu", "et", "eu", "ewo",
    "ext",
    "fa", "fan", "fat", "ff", "fi", "fil", "fit", "fj",
    "fo", "fon", "fr", "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fy",
    "ga", "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
    "gez", "gil", "gl", "glk", "gmh", "gn", "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "gu", "guc",
    "gur", "guz", "gv", "gwi",
    "ha", "hai", "hak", "haw", "he", "hi", "hif", "hil",
    "hit", "hmn", "ho", "hr", "hsb", "hsn", "ht", "hu",
    "hup", "hy", "hz",
    "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ik",
    "ilo", "inh", "io", "is", "it", "iu", "izh",
    "ja", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jv",
    "ka", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg", "kgp",
    "kha", "kho", "khq", "khw", "ki", "kiu", "kj", "kk",
    "kkj", "kl", "kln", "km", "kmb", "kn", "ko", "koi",
    "kok", "kos", "kpe", "kr", "krc", "kri", "krj", "krl",
    "kru", "ks", "ksb", "ksf", "ksh", "ku", "kum", "kut",
    "kv", "kw", "ky",
    "la", "lad", "lag", "lah", "lam", "lb", "lez", "lfn",
    "lg", "li", "lij", "liv", "lkt", "lmo", "ln", "lo",
    "lol", "loz", "lrc", "lt", "ltg", "lu", "lua", "lui",
    "lun", "luo", "lus", "luy", "lv", "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga",
    "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
    "ml", "mn", "mnc", "mni", "moh", "mos", "mr", "mrj",
    "ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "my", "mye", "myv", "mzn",
    "na", "nan", "nap", "naq", "nb", "nd", "nds", "ne",
    "new", "ng", "nia", "niu", "njo", "nl", "nmg", "nn",
    "nnh", "no", "nog", "non", "nov", "nqo", "nr", "nso",
    "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi",
    "oc", "oj", "om", "or", "os", "osa", "ota",
    "pa", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
    "pdt", "peo", "pfl", "phn", "pi", "pl", "pms", "pnt",
    "pon", "prg", "pro", "ps", "pt",
    "qu", "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "rm", "rn", "ro",
    "rof", "rom", "rtm", "ru", "rue", "rug", "rup",
    "rw", "rwk",
    "sa", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "sc", "scn", "sco", "sd", "sdc", "sdh",
    "se", "see", "seh", "sei", "sel", "ses", "sg", "sga",
    "sgs", "shi", "shn", "shu", "si", "sid", "sk",
    "sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms",
    "sn", "snk", "so", "sog", "sq", "sr", "srn", "srr",
    "ss", "ssy", "st", "stq", "su", "suk", "sus", "sux",
    "sv", "sw", "swb", "swc", "syc", "syr", "szl",
    "ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg",
    "th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl",
    "tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi",
    "tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt",
    "tum", "tvl", "tw", "twq", "ty", "tyv", "tzm",
    "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
    "vai", "ve", "vec", "vep", "vi", "vls", "vmf", "vo",
    "vot", "vro", "vun",
    "wa", "wae", "wal", "war", "was", "wbp", "wo", "wuu",
    "xal", "xh", "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yi", "yo", "yrl", "yue",
    "za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu",
    "zun", "zxx", "zza",
NULL, /* terminates the first (user-visible) list */
    /* Second list: supported but not exposed through user API. */
    "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
NULL /* terminates the second list */
};
191
/*
 * Deprecated language codes. Entry i corresponds positionally to
 * REPLACEMENT_LANGUAGES[i].
 * NOTE(review): the two trailing NULLs presumably mirror the double-list
 * termination used by LANGUAGES/COUNTRIES so the same lookup code can scan
 * this table -- the lookup is not in the visible part of the file; confirm.
 */
static const char* const DEPRECATED_LANGUAGES[]={
    "in", "iw", "ji", "jw", NULL, NULL
};
/*
 * Current codes for the entries of DEPRECATED_LANGUAGES, index for index
 * ("in"->"id", "iw"->"he", "ji"->"yi", "jw"->"jv").
 */
static const char* const REPLACEMENT_LANGUAGES[]={
    "id", "he", "yi", "jv", NULL, NULL
};
198
199/**
200 * Table of 3-letter language codes.
201 *
202 * This is a lookup table used to convert 3-letter language codes to
203 * their 2-letter equivalent, where possible. It must be kept in sync
204 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
205 * same language as LANGUAGES_3[i]. The commented-out lines are
206 * copied from LANGUAGES to make eyeballing this baby easier.
207 *
208 * Where a 3-letter language code has no 2-letter equivalent, the
209 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
210 *
211 * This table should be terminated with a NULL entry, followed by a
212 * second list, and another NULL entry. The two lists correspond to
213 * the two lists in LANGUAGES.
214 */
215/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
Jungshik Shin70f82502016-01-29 00:32:36 -0800216/* ISO639 table version is 20150505 */
Jungshik Shinb3189662017-11-07 11:18:34 -0800217/* Subsequent hand addition of selected languages */
static const char * const LANGUAGES_3[] = {
    /* First list: entry i is the 3-letter code for LANGUAGES[i]. */
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
    "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
    "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
    "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
    "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
    "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
    "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
    "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
    "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
    "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
    "ces", "csb", "chu", "chv", "cym",
    "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
    "dyo", "dyu", "dzo", "dzg",
    "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
    "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
    "ext",
    "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
    "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fry",
    "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
    "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
    "gur", "guz", "glv", "gwi",
    "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
    "hup", "hye", "her",
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
    "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
    "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jav",
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
    "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
    "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
    "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
    "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
    "kom", "cor", "kir",
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
    "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
    "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
    "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
    "mal", "mon", "mnc", "mni", "moh", "mos", "mar", "mrj",
    "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "mya", "mye", "myv", "mzn",
    "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
    "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
    "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
    "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
    "oci", "oji", "orm", "ori", "oss", "osa", "ota",
    "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
    "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
    "pon", "prg", "pro", "pus", "por",
    "que", "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
    "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
    "kin", "rwk",
    "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
    "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
    "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
    "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
    "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
    "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
    "swe", "swa", "swb", "swc", "syc", "syr", "szl",
    "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
    "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
    "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
    "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
    "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
    "vot", "vro", "vun",
    "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
    "xal", "xho", "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
    "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
    "zun", "zxx", "zza",
NULL, /* terminates the first list */
    /* Second list: 3-letter codes for the obsolete 2-letter codes shown in the comment below. */
/* "in", "iw", "ji", "jw", "sh", */
    "ind", "heb", "yid", "jaw", "srp",
NULL /* terminates the second list */
};
308
309/**
310 * Table of 2-letter country codes.
311 *
312 * This list must be in sorted order. This list is returned directly
313 * to the user by some API.
314 *
315 * This list must be kept in sync with COUNTRIES_3, with corresponding
316 * entries matched.
317 *
318 * This table should be terminated with a NULL entry, followed by a
319 * second list, and another NULL entry. The first list is visible to
320 * user code when this array is returned by API. The second list
321 * contains codes we support, but do not expose through user API.
322 *
323 * Notes:
324 *
325 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
326 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
327 * new codes keeping the old ones for compatibility updated to include
328 * 1999/12/03 revisions *CWB*
329 *
330 * RO(ROM) is now RO(ROU) according to
331 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
332 */
static const char * const COUNTRIES[] = {
    /* First list: codes exposed to user code. Index-aligned with COUNTRIES_3. */
    "AD", "AE", "AF", "AG", "AI", "AL", "AM",
    "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
    "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
    "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
    "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
    "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
    "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK",
    "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
    "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
    "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
    "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
    "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
    "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
    "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
    "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
    "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
    "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
    "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
    "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
    "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
    "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
    "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
    "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
    "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
    "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
    "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",
    "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
    "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
    "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
    "WS", "YE", "YT", "ZA", "ZM", "ZW",
NULL, /* terminates the first (user-visible) list */
    /*
     * Second list: supported but not exposed through user API. "RO" also
     * appears in the first list; here it lines up with the older 3-letter
     * code "ROM" in COUNTRIES_3.
     */
    "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
NULL /* terminates the second list */
};
368
/*
 * Deprecated country codes. Entry i corresponds positionally to
 * REPLACEMENT_COUNTRIES[i].
 * NOTE(review): the two trailing NULLs presumably mirror the double-list
 * termination used by COUNTRIES so the same lookup code can scan this
 * table -- the lookup is not in the visible part of the file; confirm.
 */
static const char* const DEPRECATED_COUNTRIES[] = {
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
};
/*
 * Current codes for the entries of DEPRECATED_COUNTRIES, index for index.
 * The commented-out row repeats the deprecated codes so the pairing can be
 * checked by eye.
 */
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL /* replacement country codes */
};
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700376
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000377/**
378 * Table of 3-letter country codes.
379 *
380 * This is a lookup table used to convert 3-letter country codes to
381 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
382 * For all valid i, COUNTRIES[i] must refer to the same country as
383 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
384 * to make eyeballing this baby easier.
385 *
386 * This table should be terminated with a NULL entry, followed by a
387 * second list, and another NULL entry. The two lists correspond to
388 * the two lists in COUNTRIES.
389 */
static const char * const COUNTRIES_3[] = {
    /* First list: entry i is the 3-letter code for COUNTRIES[i]. */
/*  "AD", "AE", "AF", "AG", "AI", "AL", "AM", */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
/*  "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
/*  "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
/*  "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
/*  "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */
    "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
/*  "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/*  "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS", */
    "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/*  "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/*  "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/*  "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
/*  "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */
    "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS", "YE", "YT", "ZA", "ZM", "ZW", */
    "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
NULL, /* terminates the first list */
    /* Second list: 3-letter codes for the obsolete 2-letter codes shown below. */
/*  "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
NULL /* terminates the second list */
};
456
/*
 * One row of CANONICALIZE_MAP: a whole locale ID to match, the ID that
 * replaces it, and an optional keyword/value pair that goes with the
 * replacement.
 */
typedef struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if kw==NULL */
} CanonicalizationMap;
463
464/**
465 * A map to canonicalize locale IDs. This handles a variety of
466 * different semantic kinds of transformations.
467 */
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    /*
     * Columns: { id, canonicalID, keyword, value }.
     * Rows without a trailing note all map a legacy *_PREEURO variant to the
     * plain locale plus a "currency" keyword.
     */
    { "", "en_US_POSIX", NULL, NULL }, /* .NET name */
    { "c", "en_US_POSIX", NULL, NULL }, /* POSIX name */
    { "posix", "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
    { "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */
    { "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
    { "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */
    { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },
    { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */
    { "de_AT_PREEURO", "de_AT", "currency", "ATS" },
    { "de_DE_PREEURO", "de_DE", "currency", "DEM" },
    { "de_LU_PREEURO", "de_LU", "currency", "LUF" },
    { "el_GR_PREEURO", "el_GR", "currency", "GRD" },
    { "en_BE_PREEURO", "en_BE", "currency", "BEF" },
    { "en_IE_PREEURO", "en_IE", "currency", "IEP" },
    { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
    { "es_ES_PREEURO", "es_ES", "currency", "ESP" },
    { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },
    { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },
    { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },
    { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },
    { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },
    { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },
    { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },
    { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */
    { "it_IT_PREEURO", "it_IT", "currency", "ITL" },
    { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
    { "nb_NO_NY", "nn_NO", NULL, NULL }, /* "markus said this was ok" :-) */
    { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },
    { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },
    { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },
    { "sr_SP_CYRL", "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
    { "sr_SP_LATN", "sr_Latn_RS", NULL, NULL }, /* .NET name */
    { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
    { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
    { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
    { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
    { "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */
    { "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */
    { "zh_CHT", "zh_Hant", NULL, NULL }, /* .NET name */
    { "zh_GAN", "gan", NULL, NULL }, /* registered name */
    { "zh_GUOYU", "zh", NULL, NULL }, /* registered name */
    { "zh_HAKKA", "hak", NULL, NULL }, /* registered name */
    { "zh_MIN_NAN", "nan", NULL, NULL }, /* registered name */
    { "zh_WUU", "wuu", NULL, NULL }, /* registered name */
    { "zh_XIANG", "hsn", NULL, NULL }, /* registered name */
    { "zh_YUE", "yue", NULL, NULL }, /* registered name */
};
516
/* One row of VARIANT_MAP: a variant name and the keyword/value pair it stands for. */
typedef struct VariantMap {
    const char *variant; /* input ID */
    const char *keyword; /* keyword, or NULL if none */
    const char *value;   /* keyword value, or NULL if kw==NULL */
} VariantMap;
522
/* Variant names that are expressed as a keyword=value pair instead. */
static const VariantMap VARIANT_MAP[] = {
    { "EURO", "currency", "EUR" },
    { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
    { "STROKE", "collation", "stroke" } /* Solaris variant */
};
528
529/* ### BCP47 Conversion *******************************************/
/*
 * Test if the locale id looks like a BCP47 tag carrying an extension: the id
 * is non-NULL, contains no '@', and its shortest separator-terminated subtag
 * (as reported by getShortestSubtagLength) is a single character.
 *
 * The macro now measures its own argument; it previously referenced a
 * variable named localeID from the caller's scope, so it only worked when the
 * caller's variable happened to have that name. The argument is evaluated
 * more than once, so pass an expression without side effects.
 */
#define _hasBCP47Extension(id) ((id) != NULL && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
/*
 * Converts the BCP47 id to a Unicode locale id with uloc_forLanguageTag().
 *
 * On success finalID is set to buffer, which then holds the converted id.
 * If the conversion returns a non-positive length, reports a failure, or
 * reports U_STRING_NOT_TERMINATED_WARNING, finalID is set to the original
 * id instead; in the not-terminated case *err is additionally changed to
 * U_BUFFER_OVERFLOW_ERROR.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a single
 * statement and stays correct inside an unbraced if/else. Invoke it with a
 * trailing semicolon. Arguments are evaluated more than once.
 */
#define _ConvertBCP47(finalID, id, buffer, length, err) \
    do { \
        if (uloc_forLanguageTag((id), (buffer), (length), NULL, (err)) <= 0 || \
            U_FAILURE(*(err)) || *(err) == U_STRING_NOT_TERMINATED_WARNING) { \
            (finalID) = (id); \
            if (*(err) == U_STRING_NOT_TERMINATED_WARNING) { *(err) = U_BUFFER_OVERFLOW_ERROR; } \
        } else { \
            (finalID) = (buffer); \
        } \
    } while (0)
/*
 * Gets the size of the shortest subtag in the given localeID.
 *
 * Only non-empty subtags that are followed by a '_' or '-' separator are
 * measured; the last subtag (the one that runs to the end of the string) is
 * never taken into account. When no subtag qualifies, the length of the
 * whole string is returned.
 */
static int32_t getShortestSubtagLength(const char *localeID) {
    int32_t shortest = -1;  /* -1 until a separator-terminated subtag is seen */
    int32_t runLength = 0;  /* characters in the subtag currently being scanned */
    int32_t total = 0;      /* characters consumed so far */
    const char *cursor;

    for (cursor = localeID; *cursor != 0; ++cursor, ++total) {
        if (*cursor == '_' || *cursor == '-') {
            /* A separator closes the current subtag; empty runs are ignored. */
            if (runLength != 0 && (shortest < 0 || runLength < shortest)) {
                shortest = runLength;
            }
            runLength = 0;
        } else {
            ++runLength;
        }
    }

    return (shortest < 0) ? total : shortest;
}
566
567/* ### Keywords **************************************************/
/* Non-zero when c lies between '0' and '9' inclusive. Evaluates c twice. */
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
/* Non-zero when c is accepted by uprv_isASCIILetter or is a digit. Evaluates c more than once. */
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
/* Punctuation/symbols allowed in legacy key values */
#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')

/* Size of the fixed keyword-name buffers (e.g. KeywordStruct.keyword), terminator included. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Number of entries in the on-stack keyword list used by _getKeywords(). */
#define ULOC_MAX_NO_KEYWORDS 25
575
576U_CAPI const char * U_EXPORT2
577locale_getKeywordsStart(const char *localeID) {
578 const char *result = NULL;
579 if((result = uprv_strchr(localeID, '@')) != NULL) {
580 return result;
581 }
582#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
583 else {
584 /* We do this because the @ sign is variant, and the @ sign used on one
585 EBCDIC machine won't be compiled the same way on other EBCDIC based
586 machines. */
587 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
588 const uint8_t *charToFind = ebcdicSigns;
589 while(*charToFind) {
590 if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
591 return result;
592 }
593 charToFind++;
594 }
595 }
596#endif
597 return NULL;
598}
599
600/**
601 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
602 * @param keywordName incoming name to be canonicalized
603 * @param status return status (keyword too long)
604 * @return length of the keyword name
605 */
606static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
607{
Jungshik Shin87232d82017-05-13 21:10:13 -0700608 int32_t keywordNameLen = 0;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700609
Jungshik Shin87232d82017-05-13 21:10:13 -0700610 for (; *keywordName != 0; keywordName++) {
611 if (!UPRV_ISALPHANUM(*keywordName)) {
612 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
613 return 0;
614 }
615 if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
616 buf[keywordNameLen++] = uprv_tolower(*keywordName);
617 } else {
618 /* keyword name too long for internal buffer */
619 *status = U_INTERNAL_PROGRAM_ERROR;
620 return 0;
621 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000622 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700623 if (keywordNameLen == 0) {
624 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
625 return 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000626 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700627 buf[keywordNameLen] = 0; /* terminate */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700628
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000629 return keywordNameLen;
630}
631
/* One keyword/value pair as collected by _getKeywords(). */
typedef struct {
    char keyword[ULOC_KEYWORD_BUFFER_LEN]; /* lowercased, zero-terminated keyword name with spaces removed */
    int32_t keywordLen;                    /* number of characters in keyword, terminator excluded */
    const char *valueStart;                /* NOTE(review): presumably points at the value text inside the parsed locale ID -- the assignment is outside the visible part of the file; confirm */
    int32_t valueLen;                      /* NOTE(review): presumably the length of the value at valueStart; confirm */
} KeywordStruct;
638
639static int32_t U_CALLCONV
640compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
641 const char* leftString = ((const KeywordStruct *)left)->keyword;
642 const char* rightString = ((const KeywordStruct *)right)->keyword;
643 return uprv_strcmp(leftString, rightString);
644}
645
646/**
647 * Both addKeyword and addValue must already be in canonical form.
648 * Either both addKeyword and addValue are NULL, or neither is NULL.
649 * If they are not NULL they must be zero terminated.
 * If addKeyword is not NULL it must have length small enough to fit in KeywordStruct.keyword.
651 */
652static int32_t
653_getKeywords(const char *localeID,
654 char prev,
655 char *keywords, int32_t keywordCapacity,
656 char *values, int32_t valuesCapacity, int32_t *valLen,
657 UBool valuesToo,
658 const char* addKeyword,
659 const char* addValue,
660 UErrorCode *status)
661{
662 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700663
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000664 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
665 int32_t numKeywords = 0;
666 const char* pos = localeID;
667 const char* equalSign = NULL;
668 const char* semicolon = NULL;
669 int32_t i = 0, j, n;
670 int32_t keywordsLen = 0;
671 int32_t valuesLen = 0;
672
673 if(prev == '@') { /* start of keyword definition */
674 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
675 do {
676 UBool duplicate = FALSE;
677 /* skip leading spaces */
678 while(*pos == ' ') {
679 pos++;
680 }
681 if (!*pos) { /* handle trailing "; " */
682 break;
683 }
684 if(numKeywords == maxKeywords) {
685 *status = U_INTERNAL_PROGRAM_ERROR;
686 return 0;
687 }
688 equalSign = uprv_strchr(pos, '=');
689 semicolon = uprv_strchr(pos, ';');
690 /* lack of '=' [foo@currency] is illegal */
691 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
692 if(!equalSign || (semicolon && semicolon<equalSign)) {
693 *status = U_INVALID_FORMAT_ERROR;
694 return 0;
695 }
696 /* need to normalize both keyword and keyword name */
697 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
698 /* keyword name too long for internal buffer */
699 *status = U_INTERNAL_PROGRAM_ERROR;
700 return 0;
701 }
702 for(i = 0, n = 0; i < equalSign - pos; ++i) {
703 if (pos[i] != ' ') {
704 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
705 }
706 }
707
708 /* zero-length keyword is an error. */
709 if (n == 0) {
710 *status = U_INVALID_FORMAT_ERROR;
711 return 0;
712 }
713
714 keywordList[numKeywords].keyword[n] = 0;
715 keywordList[numKeywords].keywordLen = n;
716 /* now grab the value part. First we skip the '=' */
717 equalSign++;
718 /* then we leading spaces */
719 while(*equalSign == ' ') {
720 equalSign++;
721 }
722
723 /* Premature end or zero-length value */
Jungshik Shin (jungshik at google)46be5162015-03-26 11:46:43 -0700724 if (!*equalSign || equalSign == semicolon) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000725 *status = U_INVALID_FORMAT_ERROR;
726 return 0;
727 }
728
729 keywordList[numKeywords].valueStart = equalSign;
730
731 pos = semicolon;
732 i = 0;
733 if(pos) {
734 while(*(pos - i - 1) == ' ') {
735 i++;
736 }
737 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
738 pos++;
739 } else {
740 i = (int32_t)uprv_strlen(equalSign);
741 while(i && equalSign[i-1] == ' ') {
742 i--;
743 }
744 keywordList[numKeywords].valueLen = i;
745 }
746 /* If this is a duplicate keyword, then ignore it */
747 for (j=0; j<numKeywords; ++j) {
748 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
749 duplicate = TRUE;
750 break;
751 }
752 }
753 if (!duplicate) {
754 ++numKeywords;
755 }
756 } while(pos);
757
758 /* Handle addKeyword/addValue. */
759 if (addKeyword != NULL) {
760 UBool duplicate = FALSE;
761 U_ASSERT(addValue != NULL);
762 /* Search for duplicate; if found, do nothing. Explicit keyword
763 overrides addKeyword. */
764 for (j=0; j<numKeywords; ++j) {
765 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
766 duplicate = TRUE;
767 break;
768 }
769 }
770 if (!duplicate) {
771 if (numKeywords == maxKeywords) {
772 *status = U_INTERNAL_PROGRAM_ERROR;
773 return 0;
774 }
775 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
776 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
777 keywordList[numKeywords].valueStart = addValue;
778 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
779 ++numKeywords;
780 }
781 } else {
782 U_ASSERT(addValue == NULL);
783 }
784
785 /* now we have a list of keywords */
786 /* we need to sort it */
787 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700788
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000789 /* Now construct the keyword part */
790 for(i = 0; i < numKeywords; i++) {
791 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
792 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
793 if(valuesToo) {
794 keywords[keywordsLen + keywordList[i].keywordLen] = '=';
795 } else {
796 keywords[keywordsLen + keywordList[i].keywordLen] = 0;
797 }
798 }
799 keywordsLen += keywordList[i].keywordLen + 1;
800 if(valuesToo) {
Jungshik Shin42d50272018-10-24 01:22:09 -0700801 if(keywordsLen + keywordList[i].valueLen <= keywordCapacity) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000802 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
803 }
804 keywordsLen += keywordList[i].valueLen;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700805
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000806 if(i < numKeywords - 1) {
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700807 if(keywordsLen < keywordCapacity) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000808 keywords[keywordsLen] = ';';
809 }
810 keywordsLen++;
811 }
812 }
813 if(values) {
814 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
815 uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
816 values[valuesLen + keywordList[i].valueLen] = 0;
817 }
818 valuesLen += keywordList[i].valueLen + 1;
819 }
820 }
821 if(values) {
822 values[valuesLen] = 0;
823 if(valLen) {
824 *valLen = valuesLen;
825 }
826 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700827 return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000828 } else {
829 return 0;
830 }
831}
832
833U_CFUNC int32_t
834locale_getKeywords(const char *localeID,
835 char prev,
836 char *keywords, int32_t keywordCapacity,
837 char *values, int32_t valuesCapacity, int32_t *valLen,
838 UBool valuesToo,
839 UErrorCode *status) {
840 return _getKeywords(localeID, prev, keywords, keywordCapacity,
841 values, valuesCapacity, valLen, valuesToo,
842 NULL, NULL, status);
843}
844
845U_CAPI int32_t U_EXPORT2
846uloc_getKeywordValue(const char* localeID,
847 const char* keywordName,
848 char* buffer, int32_t bufferCapacity,
849 UErrorCode* status)
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700850{
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000851 const char* startSearchHere = NULL;
852 const char* nextSeparator = NULL;
853 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
854 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000855 int32_t result = 0;
856
857 if(status && U_SUCCESS(*status) && localeID) {
858 char tempBuffer[ULOC_FULLNAME_CAPACITY];
859 const char* tmpLocaleID;
860
Jungshik Shin87232d82017-05-13 21:10:13 -0700861 if (keywordName == NULL || keywordName[0] == 0) {
862 *status = U_ILLEGAL_ARGUMENT_ERROR;
863 return 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000864 }
865
866 locale_canonKeywordName(keywordNameBuffer, keywordName, status);
867 if(U_FAILURE(*status)) {
868 return 0;
869 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700870
Jungshik Shin87232d82017-05-13 21:10:13 -0700871 if (_hasBCP47Extension(localeID)) {
872 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
873 } else {
874 tmpLocaleID=localeID;
875 }
876
877 startSearchHere = locale_getKeywordsStart(tmpLocaleID);
878 if(startSearchHere == NULL) {
879 /* no keywords, return at once */
880 return 0;
881 }
882
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000883 /* find the first keyword */
884 while(startSearchHere) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700885 const char* keyValueTail;
886 int32_t keyValueLen;
887
888 startSearchHere++; /* skip @ or ; */
889 nextSeparator = uprv_strchr(startSearchHere, '=');
890 if(!nextSeparator) {
891 *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
892 return 0;
893 }
894 /* strip leading & trailing spaces (TC decided to tolerate these) */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000895 while(*startSearchHere == ' ') {
896 startSearchHere++;
897 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700898 keyValueTail = nextSeparator;
899 while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
900 keyValueTail--;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000901 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700902 /* now keyValueTail points to first char after the keyName */
903 /* copy & normalize keyName from locale */
904 if (startSearchHere == keyValueTail) {
905 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
906 return 0;
907 }
908 keyValueLen = 0;
909 while (startSearchHere < keyValueTail) {
910 if (!UPRV_ISALPHANUM(*startSearchHere)) {
911 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
912 return 0;
913 }
914 if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
915 localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++);
916 } else {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000917 /* keyword name too long for internal buffer */
918 *status = U_INTERNAL_PROGRAM_ERROR;
919 return 0;
Jungshik Shin87232d82017-05-13 21:10:13 -0700920 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000921 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700922 localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700923
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000924 startSearchHere = uprv_strchr(nextSeparator, ';');
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700925
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000926 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700927 /* current entry matches the keyword. */
928 nextSeparator++; /* skip '=' */
929 /* First strip leading & trailing spaces (TC decided to tolerate these) */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000930 while(*nextSeparator == ' ') {
Jungshik Shin87232d82017-05-13 21:10:13 -0700931 nextSeparator++;
932 }
933 keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
934 while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
935 keyValueTail--;
936 }
937 /* Now copy the value, but check well-formedness */
938 if (nextSeparator == keyValueTail) {
939 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
940 return 0;
941 }
942 keyValueLen = 0;
943 while (nextSeparator < keyValueTail) {
944 if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) {
945 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
946 return 0;
947 }
948 if (keyValueLen < bufferCapacity) {
949 /* Should we lowercase value to return here? Tests expect as-is. */
950 buffer[keyValueLen++] = *nextSeparator++;
951 } else { /* keep advancing so we return correct length in case of overflow */
952 keyValueLen++;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000953 nextSeparator++;
Jungshik Shin87232d82017-05-13 21:10:13 -0700954 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000955 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700956 result = u_terminateChars(buffer, bufferCapacity, keyValueLen, status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000957 return result;
958 }
959 }
960 }
961 return 0;
962}
963
964U_CAPI int32_t U_EXPORT2
965uloc_setKeywordValue(const char* keywordName,
966 const char* keywordValue,
967 char* buffer, int32_t bufferCapacity,
968 UErrorCode* status)
969{
970 /* TODO: sorting. removal. */
971 int32_t keywordNameLen;
972 int32_t keywordValueLen;
973 int32_t bufLen;
974 int32_t needLen = 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000975 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
Jungshik Shin87232d82017-05-13 21:10:13 -0700976 char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000977 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000978 int32_t rc;
979 char* nextSeparator = NULL;
980 char* nextEqualsign = NULL;
981 char* startSearchHere = NULL;
982 char* keywordStart = NULL;
Jungshik Shin87232d82017-05-13 21:10:13 -0700983 CharString updatedKeysAndValues;
984 int32_t updatedKeysAndValuesLen;
985 UBool handledInputKeyAndValue = FALSE;
986 char keyValuePrefix = '@';
987
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700988 if(U_FAILURE(*status)) {
989 return -1;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000990 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700991 if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000992 *status = U_ILLEGAL_ARGUMENT_ERROR;
993 return 0;
994 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700995 bufLen = (int32_t)uprv_strlen(buffer);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000996 if(bufferCapacity<bufLen) {
997 /* The capacity is less than the length?! Is this NULL terminated? */
998 *status = U_ILLEGAL_ARGUMENT_ERROR;
999 return 0;
1000 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001001 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
1002 if(U_FAILURE(*status)) {
1003 return 0;
1004 }
Jungshik Shin87232d82017-05-13 21:10:13 -07001005
1006 keywordValueLen = 0;
1007 if(keywordValue) {
1008 while (*keywordValue != 0) {
1009 if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) {
1010 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
1011 return 0;
1012 }
1013 if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {
1014 /* Should we force lowercase in value to set? */
1015 keywordValueBuffer[keywordValueLen++] = *keywordValue++;
1016 } else {
1017 /* keywordValue too long for internal buffer */
1018 *status = U_INTERNAL_PROGRAM_ERROR;
1019 return 0;
1020 }
1021 }
1022 }
1023 keywordValueBuffer[keywordValueLen] = 0; /* terminate */
1024
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001025 startSearchHere = (char*)locale_getKeywordsStart(buffer);
1026 if(startSearchHere == NULL || (startSearchHere[1]==0)) {
Jungshik Shin87232d82017-05-13 21:10:13 -07001027 if(keywordValueLen == 0) { /* no keywords = nothing to remove */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001028 return bufLen;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001029 }
1030
1031 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001032 if(startSearchHere) { /* had a single @ */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001033 needLen--; /* already had the @ */
1034 /* startSearchHere points at the @ */
1035 } else {
1036 startSearchHere=buffer+bufLen;
1037 }
1038 if(needLen >= bufferCapacity) {
1039 *status = U_BUFFER_OVERFLOW_ERROR;
1040 return needLen; /* no change */
1041 }
Jungshik Shin87232d82017-05-13 21:10:13 -07001042 *startSearchHere++ = '@';
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001043 uprv_strcpy(startSearchHere, keywordNameBuffer);
1044 startSearchHere += keywordNameLen;
Jungshik Shin87232d82017-05-13 21:10:13 -07001045 *startSearchHere++ = '=';
1046 uprv_strcpy(startSearchHere, keywordValueBuffer);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001047 return needLen;
1048 } /* end shortcut - no @ */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001049
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001050 keywordStart = startSearchHere;
1051 /* search for keyword */
1052 while(keywordStart) {
Jungshik Shin87232d82017-05-13 21:10:13 -07001053 const char* keyValueTail;
1054 int32_t keyValueLen;
1055
1056 keywordStart++; /* skip @ or ; */
1057 nextEqualsign = uprv_strchr(keywordStart, '=');
1058 if (!nextEqualsign) {
1059 *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
1060 return 0;
1061 }
1062 /* strip leading & trailing spaces (TC decided to tolerate these) */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001063 while(*keywordStart == ' ') {
1064 keywordStart++;
1065 }
Jungshik Shin87232d82017-05-13 21:10:13 -07001066 keyValueTail = nextEqualsign;
1067 while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') {
1068 keyValueTail--;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001069 }
Jungshik Shin87232d82017-05-13 21:10:13 -07001070 /* now keyValueTail points to first char after the keyName */
1071 /* copy & normalize keyName from locale */
1072 if (keywordStart == keyValueTail) {
1073 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001074 return 0;
1075 }
Jungshik Shin87232d82017-05-13 21:10:13 -07001076 keyValueLen = 0;
1077 while (keywordStart < keyValueTail) {
1078 if (!UPRV_ISALPHANUM(*keywordStart)) {
1079 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
1080 return 0;
1081 }
1082 if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
1083 localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);
1084 } else {
1085 /* keyword name too long for internal buffer */
1086 *status = U_INTERNAL_PROGRAM_ERROR;
1087 return 0;
1088 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001089 }
Jungshik Shin87232d82017-05-13 21:10:13 -07001090 localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001091
1092 nextSeparator = uprv_strchr(nextEqualsign, ';');
Jungshik Shin87232d82017-05-13 21:10:13 -07001093
1094 /* start processing the value part */
1095 nextEqualsign++; /* skip '=' */
1096 /* First strip leading & trailing spaces (TC decided to tolerate these) */
1097 while(*nextEqualsign == ' ') {
1098 nextEqualsign++;
1099 }
1100 keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);
1101 while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') {
1102 keyValueTail--;
1103 }
1104 if (nextEqualsign == keyValueTail) {
1105 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
1106 return 0;
1107 }
1108
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001109 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1110 if(rc == 0) {
Jungshik Shin87232d82017-05-13 21:10:13 -07001111 /* Current entry matches the input keyword. Update the entry */
1112 if(keywordValueLen > 0) { /* updating a value */
1113 updatedKeysAndValues.append(keyValuePrefix, *status);
1114 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1115 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1116 updatedKeysAndValues.append('=', *status);
1117 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1118 } /* else removing this entry, don't emit anything */
1119 handledInputKeyAndValue = TRUE;
1120 } else {
1121 /* input keyword sorts earlier than current entry, add before current entry */
1122 if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
1123 /* insert new entry at this location */
1124 updatedKeysAndValues.append(keyValuePrefix, *status);
1125 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1126 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1127 updatedKeysAndValues.append('=', *status);
1128 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1129 handledInputKeyAndValue = TRUE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001130 }
Jungshik Shin87232d82017-05-13 21:10:13 -07001131 /* copy the current entry */
1132 updatedKeysAndValues.append(keyValuePrefix, *status);
1133 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1134 updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
1135 updatedKeysAndValues.append('=', *status);
Jungshik Shin42d50272018-10-24 01:22:09 -07001136 updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), *status);
Jungshik Shin87232d82017-05-13 21:10:13 -07001137 }
1138 if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
1139 /* append new entry at the end, it sorts later than existing entries */
1140 updatedKeysAndValues.append(keyValuePrefix, *status);
1141 /* skip keyValuePrefix update, no subsequent key-value pair */
1142 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1143 updatedKeysAndValues.append('=', *status);
1144 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1145 handledInputKeyAndValue = TRUE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001146 }
1147 keywordStart = nextSeparator;
1148 } /* end loop searching */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001149
Jungshik Shin87232d82017-05-13 21:10:13 -07001150 /* Any error from updatedKeysAndValues.append above would be internal and not due to
1151 * problems with the passed-in locale. So if we did encounter problems with the
1152 * passed-in locale above, those errors took precedence and overrode any error
1153 * status from updatedKeysAndValues.append, and also caused a return of 0. If there
1154 * are errors here they are from updatedKeysAndValues.append; they do cause an
1155 * error return but the passed-in locale is unmodified and the original bufLen is
1156 * returned.
1157 */
1158 if (!handledInputKeyAndValue || U_FAILURE(*status)) {
1159 /* if input key/value specified removal of a keyword not present in locale, or
1160 * there was an error in CharString.append, leave original locale alone. */
1161 return bufLen;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001162 }
1163
Jungshik Shin87232d82017-05-13 21:10:13 -07001164 updatedKeysAndValuesLen = updatedKeysAndValues.length();
1165 /* needLen = length of the part before '@' + length of updated key-value part including '@' */
1166 needLen = (int32_t)(startSearchHere - buffer) + updatedKeysAndValuesLen;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001167 if(needLen >= bufferCapacity) {
1168 *status = U_BUFFER_OVERFLOW_ERROR;
1169 return needLen; /* no change */
1170 }
Jungshik Shin87232d82017-05-13 21:10:13 -07001171 if (updatedKeysAndValuesLen > 0) {
1172 uprv_strncpy(startSearchHere, updatedKeysAndValues.data(), updatedKeysAndValuesLen);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001173 }
1174 buffer[needLen]=0;
1175 return needLen;
1176}
1177
1178/* ### ID parsing implementation **************************************************/
1179
/* TRUE if a is one of the letters that can start a special 'x-' or 'i-' prefix.
   The argument is evaluated more than once: do not pass an expression with
   side effects. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant.
 * The argument is evaluated more than once: do not pass an expression with
 * side effects.
 */
#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
1190
1191static char* _strnchr(const char* str, int32_t len, char c) {
1192 U_ASSERT(str != 0 && len >= 0);
1193 while (len-- != 0) {
1194 char d = *str;
1195 if (d == c) {
1196 return (char*) str;
1197 } else if (d == 0) {
1198 break;
1199 }
1200 ++str;
1201 }
1202 return NULL;
1203}
1204
1205/**
1206 * Lookup 'key' in the array 'list'. The array 'list' should contain
1207 * a NULL entry, followed by more entries, and a second NULL entry.
1208 *
1209 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1210 * COUNTRIES_3.
1211 */
1212static int16_t _findIndex(const char* const* list, const char* key)
1213{
1214 const char* const* anchor = list;
1215 int32_t pass = 0;
1216
1217 /* Make two passes through two NULL-terminated arrays at 'list' */
1218 while (pass++ < 2) {
1219 while (*list) {
1220 if (uprv_strcmp(key, *list) == 0) {
1221 return (int16_t)(list - anchor);
1222 }
1223 list++;
1224 }
1225 ++list; /* skip final NULL *CWB*/
1226 }
1227 return -1;
1228}
1229
1230/* count the length of src while copying it to dest; return strlen(src) */
1231static inline int32_t
1232_copyCount(char *dest, int32_t destCapacity, const char *src) {
1233 const char *anchor;
1234 char c;
1235
1236 anchor=src;
1237 for(;;) {
1238 if((c=*src)==0) {
1239 return (int32_t)(src-anchor);
1240 }
1241 if(destCapacity<=0) {
1242 return (int32_t)((src-anchor)+uprv_strlen(src));
1243 }
1244 ++src;
1245 *dest++=c;
1246 --destCapacity;
1247 }
1248}
1249
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001250U_CFUNC const char*
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001251uloc_getCurrentCountryID(const char* oldID){
1252 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1253 if (offset >= 0) {
1254 return REPLACEMENT_COUNTRIES[offset];
1255 }
1256 return oldID;
1257}
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001258U_CFUNC const char*
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001259uloc_getCurrentLanguageID(const char* oldID){
1260 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1261 if (offset >= 0) {
1262 return REPLACEMENT_LANGUAGES[offset];
1263 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001264 return oldID;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001265}
1266/*
1267 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1268 * avoid duplicating code to handle the earlier locale ID pieces
1269 * in the functions for the later ones by
1270 * setting the *pEnd pointer to where they stopped parsing
1271 *
1272 * TODO try to use this in Locale
1273 */
1274U_CFUNC int32_t
1275ulocimp_getLanguage(const char *localeID,
1276 char *language, int32_t languageCapacity,
1277 const char **pEnd) {
1278 int32_t i=0;
1279 int32_t offset;
1280 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1281
1282 /* if it starts with i- or x- then copy that prefix */
1283 if(_isIDPrefix(localeID)) {
1284 if(i<languageCapacity) {
1285 language[i]=(char)uprv_tolower(*localeID);
1286 }
1287 if(i<languageCapacity) {
1288 language[i+1]='-';
1289 }
1290 i+=2;
1291 localeID+=2;
1292 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001293
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001294 /* copy the language as far as possible and count its length */
1295 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1296 if(i<languageCapacity) {
1297 language[i]=(char)uprv_tolower(*localeID);
1298 }
1299 if(i<3) {
1300 U_ASSERT(i>=0);
1301 lang[i]=(char)uprv_tolower(*localeID);
1302 }
1303 i++;
1304 localeID++;
1305 }
1306
1307 if(i==3) {
1308 /* convert 3 character code to 2 character code if possible *CWB*/
1309 offset=_findIndex(LANGUAGES_3, lang);
1310 if(offset>=0) {
1311 i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
1312 }
1313 }
1314
1315 if(pEnd!=NULL) {
1316 *pEnd=localeID;
1317 }
1318 return i;
1319}
1320
1321U_CFUNC int32_t
1322ulocimp_getScript(const char *localeID,
1323 char *script, int32_t scriptCapacity,
1324 const char **pEnd)
1325{
1326 int32_t idLen = 0;
1327
1328 if (pEnd != NULL) {
1329 *pEnd = localeID;
1330 }
1331
1332 /* copy the second item as far as possible and count its length */
1333 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
1334 && uprv_isASCIILetter(localeID[idLen])) {
1335 idLen++;
1336 }
1337
1338 /* If it's exactly 4 characters long, then it's a script and not a country. */
1339 if (idLen == 4) {
1340 int32_t i;
1341 if (pEnd != NULL) {
1342 *pEnd = localeID+idLen;
1343 }
1344 if(idLen > scriptCapacity) {
1345 idLen = scriptCapacity;
1346 }
1347 if (idLen >= 1) {
1348 script[0]=(char)uprv_toupper(*(localeID++));
1349 }
1350 for (i = 1; i < idLen; i++) {
1351 script[i]=(char)uprv_tolower(*(localeID++));
1352 }
1353 }
1354 else {
1355 idLen = 0;
1356 }
1357 return idLen;
1358}
1359
1360U_CFUNC int32_t
1361ulocimp_getCountry(const char *localeID,
1362 char *country, int32_t countryCapacity,
1363 const char **pEnd)
1364{
1365 int32_t idLen=0;
1366 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
1367 int32_t offset;
1368
1369 /* copy the country as far as possible and count its length */
1370 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1371 if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/
1372 cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
1373 }
1374 idLen++;
1375 }
1376
1377 /* the country should be either length 2 or 3 */
1378 if (idLen == 2 || idLen == 3) {
1379 UBool gotCountry = FALSE;
1380 /* convert 3 character code to 2 character code if possible *CWB*/
1381 if(idLen==3) {
1382 offset=_findIndex(COUNTRIES_3, cnty);
1383 if(offset>=0) {
1384 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1385 gotCountry = TRUE;
1386 }
1387 }
1388 if (!gotCountry) {
1389 int32_t i = 0;
1390 for (i = 0; i < idLen; i++) {
1391 if (i < countryCapacity) {
1392 country[i]=(char)uprv_toupper(localeID[i]);
1393 }
1394 }
1395 }
1396 localeID+=idLen;
1397 } else {
1398 idLen = 0;
1399 }
1400
1401 if(pEnd!=NULL) {
1402 *pEnd=localeID;
1403 }
1404
1405 return idLen;
1406}
1407
1408/**
1409 * @param needSeparator if true, then add leading '_' if any variants
1410 * are added to 'variant'
1411 */
1412static int32_t
1413_getVariantEx(const char *localeID,
1414 char prev,
1415 char *variant, int32_t variantCapacity,
1416 UBool needSeparator) {
1417 int32_t i=0;
1418
1419 /* get one or more variant tags and separate them with '_' */
1420 if(_isIDSeparator(prev)) {
1421 /* get a variant string after a '-' or '_' */
1422 while(!_isTerminator(*localeID)) {
1423 if (needSeparator) {
1424 if (i<variantCapacity) {
1425 variant[i] = '_';
1426 }
1427 ++i;
1428 needSeparator = FALSE;
1429 }
1430 if(i<variantCapacity) {
1431 variant[i]=(char)uprv_toupper(*localeID);
1432 if(variant[i]=='-') {
1433 variant[i]='_';
1434 }
1435 }
1436 i++;
1437 localeID++;
1438 }
1439 }
1440
1441 /* if there is no variant tag after a '-' or '_' then look for '@' */
1442 if(i==0) {
1443 if(prev=='@') {
1444 /* keep localeID */
1445 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1446 ++localeID; /* point after the '@' */
1447 } else {
1448 return 0;
1449 }
1450 while(!_isTerminator(*localeID)) {
1451 if (needSeparator) {
1452 if (i<variantCapacity) {
1453 variant[i] = '_';
1454 }
1455 ++i;
1456 needSeparator = FALSE;
1457 }
1458 if(i<variantCapacity) {
1459 variant[i]=(char)uprv_toupper(*localeID);
1460 if(variant[i]=='-' || variant[i]==',') {
1461 variant[i]='_';
1462 }
1463 }
1464 i++;
1465 localeID++;
1466 }
1467 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001468
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001469 return i;
1470}
1471
1472static int32_t
1473_getVariant(const char *localeID,
1474 char prev,
1475 char *variant, int32_t variantCapacity) {
1476 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1477}
1478
1479/**
1480 * Delete ALL instances of a variant from the given list of one or
1481 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1482 * @param variants the source string of one or more variants,
1483 * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1484 * terminated; if it is, trailing zero will NOT be maintained.
1485 * @param variantsLen length of variants
1486 * @param toDelete variant to delete, without separators, e.g. "EURO"
1487 * or "PREEURO"; not zero terminated
1488 * @param toDeleteLen length of toDelete
1489 * @return number of characters deleted from variants
1490 */
1491static int32_t
1492_deleteVariant(char* variants, int32_t variantsLen,
1493 const char* toDelete, int32_t toDeleteLen)
1494{
1495 int32_t delta = 0; /* number of chars deleted */
1496 for (;;) {
1497 UBool flag = FALSE;
1498 if (variantsLen < toDeleteLen) {
1499 return delta;
1500 }
1501 if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1502 (variantsLen == toDeleteLen ||
Jungshik Shin42d50272018-10-24 01:22:09 -07001503 (flag=(variants[toDeleteLen] == '_')) != 0))
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001504 {
1505 int32_t d = toDeleteLen + (flag?1:0);
1506 variantsLen -= d;
1507 delta += d;
1508 if (variantsLen > 0) {
1509 uprv_memmove(variants, variants+d, variantsLen);
1510 }
1511 } else {
1512 char* p = _strnchr(variants, variantsLen, '_');
1513 if (p == NULL) {
1514 return delta;
1515 }
1516 ++p;
1517 variantsLen -= (int32_t)(p - variants);
1518 variants = p;
1519 }
1520 }
1521}
1522
1523/* Keyword enumeration */
1524
/**
 * Per-enumeration state stored in UEnumeration::context by uloc_openKeywordList().
 */
typedef struct UKeywordsContext {
    /* Heap copy of the keyword list (released with uprv_free on close).  It is
       walked as consecutive NUL-terminated strings ending at an empty string. */
    char* keywords;
    /* Cursor into keywords: the keyword the next call to next() will return. */
    char* current;
} UKeywordsContext;
1529
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001530U_CDECL_BEGIN
1531
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001532static void U_CALLCONV
1533uloc_kw_closeKeywords(UEnumeration *enumerator) {
1534 uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1535 uprv_free(enumerator->context);
1536 uprv_free(enumerator);
1537}
1538
1539static int32_t U_CALLCONV
1540uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
1541 char *kw = ((UKeywordsContext *)en->context)->keywords;
1542 int32_t result = 0;
1543 while(*kw) {
1544 result++;
1545 kw += uprv_strlen(kw)+1;
1546 }
1547 return result;
1548}
1549
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001550static const char * U_CALLCONV
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001551uloc_kw_nextKeyword(UEnumeration* en,
1552 int32_t* resultLength,
1553 UErrorCode* /*status*/) {
1554 const char* result = ((UKeywordsContext *)en->context)->current;
1555 int32_t len = 0;
1556 if(*result) {
1557 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1558 ((UKeywordsContext *)en->context)->current += len+1;
1559 } else {
1560 result = NULL;
1561 }
1562 if (resultLength) {
1563 *resultLength = len;
1564 }
1565 return result;
1566}
1567
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001568static void U_CALLCONV
1569uloc_kw_resetKeywords(UEnumeration* en,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001570 UErrorCode* /*status*/) {
1571 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1572}
1573
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001574U_CDECL_END
1575
1576
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001577static const UEnumeration gKeywordsEnum = {
1578 NULL,
1579 NULL,
1580 uloc_kw_closeKeywords,
1581 uloc_kw_countKeywords,
1582 uenum_unextDefault,
1583 uloc_kw_nextKeyword,
1584 uloc_kw_resetKeywords
1585};
1586
1587U_CAPI UEnumeration* U_EXPORT2
1588uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1589{
1590 UKeywordsContext *myContext = NULL;
1591 UEnumeration *result = NULL;
1592
1593 if(U_FAILURE(*status)) {
1594 return NULL;
1595 }
1596 result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1597 /* Null pointer test */
1598 if (result == NULL) {
1599 *status = U_MEMORY_ALLOCATION_ERROR;
1600 return NULL;
1601 }
1602 uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
1603 myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
1604 if (myContext == NULL) {
1605 *status = U_MEMORY_ALLOCATION_ERROR;
1606 uprv_free(result);
1607 return NULL;
1608 }
1609 myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1610 uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1611 myContext->keywords[keywordListSize] = 0;
1612 myContext->current = myContext->keywords;
1613 result->context = myContext;
1614 return result;
1615}
1616
1617U_CAPI UEnumeration* U_EXPORT2
1618uloc_openKeywords(const char* localeID,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001619 UErrorCode* status)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001620{
1621 int32_t i=0;
1622 char keywords[256];
1623 int32_t keywordsCapacity = 256;
1624 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1625 const char* tmpLocaleID;
1626
1627 if(status==NULL || U_FAILURE(*status)) {
1628 return 0;
1629 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001630
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001631 if (_hasBCP47Extension(localeID)) {
1632 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1633 } else {
1634 if (localeID==NULL) {
1635 localeID=uloc_getDefault();
1636 }
1637 tmpLocaleID=localeID;
1638 }
1639
1640 /* Skip the language */
1641 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1642 if(_isIDSeparator(*tmpLocaleID)) {
1643 const char *scriptID;
1644 /* Skip the script if available */
1645 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1646 if(scriptID != tmpLocaleID+1) {
1647 /* Found optional script */
1648 tmpLocaleID = scriptID;
1649 }
1650 /* Skip the Country */
1651 if (_isIDSeparator(*tmpLocaleID)) {
1652 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
1653 if(_isIDSeparator(*tmpLocaleID)) {
1654 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
1655 }
1656 }
1657 }
1658
1659 /* keywords are located after '@' */
1660 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
1661 i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
1662 }
1663
1664 if(i) {
1665 return uloc_openKeywordList(keywords, i, status);
1666 } else {
1667 return NULL;
1668 }
1669}
1670
1671
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* True when any bit of mask is set in options.  Both arguments are fully
   parenthesized so that compound expressions (e.g. "a | b") are tested as a
   whole instead of being split by the higher precedence of '&'. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The pseudo-language "i-default"; deliberately not NUL-terminated, always
   used together with I_DEFAULT_LENGTH. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001680
1681/**
1682 * Canonicalize the given localeID, to level 1 or to level 2,
1683 * depending on the options. To specify level 1, pass in options=0.
1684 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1685 *
1686 * This is the code underlying uloc_getName and uloc_canonicalize.
1687 */
1688static int32_t
1689_canonicalize(const char* localeID,
1690 char* result,
1691 int32_t resultCapacity,
1692 uint32_t options,
1693 UErrorCode* err) {
1694 int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1695 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1696 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1697 const char* origLocaleID;
1698 const char* tmpLocaleID;
1699 const char* keywordAssign = NULL;
1700 const char* separatorIndicator = NULL;
1701 const char* addKeyword = NULL;
1702 const char* addValue = NULL;
1703 char* name;
1704 char* variant = NULL; /* pointer into name, or NULL */
1705
1706 if (U_FAILURE(*err)) {
1707 return 0;
1708 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001709
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001710 if (_hasBCP47Extension(localeID)) {
1711 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1712 } else {
1713 if (localeID==NULL) {
1714 localeID=uloc_getDefault();
1715 }
1716 tmpLocaleID=localeID;
1717 }
1718
1719 origLocaleID=tmpLocaleID;
1720
1721 /* if we are doing a full canonicalization, then put results in
1722 localeBuffer, if necessary; otherwise send them to result. */
1723 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
1724 (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
1725 name = localeBuffer;
1726 nameCapacity = (int32_t)sizeof(localeBuffer);
1727 } else {
1728 name = result;
1729 nameCapacity = resultCapacity;
1730 }
1731
1732 /* get all pieces, one after another, and separate with '_' */
1733 len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
1734
1735 if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1736 const char *d = uloc_getDefault();
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001737
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001738 len = (int32_t)uprv_strlen(d);
1739
1740 if (name != NULL) {
1741 uprv_strncpy(name, d, len);
1742 }
1743 } else if(_isIDSeparator(*tmpLocaleID)) {
1744 const char *scriptID;
1745
1746 ++fieldCount;
1747 if(len<nameCapacity) {
1748 name[len]='_';
1749 }
1750 ++len;
1751
1752 scriptSize=ulocimp_getScript(tmpLocaleID+1,
1753 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
1754 if(scriptSize > 0) {
1755 /* Found optional script */
1756 tmpLocaleID = scriptID;
1757 ++fieldCount;
1758 len+=scriptSize;
1759 if (_isIDSeparator(*tmpLocaleID)) {
1760 /* If there is something else, then we add the _ */
1761 if(len<nameCapacity) {
1762 name[len]='_';
1763 }
1764 ++len;
1765 }
1766 }
1767
1768 if (_isIDSeparator(*tmpLocaleID)) {
1769 const char *cntryID;
1770 int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
1771 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
1772 if (cntrySize > 0) {
1773 /* Found optional country */
1774 tmpLocaleID = cntryID;
1775 len+=cntrySize;
1776 }
1777 if(_isIDSeparator(*tmpLocaleID)) {
1778 /* If there is something else, then we add the _ if we found country before. */
1779 if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
1780 ++fieldCount;
1781 if(len<nameCapacity) {
1782 name[len]='_';
1783 }
1784 ++len;
1785 }
1786
1787 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
1788 (len<nameCapacity ? name+len : NULL), nameCapacity-len);
1789 if (variantSize > 0) {
1790 variant = len<nameCapacity ? name+len : NULL;
1791 len += variantSize;
1792 tmpLocaleID += variantSize + 1; /* skip '_' and variant */
1793 }
1794 }
1795 }
1796 }
1797
1798 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1799 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
1800 UBool done = FALSE;
1801 do {
1802 char c = *tmpLocaleID;
1803 switch (c) {
1804 case 0:
1805 case '@':
1806 done = TRUE;
1807 break;
1808 default:
1809 if (len<nameCapacity) {
1810 name[len] = c;
1811 }
1812 ++len;
1813 ++tmpLocaleID;
1814 break;
1815 }
1816 } while (!done);
1817 }
1818
1819 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1820 After this, tmpLocaleID either points to '@' or is NULL */
1821 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1822 keywordAssign = uprv_strchr(tmpLocaleID, '=');
1823 separatorIndicator = uprv_strchr(tmpLocaleID, ';');
1824 }
1825
1826 /* Copy POSIX-style variant, if any [mr@FOO] */
1827 if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1828 tmpLocaleID != NULL && keywordAssign == NULL) {
1829 for (;;) {
1830 char c = *tmpLocaleID;
1831 if (c == 0) {
1832 break;
1833 }
1834 if (len<nameCapacity) {
1835 name[len] = c;
1836 }
1837 ++len;
1838 ++tmpLocaleID;
1839 }
1840 }
1841
1842 if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1843 /* Handle @FOO variant if @ is present and not followed by = */
1844 if (tmpLocaleID!=NULL && keywordAssign==NULL) {
1845 int32_t posixVariantSize;
1846 /* Add missing '_' if needed */
1847 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1848 do {
1849 if(len<nameCapacity) {
1850 name[len]='_';
1851 }
1852 ++len;
1853 ++fieldCount;
1854 } while(fieldCount<2);
1855 }
1856 posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
1857 (UBool)(variantSize > 0));
1858 if (posixVariantSize > 0) {
1859 if (variant == NULL) {
1860 variant = name+len;
1861 }
1862 len += posixVariantSize;
1863 variantSize += posixVariantSize;
1864 }
1865 }
1866
1867 /* Handle generic variants first */
1868 if (variant) {
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001869 for (j=0; j<UPRV_LENGTHOF(VARIANT_MAP); j++) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001870 const char* variantToCompare = VARIANT_MAP[j].variant;
1871 int32_t n = (int32_t)uprv_strlen(variantToCompare);
1872 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
1873 len -= variantLen;
1874 if (variantLen > 0) {
1875 if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
1876 --len;
1877 }
1878 addKeyword = VARIANT_MAP[j].keyword;
1879 addValue = VARIANT_MAP[j].value;
1880 break;
1881 }
1882 }
1883 if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
1884 --len;
1885 }
1886 }
1887
1888 /* Look up the ID in the canonicalization map */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001889 for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001890 const char* id = CANONICALIZE_MAP[j].id;
1891 int32_t n = (int32_t)uprv_strlen(id);
1892 if (len == n && uprv_strncmp(name, id, n) == 0) {
1893 if (n == 0 && tmpLocaleID != NULL) {
1894 break; /* Don't remap "" if keywords present */
1895 }
1896 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
1897 if (CANONICALIZE_MAP[j].keyword) {
1898 addKeyword = CANONICALIZE_MAP[j].keyword;
1899 addValue = CANONICALIZE_MAP[j].value;
1900 }
1901 break;
1902 }
1903 }
1904 }
1905
1906 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1907 if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1908 (!separatorIndicator || separatorIndicator > keywordAssign)) {
1909 if(len<nameCapacity) {
1910 name[len]='@';
1911 }
1912 ++len;
1913 ++fieldCount;
1914 len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
1915 NULL, 0, NULL, TRUE, addKeyword, addValue, err);
1916 } else if (addKeyword != NULL) {
1917 U_ASSERT(addValue != NULL && len < nameCapacity);
1918 /* inelegant but works -- later make _getKeywords do this? */
1919 len += _copyCount(name+len, nameCapacity-len, "@");
1920 len += _copyCount(name+len, nameCapacity-len, addKeyword);
1921 len += _copyCount(name+len, nameCapacity-len, "=");
1922 len += _copyCount(name+len, nameCapacity-len, addValue);
1923 }
1924 }
1925
1926 if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
1927 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1928 }
1929
1930 return u_terminateChars(result, resultCapacity, len, err);
1931}
1932
1933/* ### ID parsing API **************************************************/
1934
1935U_CAPI int32_t U_EXPORT2
1936uloc_getParent(const char* localeID,
1937 char* parent,
1938 int32_t parentCapacity,
1939 UErrorCode* err)
1940{
1941 const char *lastUnderscore;
1942 int32_t i;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001943
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001944 if (U_FAILURE(*err))
1945 return 0;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001946
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001947 if (localeID == NULL)
1948 localeID = uloc_getDefault();
1949
1950 lastUnderscore=uprv_strrchr(localeID, '_');
1951 if(lastUnderscore!=NULL) {
1952 i=(int32_t)(lastUnderscore-localeID);
1953 } else {
1954 i=0;
1955 }
1956
1957 if(i>0 && parent != localeID) {
1958 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1959 }
1960 return u_terminateChars(parent, parentCapacity, i, err);
1961}
1962
1963U_CAPI int32_t U_EXPORT2
1964uloc_getLanguage(const char* localeID,
1965 char* language,
1966 int32_t languageCapacity,
1967 UErrorCode* err)
1968{
1969 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1970 int32_t i=0;
1971
1972 if (err==NULL || U_FAILURE(*err)) {
1973 return 0;
1974 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001975
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001976 if(localeID==NULL) {
1977 localeID=uloc_getDefault();
1978 }
1979
1980 i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
1981 return u_terminateChars(language, languageCapacity, i, err);
1982}
1983
1984U_CAPI int32_t U_EXPORT2
1985uloc_getScript(const char* localeID,
1986 char* script,
1987 int32_t scriptCapacity,
1988 UErrorCode* err)
1989{
1990 int32_t i=0;
1991
1992 if(err==NULL || U_FAILURE(*err)) {
1993 return 0;
1994 }
1995
1996 if(localeID==NULL) {
1997 localeID=uloc_getDefault();
1998 }
1999
2000 /* skip the language */
2001 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
2002 if(_isIDSeparator(*localeID)) {
2003 i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
2004 }
2005 return u_terminateChars(script, scriptCapacity, i, err);
2006}
2007
2008U_CAPI int32_t U_EXPORT2
2009uloc_getCountry(const char* localeID,
2010 char* country,
2011 int32_t countryCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002012 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002013{
2014 int32_t i=0;
2015
2016 if(err==NULL || U_FAILURE(*err)) {
2017 return 0;
2018 }
2019
2020 if(localeID==NULL) {
2021 localeID=uloc_getDefault();
2022 }
2023
2024 /* Skip the language */
2025 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
2026 if(_isIDSeparator(*localeID)) {
2027 const char *scriptID;
2028 /* Skip the script if available */
2029 ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
2030 if(scriptID != localeID+1) {
2031 /* Found optional script */
2032 localeID = scriptID;
2033 }
2034 if(_isIDSeparator(*localeID)) {
2035 i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
2036 }
2037 }
2038 return u_terminateChars(country, countryCapacity, i, err);
2039}
2040
2041U_CAPI int32_t U_EXPORT2
2042uloc_getVariant(const char* localeID,
2043 char* variant,
2044 int32_t variantCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002045 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002046{
2047 char tempBuffer[ULOC_FULLNAME_CAPACITY];
2048 const char* tmpLocaleID;
2049 int32_t i=0;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002050
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002051 if(err==NULL || U_FAILURE(*err)) {
2052 return 0;
2053 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002054
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002055 if (_hasBCP47Extension(localeID)) {
2056 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
2057 } else {
2058 if (localeID==NULL) {
2059 localeID=uloc_getDefault();
2060 }
2061 tmpLocaleID=localeID;
2062 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002063
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002064 /* Skip the language */
2065 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
2066 if(_isIDSeparator(*tmpLocaleID)) {
2067 const char *scriptID;
2068 /* Skip the script if available */
2069 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
2070 if(scriptID != tmpLocaleID+1) {
2071 /* Found optional script */
2072 tmpLocaleID = scriptID;
2073 }
2074 /* Skip the Country */
2075 if (_isIDSeparator(*tmpLocaleID)) {
2076 const char *cntryID;
2077 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
2078 if (cntryID != tmpLocaleID+1) {
2079 /* Found optional country */
2080 tmpLocaleID = cntryID;
2081 }
2082 if(_isIDSeparator(*tmpLocaleID)) {
2083 /* If there was no country ID, skip a possible extra IDSeparator */
2084 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
2085 tmpLocaleID++;
2086 }
2087 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
2088 }
2089 }
2090 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002091
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002092 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
2093 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
2094/*
2095 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
2096 i=_getVariant(localeID+1, '@', variant, variantCapacity);
2097 }
2098*/
2099 return u_terminateChars(variant, variantCapacity, i, err);
2100}
2101
2102U_CAPI int32_t U_EXPORT2
2103uloc_getName(const char* localeID,
2104 char* name,
2105 int32_t nameCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002106 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002107{
2108 return _canonicalize(localeID, name, nameCapacity, 0, err);
2109}
2110
2111U_CAPI int32_t U_EXPORT2
2112uloc_getBaseName(const char* localeID,
2113 char* name,
2114 int32_t nameCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002115 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002116{
2117 return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
2118}
2119
2120U_CAPI int32_t U_EXPORT2
2121uloc_canonicalize(const char* localeID,
2122 char* name,
2123 int32_t nameCapacity,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002124 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002125{
2126 return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
2127}
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002128
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002129U_CAPI const char* U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002130uloc_getISO3Language(const char* localeID)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002131{
2132 int16_t offset;
2133 char lang[ULOC_LANG_CAPACITY];
2134 UErrorCode err = U_ZERO_ERROR;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002135
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002136 if (localeID == NULL)
2137 {
2138 localeID = uloc_getDefault();
2139 }
2140 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
2141 if (U_FAILURE(err))
2142 return "";
2143 offset = _findIndex(LANGUAGES, lang);
2144 if (offset < 0)
2145 return "";
2146 return LANGUAGES_3[offset];
2147}
2148
2149U_CAPI const char* U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002150uloc_getISO3Country(const char* localeID)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002151{
2152 int16_t offset;
2153 char cntry[ULOC_LANG_CAPACITY];
2154 UErrorCode err = U_ZERO_ERROR;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002155
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002156 if (localeID == NULL)
2157 {
2158 localeID = uloc_getDefault();
2159 }
2160 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
2161 if (U_FAILURE(err))
2162 return "";
2163 offset = _findIndex(COUNTRIES, cntry);
2164 if (offset < 0)
2165 return "";
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002166
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002167 return COUNTRIES_3[offset];
2168}
2169
2170U_CAPI uint32_t U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002171uloc_getLCID(const char* localeID)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002172{
2173 UErrorCode status = U_ZERO_ERROR;
2174 char langID[ULOC_FULLNAME_CAPACITY];
Jungshik Shin87232d82017-05-13 21:10:13 -07002175 uint32_t lcid = 0;
2176
2177 /* Check for incomplete id. */
2178 if (!localeID || uprv_strlen(localeID) < 2) {
2179 return 0;
2180 }
2181
2182 // Attempt platform lookup if available
2183 lcid = uprv_convertToLCIDPlatform(localeID);
2184 if (lcid > 0)
2185 {
2186 // Windows found an LCID, return that
2187 return lcid;
2188 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002189
2190 uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2191 if (U_FAILURE(status)) {
2192 return 0;
2193 }
2194
2195 if (uprv_strchr(localeID, '@')) {
2196 // uprv_convertToLCID does not support keywords other than collation.
2197 // Remove all keywords except collation.
2198 int32_t len;
2199 char collVal[ULOC_KEYWORDS_CAPACITY];
2200 char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
2201
2202 len = uloc_getKeywordValue(localeID, "collation", collVal,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002203 UPRV_LENGTHOF(collVal) - 1, &status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002204
2205 if (U_SUCCESS(status) && len > 0) {
2206 collVal[len] = 0;
2207
2208 len = uloc_getBaseName(localeID, tmpLocaleID,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002209 UPRV_LENGTHOF(tmpLocaleID) - 1, &status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002210
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002211 if (U_SUCCESS(status) && len > 0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002212 tmpLocaleID[len] = 0;
2213
2214 len = uloc_setKeywordValue("collation", collVal, tmpLocaleID,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002215 UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002216
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002217 if (U_SUCCESS(status) && len > 0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002218 tmpLocaleID[len] = 0;
2219 return uprv_convertToLCID(langID, tmpLocaleID, &status);
2220 }
2221 }
2222 }
2223
2224 // fall through - all keywords are simply ignored
2225 status = U_ZERO_ERROR;
2226 }
2227
2228 return uprv_convertToLCID(langID, localeID, &status);
2229}
2230
2231U_CAPI int32_t U_EXPORT2
2232uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2233 UErrorCode *status)
2234{
2235 return uprv_convertToPosix(hostid, locale, localeCapacity, status);
2236}
2237
2238/* ### Default locale **************************************************/
2239
2240U_CAPI const char* U_EXPORT2
2241uloc_getDefault()
2242{
2243 return locale_get_default();
2244}
2245
2246U_CAPI void U_EXPORT2
2247uloc_setDefault(const char* newDefaultLocale,
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002248 UErrorCode* err)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002249{
2250 if (U_FAILURE(*err))
2251 return;
2252 /* the error code isn't currently used for anything by this function*/
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002253
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002254 /* propagate change to C++ */
2255 locale_set_default(newDefaultLocale);
2256}
2257
2258/**
2259 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
2260 * to an array of pointers to arrays of char. All of these pointers are owned
2261 * by ICU-- do not delete them, and do not write through them. The array is
2262 * terminated with a null pointer.
2263 */
2264U_CAPI const char* const* U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002265uloc_getISOLanguages()
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002266{
2267 return LANGUAGES;
2268}
2269
2270/**
2271 * Returns a list of all 2-letter country codes defined in ISO 639. This is a
2272 * pointer to an array of pointers to arrays of char. All of these pointers are
2273 * owned by ICU-- do not delete them, and do not write through them. The array is
2274 * terminated with a null pointer.
2275 */
2276U_CAPI const char* const* U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002277uloc_getISOCountries()
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002278{
2279 return COUNTRIES;
2280}
2281
2282
2283/* this function to be moved into cstring.c later */
2284static char gDecimal = 0;
2285
2286static /* U_CAPI */
2287double
2288/* U_EXPORT2 */
2289_uloc_strtod(const char *start, char **end) {
2290 char *decimal;
2291 char *myEnd;
2292 char buf[30];
2293 double rv;
2294 if (!gDecimal) {
2295 char rep[5];
2296 /* For machines that decide to change the decimal on you,
2297 and try to be too smart with localization.
2298 This normally should be just a '.'. */
2299 sprintf(rep, "%+1.1f", 1.0);
2300 gDecimal = rep[2];
2301 }
2302
2303 if(gDecimal == '.') {
2304 return uprv_strtod(start, end); /* fall through to OS */
2305 } else {
2306 uprv_strncpy(buf, start, 29);
2307 buf[29]=0;
2308 decimal = uprv_strchr(buf, '.');
2309 if(decimal) {
2310 *decimal = gDecimal;
2311 } else {
2312 return uprv_strtod(start, end); /* no decimal point */
2313 }
2314 rv = uprv_strtod(buf, &myEnd);
2315 if(end) {
2316 *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
2317 }
2318 return rv;
2319 }
2320}
2321
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002322typedef struct {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002323 float q;
2324 int32_t dummy; /* to avoid uninitialized memory copy from qsort */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002325 char locale[ULOC_FULLNAME_CAPACITY+1];
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002326} _acceptLangItem;
2327
2328static int32_t U_CALLCONV
2329uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b)
2330{
2331 const _acceptLangItem *aa = (const _acceptLangItem*)a;
2332 const _acceptLangItem *bb = (const _acceptLangItem*)b;
2333
2334 int32_t rc = 0;
2335 if(bb->q < aa->q) {
2336 rc = -1; /* A > B */
2337 } else if(bb->q > aa->q) {
2338 rc = 1; /* A < B */
2339 } else {
2340 rc = 0; /* A = B */
2341 }
2342
2343 if(rc==0) {
2344 rc = uprv_stricmp(aa->locale, bb->locale);
2345 }
2346
2347#if defined(ULOC_DEBUG)
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002348 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2349 aa->locale, aa->q,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002350 bb->locale, bb->q,
2351 rc);*/
2352#endif
2353
2354 return rc;
2355}
2356
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002357/*
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002358mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2359*/
2360
2361U_CAPI int32_t U_EXPORT2
2362uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2363 const char *httpAcceptLanguage,
2364 UEnumeration* availableLocales,
2365 UErrorCode *status)
2366{
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002367 MaybeStackArray<_acceptLangItem, 4> items; // Struct for collecting items.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002368 char tmp[ULOC_FULLNAME_CAPACITY +1];
2369 int32_t n = 0;
2370 const char *itemEnd;
2371 const char *paramEnd;
2372 const char *s;
2373 const char *t;
2374 int32_t res;
2375 int32_t i;
2376 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002377
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002378 if(U_FAILURE(*status)) {
2379 return -1;
2380 }
2381
2382 for(s=httpAcceptLanguage;s&&*s;) {
2383 while(isspace(*s)) /* eat space at the beginning */
2384 s++;
2385 itemEnd=uprv_strchr(s,',');
2386 paramEnd=uprv_strchr(s,';');
2387 if(!itemEnd) {
2388 itemEnd = httpAcceptLanguage+l; /* end of string */
2389 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002390 if(paramEnd && paramEnd<itemEnd) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002391 /* semicolon (;) is closer than end (,) */
2392 t = paramEnd+1;
2393 if(*t=='q') {
2394 t++;
2395 }
2396 while(isspace(*t)) {
2397 t++;
2398 }
2399 if(*t=='=') {
2400 t++;
2401 }
2402 while(isspace(*t)) {
2403 t++;
2404 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002405 items[n].q = (float)_uloc_strtod(t,NULL);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002406 } else {
2407 /* no semicolon - it's 1.0 */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002408 items[n].q = 1.0f;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002409 paramEnd = itemEnd;
2410 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002411 items[n].dummy=0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002412 /* eat spaces prior to semi */
2413 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2414 ;
Jungshik Shin42d50272018-10-24 01:22:09 -07002415 int32_t slen = static_cast<int32_t>(((t+1)-s));
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002416 if(slen > ULOC_FULLNAME_CAPACITY) {
2417 *status = U_BUFFER_OVERFLOW_ERROR;
2418 return -1; // too big
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002419 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002420 uprv_strncpy(items[n].locale, s, slen);
2421 items[n].locale[slen]=0; // terminate
2422 int32_t clen = uloc_canonicalize(items[n].locale, tmp, UPRV_LENGTHOF(tmp)-1, status);
2423 if(U_FAILURE(*status)) return -1;
2424 if((clen!=slen) || (uprv_strncmp(items[n].locale, tmp, slen))) {
2425 // canonicalization had an effect- copy back
2426 uprv_strncpy(items[n].locale, tmp, clen);
2427 items[n].locale[clen] = 0; // terminate
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002428 }
2429#if defined(ULOC_DEBUG)
2430 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2431#endif
2432 n++;
2433 s = itemEnd;
2434 while(*s==',') { /* eat duplicate commas */
2435 s++;
2436 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002437 if(n>=items.getCapacity()) { // If we need more items
2438 if(NULL == items.resize(items.getCapacity()*2, items.getCapacity())) {
2439 *status = U_MEMORY_ALLOCATION_ERROR;
2440 return -1;
2441 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002442#if defined(ULOC_DEBUG)
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002443 fprintf(stderr,"malloced at size %d\n", items.getCapacity());
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002444#endif
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002445 }
2446 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002447 uprv_sortArray(items.getAlias(), n, sizeof(items[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2448 if (U_FAILURE(*status)) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002449 return -1;
2450 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002451 LocalMemory<const char*> strs(NULL);
2452 if (strs.allocateInsteadAndReset(n) == NULL) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002453 *status = U_MEMORY_ALLOCATION_ERROR;
2454 return -1;
2455 }
2456 for(i=0;i<n;i++) {
2457#if defined(ULOC_DEBUG)
2458 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2459#endif
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002460 strs[i]=items[i].locale;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002461 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002462 res = uloc_acceptLanguage(result, resultAvailable, outResult,
2463 strs.getAlias(), n, availableLocales, status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002464 return res;
2465}
2466
2467
2468U_CAPI int32_t U_EXPORT2
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002469uloc_acceptLanguage(char *result, int32_t resultAvailable,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002470 UAcceptResult *outResult, const char **acceptList,
2471 int32_t acceptListCount,
2472 UEnumeration* availableLocales,
2473 UErrorCode *status)
2474{
2475 int32_t i,j;
2476 int32_t len;
2477 int32_t maxLen=0;
2478 char tmp[ULOC_FULLNAME_CAPACITY+1];
2479 const char *l;
2480 char **fallbackList;
2481 if(U_FAILURE(*status)) {
2482 return -1;
2483 }
2484 fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)));
2485 if(fallbackList==NULL) {
2486 *status = U_MEMORY_ALLOCATION_ERROR;
2487 return -1;
2488 }
2489 for(i=0;i<acceptListCount;i++) {
2490#if defined(ULOC_DEBUG)
2491 fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2492#endif
Jungshik Shinb3189662017-11-07 11:18:34 -08002493 while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002494#if defined(ULOC_DEBUG)
2495 fprintf(stderr," %s\n", l);
2496#endif
2497 len = (int32_t)uprv_strlen(l);
2498 if(!uprv_strcmp(acceptList[i], l)) {
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002499 if(outResult) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002500 *outResult = ULOC_ACCEPT_VALID;
2501 }
2502#if defined(ULOC_DEBUG)
2503 fprintf(stderr, "MATCH! %s\n", l);
2504#endif
2505 if(len>0) {
2506 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2507 }
2508 for(j=0;j<i;j++) {
2509 uprv_free(fallbackList[j]);
2510 }
2511 uprv_free(fallbackList);
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002512 return u_terminateChars(result, resultAvailable, len, status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002513 }
2514 if(len>maxLen) {
2515 maxLen = len;
2516 }
2517 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002518 uenum_reset(availableLocales, status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002519 /* save off parent info */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002520 if(uloc_getParent(acceptList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002521 fallbackList[i] = uprv_strdup(tmp);
2522 } else {
2523 fallbackList[i]=0;
2524 }
2525 }
2526
2527 for(maxLen--;maxLen>0;maxLen--) {
2528 for(i=0;i<acceptListCount;i++) {
2529 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2530#if defined(ULOC_DEBUG)
2531 fprintf(stderr,"Try: [%s]", fallbackList[i]);
2532#endif
Jungshik Shinb3189662017-11-07 11:18:34 -08002533 while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002534#if defined(ULOC_DEBUG)
2535 fprintf(stderr," %s\n", l);
2536#endif
2537 len = (int32_t)uprv_strlen(l);
2538 if(!uprv_strcmp(fallbackList[i], l)) {
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002539 if(outResult) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002540 *outResult = ULOC_ACCEPT_FALLBACK;
2541 }
2542#if defined(ULOC_DEBUG)
2543 fprintf(stderr, "fallback MATCH! %s\n", l);
2544#endif
2545 if(len>0) {
2546 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2547 }
2548 for(j=0;j<acceptListCount;j++) {
2549 uprv_free(fallbackList[j]);
2550 }
2551 uprv_free(fallbackList);
2552 return u_terminateChars(result, resultAvailable, len, status);
2553 }
2554 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002555 uenum_reset(availableLocales, status);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002556
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002557 if(uloc_getParent(fallbackList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002558 uprv_free(fallbackList[i]);
2559 fallbackList[i] = uprv_strdup(tmp);
2560 } else {
2561 uprv_free(fallbackList[i]);
2562 fallbackList[i]=0;
2563 }
2564 }
2565 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002566 if(outResult) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002567 *outResult = ULOC_ACCEPT_FAILED;
2568 }
2569 }
2570 for(i=0;i<acceptListCount;i++) {
2571 uprv_free(fallbackList[i]);
2572 }
2573 uprv_free(fallbackList);
2574 return -1;
2575}
2576
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002577U_CAPI const char* U_EXPORT2
2578uloc_toUnicodeLocaleKey(const char* keyword)
2579{
2580 const char* bcpKey = ulocimp_toBcpKey(keyword);
2581 if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {
2582 // unknown keyword, but syntax is fine..
2583 return keyword;
2584 }
2585 return bcpKey;
2586}
2587
2588U_CAPI const char* U_EXPORT2
2589uloc_toUnicodeLocaleType(const char* keyword, const char* value)
2590{
2591 const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
2592 if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {
2593 // unknown keyword, but syntax is fine..
2594 return value;
2595 }
2596 return bcpType;
2597}
2598
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002599static UBool
2600isWellFormedLegacyKey(const char* legacyKey)
2601{
2602 const char* p = legacyKey;
2603 while (*p) {
2604 if (!UPRV_ISALPHANUM(*p)) {
2605 return FALSE;
2606 }
2607 p++;
2608 }
2609 return TRUE;
2610}
2611
2612static UBool
2613isWellFormedLegacyType(const char* legacyType)
2614{
2615 const char* p = legacyType;
2616 int32_t alphaNumLen = 0;
2617 while (*p) {
2618 if (*p == '_' || *p == '/' || *p == '-') {
2619 if (alphaNumLen == 0) {
2620 return FALSE;
2621 }
2622 alphaNumLen = 0;
2623 } else if (UPRV_ISALPHANUM(*p)) {
2624 alphaNumLen++;
2625 } else {
2626 return FALSE;
2627 }
2628 p++;
2629 }
2630 return (alphaNumLen != 0);
2631}
2632
2633U_CAPI const char* U_EXPORT2
2634uloc_toLegacyKey(const char* keyword)
2635{
2636 const char* legacyKey = ulocimp_toLegacyKey(keyword);
2637 if (legacyKey == NULL) {
2638 // Checks if the specified locale key is well-formed with the legacy locale syntax.
2639 //
2640 // Note:
Jungshik Shin87232d82017-05-13 21:10:13 -07002641 // LDML/CLDR provides some definition of keyword syntax in
2642 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2643 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2644 // Keys can only consist of [0-9a-zA-Z].
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002645 if (isWellFormedLegacyKey(keyword)) {
2646 return keyword;
2647 }
2648 }
2649 return legacyKey;
2650}
2651
2652U_CAPI const char* U_EXPORT2
2653uloc_toLegacyType(const char* keyword, const char* value)
2654{
2655 const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
2656 if (legacyType == NULL) {
2657 // Checks if the specified locale type is well-formed with the legacy locale syntax.
2658 //
2659 // Note:
Jungshik Shin87232d82017-05-13 21:10:13 -07002660 // LDML/CLDR provides some definition of keyword syntax in
2661 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2662 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2663 // Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
2664 // we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002665 if (isWellFormedLegacyType(value)) {
2666 return value;
2667 }
2668 }
2669 return legacyType;
2670}
2671
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002672/*eof*/