// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 **********************************************************************
 *   Copyright (C) 1996-2016, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *
 * Provides functionality for mapping between
 * LCID and Posix IDs or ICU locale to codepage
 *
 * Note: All classes and code in this file are
 *       intended for internal use only.
 *
 * Methods of interest:
 *   unsigned long convertToLCID(const char*);
 *   const char* convertToPosix(unsigned long);
 *
 * Kathleen Wilson, 4/30/96
 *
 *  Date        Name        Description
 *  3/11/97     aliu        Fixed off-by-one bug in assignment operator. Added
 *                          setId() method and safety check against
 *                          MAX_ID_LENGTH.
 * 04/23/99     stephen     Added C wrapper for convertToPosix.
 * 09/18/00     george      Removed the memory leaks.
 * 08/23/01     george      Convert to C
 */

30#include "locmap.h"
31#include "cstring.h"
32#include "cmemory.h"
Jungshik Shin87232d82017-05-13 21:10:13 -070033#include "unicode/uloc.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000034
Jungshik Shine0d9b902016-10-28 12:56:54 -070035#if 0
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000036#if U_PLATFORM == U_PF_WINDOWS && defined(_MSC_VER) && (_MSC_VER >= 1500)
37/*
38 * TODO: It seems like we should widen this to
39 * either U_PLATFORM_USES_ONLY_WIN32_API (includes MinGW)
40 * or U_PLATFORM_HAS_WIN32_API (includes MinGW and Cygwin)
41 * but those use gcc and won't have defined(_MSC_VER).
42 * We might need to #include some Windows header and test for some version macro from there.
43 * Or call some Windows function and see what it returns.
44 */
Jungshik Shin87232d82017-05-13 21:10:13 -070045#define USE_WINDOWS_LCID_MAPPING_API
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000046#include <windows.h>
47#include <winnls.h>
48#endif
Jungshik Shin87232d82017-05-13 21:10:13 -070049#endif
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000050
/*
 * Note:
 *      The mapping from Win32 locale ID numbers to POSIX locale strings should
 *      be the faster one.
 *
 * Windows LCIDs are defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
 * [MS-LCID] Windows Language Code Identifier (LCID) Reference
 */

/*
////////////////////////////////////////////////
//
// Internal Classes for LCID <--> POSIX Mapping
//
/////////////////////////////////////////////////
*/

/**
 * One row of an LCID <--> POSIX table: a host locale ID paired with the
 * POSIX locale ID string it corresponds to. Both members are const, so rows
 * can only be created by initialization.
 */
typedef struct ILcidPosixElement
{
    const uint32_t hostID;        /* LCID in host format, e.g. 0x0409 */
    const char * const posixID;   /* POSIX locale ID, e.g. "en_US" */
} ILcidPosixElement;

/**
 * One per-language table: how many rows it has and where the first row is.
 * Both members are const, so entries can only be created by initialization.
 */
typedef struct ILcidPosixMap
{
    const uint32_t numRegions;                          /* number of rows in regionMaps */
    const struct ILcidPosixElement* const regionMaps;   /* first row of the table */
} ILcidPosixMap;


/*
/////////////////////////////////////////////////
//
// Easy macros to make the LCID <--> POSIX Mapping
//
/////////////////////////////////////////////////
*/

/**
 * The standard one language/one country mapping for LCID.
 * Defines a two-row table named locmap_<languageID>: the first row is the
 * language alone (its LCID is derived with LANGUAGE_LCID(hostID)), and the
 * second row is the language with the country.
 * The expansion already ends in "};", so invocations take no trailing semicolon.
 * @param hostID     LCID in host format such as 0x044d
 * @param languageID posix ID of just the language such as 'de'
 * @param posixID    posix ID of the language_TERRITORY such as 'de_CH'
 */
#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
static const ILcidPosixElement locmap_ ## languageID [] = { \
    {LANGUAGE_LCID(hostID), #languageID},     /* parent locale */ \
    {hostID, #posixID}, \
};

/**
 * Define a subtable by ID.
 * Expands to the declaration head "static const ILcidPosixElement locmap_<id>[] ="
 * and must be followed by a brace-enclosed row list and a semicolon.
 * @param id the POSIX ID, either a language or language_TERRITORY
 */
#define ILCID_POSIX_SUBTABLE(id) \
static const ILcidPosixElement locmap_ ## id [] =


/**
 * Create the map for the posixID. This macro supposes that the language string
 * name is the same as the global variable name, and that the first element
 * in the ILcidPosixElement is just the language.
 * Expands to an ILcidPosixMap initializer: {row count of locmap_<_posixID>, locmap_<_posixID>}.
 * @param _posixID the full POSIX ID for this entry.
 */
#define ILCID_POSIX_MAP(_posixID) \
    {UPRV_LENGTHOF(locmap_ ## _posixID), locmap_ ## _posixID}

/*
////////////////////////////////////////////
//
// Create the table of LCID to POSIX Mapping
// None of it should be dynamically created.
//
// Keep static locale variables inside the function so that
// it can be created properly during static init.
//
// Note: This table should be updated periodically. Check the [MS-LCID] Windows Language Code Identifier
//       (LCID) Reference defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
//
//       Microsoft is moving away from LCID in favor of locale name as of Vista.  This table needs to be
//       maintained for support of older Windows versions.
//       Update: Windows 7 (091130)
//
// Note: Microsoft assigns a different LCID if a locale has a sorting variant. POSIX IDs below may contain
//       @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is
//       called from uloc_getLCID(), keywords other than collation are already removed. If we really need
//       to support other keywords in this mapping data, we must update the implementation.
////////////////////////////////////////////
*/

// TODO: For Windows ideally this table would be a list of exceptions rather than a complete list as
// LocaleNameToLCID and LCIDToLocaleName provide 90% of these.

jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000146ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
147
148ILCID_POSIX_SUBTABLE(ar) {
149 {0x01, "ar"},
150 {0x3801, "ar_AE"},
151 {0x3c01, "ar_BH"},
152 {0x1401, "ar_DZ"},
153 {0x0c01, "ar_EG"},
154 {0x0801, "ar_IQ"},
155 {0x2c01, "ar_JO"},
156 {0x3401, "ar_KW"},
157 {0x3001, "ar_LB"},
158 {0x1001, "ar_LY"},
159 {0x1801, "ar_MA"},
160 {0x1801, "ar_MO"},
161 {0x2001, "ar_OM"},
162 {0x4001, "ar_QA"},
163 {0x0401, "ar_SA"},
164 {0x2801, "ar_SY"},
165 {0x1c01, "ar_TN"},
166 {0x2401, "ar_YE"}
167};
168
169ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
170ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
171ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)
172
173ILCID_POSIX_SUBTABLE(az) {
174 {0x2c, "az"},
175 {0x082c, "az_Cyrl_AZ"}, /* Cyrillic based */
176 {0x742c, "az_Cyrl"}, /* Cyrillic based */
177 {0x042c, "az_Latn_AZ"}, /* Latin based */
178 {0x782c, "az_Latn"}, /* Latin based */
179 {0x042c, "az_AZ"} /* Latin based */
180};
181
182ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)
183ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
184
/*ILCID_POSIX_SUBTABLE(ber) {
    {0x5f,   "ber"},
    {0x045f, "ber_Arab_DZ"},
    {0x045f, "ber_Arab"},
    {0x085f, "ber_Latn_DZ"},
    {0x085f, "ber_Latn"}
};*/

193ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
194
195ILCID_POSIX_ELEMENT_ARRAY(0x0466, bin, bin_NG)
196
197ILCID_POSIX_SUBTABLE(bn) {
198 {0x45, "bn"},
199 {0x0845, "bn_BD"},
200 {0x0445, "bn_IN"}
201};
202
203ILCID_POSIX_SUBTABLE(bo) {
204 {0x51, "bo"},
205 {0x0851, "bo_BT"},
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700206 {0x0451, "bo_CN"},
207 {0x0c51, "dz_BT"}
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000208};
209
210ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)
211
212ILCID_POSIX_SUBTABLE(ca) {
213 {0x03, "ca"},
214 {0x0403, "ca_ES"},
215 {0x0803, "ca_ES_VALENCIA"}
216};
217
218ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
219ILCID_POSIX_ELEMENT_ARRAY(0x045c, chr,chr_US)
220
Jungshik Shin87232d82017-05-13 21:10:13 -0700221// ICU has chosen different names for these.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000222ILCID_POSIX_SUBTABLE(ckb) {
223 {0x92, "ckb"},
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000224 {0x7c92, "ckb_Arab"},
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700225 {0x0492, "ckb_Arab_IQ"}
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000226};
227
228/* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */
229ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ)
230
231ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
232ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
233
Jungshik Shin87232d82017-05-13 21:10:13 -0700234// Windows doesn't know POSIX or BCP47 Unicode phonebook sort names
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000235ILCID_POSIX_SUBTABLE(de) {
236 {0x07, "de"},
237 {0x0c07, "de_AT"},
238 {0x0807, "de_CH"},
239 {0x0407, "de_DE"},
240 {0x1407, "de_LI"},
241 {0x1007, "de_LU"},
242 {0x10407,"de_DE@collation=phonebook"}, /*This is really de_DE_PHONEBOOK on Windows*/
243 {0x10407,"de@collation=phonebook"} /*This is really de_DE_PHONEBOOK on Windows*/
244};
245
246ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
247ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
248
Jungshik Shin87232d82017-05-13 21:10:13 -0700249// Windows uses an empty string for 'invariant'
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000250ILCID_POSIX_SUBTABLE(en) {
251 {0x09, "en"},
252 {0x0c09, "en_AU"},
253 {0x2809, "en_BZ"},
254 {0x1009, "en_CA"},
255 {0x0809, "en_GB"},
256 {0x3c09, "en_HK"},
257 {0x3809, "en_ID"},
258 {0x1809, "en_IE"},
259 {0x4009, "en_IN"},
260 {0x2009, "en_JM"},
261 {0x4409, "en_MY"},
262 {0x1409, "en_NZ"},
263 {0x3409, "en_PH"},
264 {0x4809, "en_SG"},
265 {0x2C09, "en_TT"},
266 {0x0409, "en_US"},
Jungshik Shin87232d82017-05-13 21:10:13 -0700267 {0x007f, "en_US_POSIX"}, /* duplicate for round-tripping */
268 {0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000269 {0x1c09, "en_ZA"},
270 {0x3009, "en_ZW"},
271 {0x2409, "en_029"},
Jungshik Shin87232d82017-05-13 21:10:13 -0700272 {0x0409, "en_AS"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
273 {0x0409, "en_GU"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
274 {0x0409, "en_MH"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
275 {0x0409, "en_MP"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
276 {0x0409, "en_UM"} /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000277};
278
279ILCID_POSIX_SUBTABLE(en_US_POSIX) {
280 {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
281};
282
Jungshik Shin87232d82017-05-13 21:10:13 -0700283// Windows doesn't know POSIX or BCP47 Unicode traditional sort names
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000284ILCID_POSIX_SUBTABLE(es) {
285 {0x0a, "es"},
286 {0x2c0a, "es_AR"},
287 {0x400a, "es_BO"},
288 {0x340a, "es_CL"},
289 {0x240a, "es_CO"},
290 {0x140a, "es_CR"},
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700291 {0x5c0a, "es_CU"},
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000292 {0x1c0a, "es_DO"},
293 {0x300a, "es_EC"},
294 {0x0c0a, "es_ES"}, /*Modern sort.*/
295 {0x100a, "es_GT"},
296 {0x480a, "es_HN"},
297 {0x080a, "es_MX"},
298 {0x4c0a, "es_NI"},
299 {0x180a, "es_PA"},
300 {0x280a, "es_PE"},
301 {0x500a, "es_PR"},
302 {0x3c0a, "es_PY"},
303 {0x440a, "es_SV"},
304 {0x540a, "es_US"},
305 {0x380a, "es_UY"},
306 {0x200a, "es_VE"},
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800307 {0x580a, "es_419"},
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000308 {0x040a, "es_ES@collation=traditional"},
Jungshik Shin87232d82017-05-13 21:10:13 -0700309 {0x040a, "es@collation=traditional"} // Windows will treat this as es-ES@collation=traditional
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000310};
311
312ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
313ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
314
315/* ISO-639 doesn't distinguish between Persian and Dari.*/
316ILCID_POSIX_SUBTABLE(fa) {
317 {0x29, "fa"},
318 {0x0429, "fa_IR"}, /* Persian/Farsi (Iran) */
319 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
320};
321
Jungshik Shin87232d82017-05-13 21:10:13 -0700322
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000323/* duplicate for roundtripping */
324ILCID_POSIX_SUBTABLE(fa_AF) {
325 {0x8c, "fa_AF"}, /* Persian/Dari (Afghanistan) */
326 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
327};
328
329ILCID_POSIX_SUBTABLE(ff) {
330 {0x67, "ff"},
331 {0x7c67, "ff_Latn"},
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700332 {0x0867, "ff_Latn_SN"},
333 {0x0467, "ff_NG"}
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000334};
335
336ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
337ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)
338ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
339
340ILCID_POSIX_SUBTABLE(fr) {
341 {0x0c, "fr"},
342 {0x080c, "fr_BE"},
343 {0x0c0c, "fr_CA"},
344 {0x240c, "fr_CD"},
345 {0x240c, "fr_CG"},
346 {0x100c, "fr_CH"},
347 {0x300c, "fr_CI"},
348 {0x2c0c, "fr_CM"},
349 {0x040c, "fr_FR"},
350 {0x3c0c, "fr_HT"},
351 {0x140c, "fr_LU"},
352 {0x380c, "fr_MA"},
353 {0x180c, "fr_MC"},
354 {0x340c, "fr_ML"},
355 {0x200c, "fr_RE"},
356 {0x280c, "fr_SN"},
357 {0xe40c, "fr_015"},
358 {0x1c0c, "fr_029"}
359};
360
361ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG)
362
363ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)
364
365ILCID_POSIX_SUBTABLE(ga) { /* Gaelic (Ireland) */
366 {0x3c, "ga"},
367 {0x083c, "ga_IE"},
368 {0x043c, "gd_GB"}
369};
370
371ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland) */
372 {0x91, "gd"},
373 {0x0491, "gd_GB"}
374};
375
376ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
377ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
378ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
379ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)
380
381ILCID_POSIX_SUBTABLE(ha) {
382 {0x68, "ha"},
383 {0x7c68, "ha_Latn"},
384 {0x0468, "ha_Latn_NG"},
385};
386
387ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
388ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
389ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
390
391/* This LCID is really four different locales.*/
392ILCID_POSIX_SUBTABLE(hr) {
393 {0x1a, "hr"},
394 {0x141a, "bs_Latn_BA"}, /* Bosnian, Bosnia and Herzegovina */
395 {0x681a, "bs_Latn"}, /* Bosnian, Bosnia and Herzegovina */
396 {0x141a, "bs_BA"}, /* Bosnian, Bosnia and Herzegovina */
397 {0x781a, "bs"}, /* Bosnian */
398 {0x201a, "bs_Cyrl_BA"}, /* Bosnian, Bosnia and Herzegovina */
399 {0x641a, "bs_Cyrl"}, /* Bosnian, Bosnia and Herzegovina */
400 {0x101a, "hr_BA"}, /* Croatian in Bosnia */
401 {0x041a, "hr_HR"}, /* Croatian*/
402 {0x2c1a, "sr_Latn_ME"},
403 {0x241a, "sr_Latn_RS"},
404 {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
405 {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
406 {0x701a, "sr_Latn"}, /* It's 0x1a or 0x081a, pick one to make the test program happy. */
407 {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
408 {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
409 {0x301a, "sr_Cyrl_ME"},
410 {0x281a, "sr_Cyrl_RS"},
411 {0x6c1a, "sr_Cyrl"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
412 {0x7c1a, "sr"} /* In CLDR sr is sr_Cyrl. */
413};
414
Jungshik Shin70f82502016-01-29 00:32:36 -0800415ILCID_POSIX_SUBTABLE(hsb) {
416 {0x2E, "hsb"},
417 {0x042E, "hsb_DE"},
418 {0x082E, "dsb_DE"},
419 {0x7C2E, "dsb"},
420};
421
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000422ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
423ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
424ILCID_POSIX_ELEMENT_ARRAY(0x0469, ibb, ibb_NG)
425ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
426ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
427ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
428ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
429
430ILCID_POSIX_SUBTABLE(it) {
431 {0x10, "it"},
432 {0x0810, "it_CH"},
433 {0x0410, "it_IT"}
434};
435
436ILCID_POSIX_SUBTABLE(iu) {
437 {0x5d, "iu"},
438 {0x045d, "iu_Cans_CA"},
439 {0x785d, "iu_Cans"},
440 {0x085d, "iu_Latn_CA"},
441 {0x7c5d, "iu_Latn"}
442};
443
444ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL) /*Left in for compatibility*/
445ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
446ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
447ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
448ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)
449ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
450ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
451
452ILCID_POSIX_SUBTABLE(ko) {
453 {0x12, "ko"},
454 {0x0812, "ko_KP"},
455 {0x0412, "ko_KR"}
456};
457
458ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
459ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr, kr_NG)
460
461ILCID_POSIX_SUBTABLE(ks) { /* We could add PK and CN too */
462 {0x60, "ks"},
463 {0x0860, "ks_IN"}, /* Documentation doesn't mention script */
464 {0x0460, "ks_Arab_IN"},
465 {0x0860, "ks_Deva_IN"}
466};
467
468ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */
469ILCID_POSIX_ELEMENT_ARRAY(0x0476, la, la_IT) /* TODO: Verify the country */
470ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
471ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
472ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
473ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
474ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
475ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
476ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
477
478ILCID_POSIX_SUBTABLE(mn) {
479 {0x50, "mn"},
480 {0x0450, "mn_MN"},
481 {0x7c50, "mn_Mong"},
482 {0x0850, "mn_Mong_CN"},
483 {0x0850, "mn_CN"},
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800484 {0x7850, "mn_Cyrl"},
485 {0x0c50, "mn_Mong_MN"}
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000486};
487
488ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
489ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
490ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
491
492ILCID_POSIX_SUBTABLE(ms) {
493 {0x3e, "ms"},
494 {0x083e, "ms_BN"}, /* Brunei Darussalam*/
495 {0x043e, "ms_MY"} /* Malaysia*/
496};
497
498ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
499ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)
500
501ILCID_POSIX_SUBTABLE(ne) {
502 {0x61, "ne"},
503 {0x0861, "ne_IN"}, /* India*/
504 {0x0461, "ne_NP"} /* Nepal*/
505};
506
507ILCID_POSIX_SUBTABLE(nl) {
508 {0x13, "nl"},
509 {0x0813, "nl_BE"},
510 {0x0413, "nl_NL"}
511};
512
513/* The "no" locale split into nb and nn. By default in ICU, "no" is nb.*/
Jungshik Shin87232d82017-05-13 21:10:13 -0700514// TODO: Not all of these are needed on Windows, but I don't know how ICU treats preferred ones here.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000515ILCID_POSIX_SUBTABLE(no) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700516 {0x14, "no"}, /* really nb_NO - actually Windows differentiates between neutral (no region) and specific (with region) */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000517 {0x7c14, "nb"}, /* really nb */
518 {0x0414, "nb_NO"}, /* really nb_NO. Keep first in the 414 list. */
519 {0x0414, "no_NO"}, /* really nb_NO */
520 {0x0814, "nn_NO"}, /* really nn_NO. Keep first in the 814 list. */
521 {0x7814, "nn"}, /* It's 0x14 or 0x814, pick one to make the test program happy. */
522 {0x0814, "no_NO_NY"}/* really nn_NO */
523};
524
525ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA) /* TODO: Verify the ISO-639 code */
526ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)
527
528ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */
529 {0x72, "om"},
530 {0x0472, "om_ET"},
531 {0x0472, "gaz_ET"}
532};
533
534/* Declared as or_IN to get around compiler errors*/
535ILCID_POSIX_SUBTABLE(or_IN) {
536 {0x48, "or"},
537 {0x0448, "or_IN"},
538};
539
540
541ILCID_POSIX_SUBTABLE(pa) {
542 {0x46, "pa"},
543 {0x0446, "pa_IN"},
544 {0x0846, "pa_PK"},
545 {0x0846, "pa_Arab_PK"}
546};
547
548ILCID_POSIX_ELEMENT_ARRAY(0x0479, pap, pap_AN)
549ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
550ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
551
552ILCID_POSIX_SUBTABLE(pt) {
553 {0x16, "pt"},
554 {0x0416, "pt_BR"},
555 {0x0816, "pt_PT"}
556};
557
558ILCID_POSIX_SUBTABLE(qu) {
559 {0x6b, "qu"},
560 {0x046b, "qu_BO"},
561 {0x086b, "qu_EC"},
562 {0x0C6b, "qu_PE"},
563 {0x046b, "quz_BO"},
564 {0x086b, "quz_EC"},
565 {0x0C6b, "quz_PE"}
566};
567
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700568ILCID_POSIX_SUBTABLE(quc) {
569 {0x93, "quc"},
570 {0x0493, "quc_CO"},
571 /*
572 "quc_Latn_GT" is an exceptional case. Language ID of "quc"
573 is 0x93, but LCID of "quc_Latn_GT" is 0x486, which should be
574 under the group of "qut". "qut" is a retired ISO 639-3 language
575 code for West Central Quiche, and merged to "quc".
576 It looks Windows previously reserved "qut" for K'iche', but,
577 decided to use "quc" when adding a locale for K'iche' (Guatemala).
578
579 This data structure used here assumes language ID bits in
580 LCID is unique for alphabetic language code. But this is not true
581 for "quc_Latn_GT". If we don't have the data below, LCID look up
582 by alphabetic locale ID (POSIX) will fail. The same entry is found
583 under "qut" below, which is required for reverse look up.
584 */
585 {0x0486, "quc_Latn_GT"}
586};
587
588ILCID_POSIX_SUBTABLE(qut) {
589 {0x86, "qut"},
590 {0x0486, "qut_GT"},
591 /*
592 See the note in "quc" above.
593 */
594 {0x0486, "quc_Latn_GT"}
595};
596
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000597ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)
598
599ILCID_POSIX_SUBTABLE(ro) {
600 {0x18, "ro"},
601 {0x0418, "ro_RO"},
602 {0x0818, "ro_MD"}
603};
604
Jungshik Shin87232d82017-05-13 21:10:13 -0700605// TODO: This is almost certainly 'wrong'. 0 in Windows is a synonym for LOCALE_USER_DEFAULT.
606// More likely this is a similar concept to the Windows 0x7f Invariant locale ""
607// (Except that it's not invariant in ICU)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000608ILCID_POSIX_SUBTABLE(root) {
609 {0x00, "root"}
610};
611
612ILCID_POSIX_SUBTABLE(ru) {
613 {0x19, "ru"},
614 {0x0419, "ru_RU"},
615 {0x0819, "ru_MD"}
616};
617
618ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)
619ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
620ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
621
622ILCID_POSIX_SUBTABLE(sd) {
623 {0x59, "sd"},
624 {0x0459, "sd_IN"},
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700625 {0x0459, "sd_Deva_IN"},
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000626 {0x0859, "sd_PK"}
627};
628
629ILCID_POSIX_SUBTABLE(se) {
630 {0x3b, "se"},
631 {0x0c3b, "se_FI"},
632 {0x043b, "se_NO"},
633 {0x083b, "se_SE"},
634 {0x783b, "sma"},
635 {0x183b, "sma_NO"},
636 {0x1c3b, "sma_SE"},
637 {0x7c3b, "smj"},
638 {0x703b, "smn"},
639 {0x743b, "sms"},
640 {0x103b, "smj_NO"},
641 {0x143b, "smj_SE"},
642 {0x243b, "smn_FI"},
643 {0x203b, "sms_FI"},
644};
645
646ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
647ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
648ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
649
650ILCID_POSIX_SUBTABLE(so) { /* TODO: Verify the country */
651 {0x77, "so"},
652 {0x0477, "so_ET"},
653 {0x0477, "so_SO"}
654};
655
656ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
657ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA)
658
659ILCID_POSIX_SUBTABLE(sv) {
660 {0x1d, "sv"},
661 {0x081d, "sv_FI"},
662 {0x041d, "sv_SE"}
663};
664
665ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
666ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
667
668ILCID_POSIX_SUBTABLE(ta) {
669 {0x49, "ta"},
670 {0x0449, "ta_IN"},
671 {0x0849, "ta_LK"}
672};
673
674ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
675
676/* Cyrillic based by default */
677ILCID_POSIX_SUBTABLE(tg) {
678 {0x28, "tg"},
679 {0x7c28, "tg_Cyrl"},
680 {0x0428, "tg_Cyrl_TJ"}
681};
682
683ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
684
685ILCID_POSIX_SUBTABLE(ti) {
686 {0x73, "ti"},
687 {0x0873, "ti_ER"},
688 {0x0473, "ti_ET"}
689};
690
691ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)
692
693ILCID_POSIX_SUBTABLE(tn) {
694 {0x32, "tn"},
695 {0x0832, "tn_BW"},
696 {0x0432, "tn_ZA"}
697};
698
699ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
700ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA)
701ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
702
703ILCID_POSIX_SUBTABLE(tzm) {
704 {0x5f, "tzm"},
705 {0x7c5f, "tzm_Latn"},
706 {0x085f, "tzm_Latn_DZ"},
707 {0x105f, "tzm_Tfng_MA"},
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700708 {0x045f, "tzm_Arab_MA"},
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000709 {0x045f, "tmz"}
710};
711
712ILCID_POSIX_SUBTABLE(ug) {
713 {0x80, "ug"},
714 {0x0480, "ug_CN"},
715 {0x0480, "ug_Arab_CN"}
716};
717
718ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
719
720ILCID_POSIX_SUBTABLE(ur) {
721 {0x20, "ur"},
722 {0x0820, "ur_IN"},
723 {0x0420, "ur_PK"}
724};
725
726ILCID_POSIX_SUBTABLE(uz) {
727 {0x43, "uz"},
728 {0x0843, "uz_Cyrl_UZ"}, /* Cyrillic based */
729 {0x7843, "uz_Cyrl"}, /* Cyrillic based */
730 {0x0843, "uz_UZ"}, /* Cyrillic based */
731 {0x0443, "uz_Latn_UZ"}, /* Latin based */
732 {0x7c43, "uz_Latn"} /* Latin based */
733};
734
735ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */
736 {0x33, "ve"},
737 {0x0433, "ve_ZA"},
738 {0x0433, "ven_ZA"}
739};
740
741ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000742ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
743ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
744ILCID_POSIX_ELEMENT_ARRAY(0x043d, yi, yi)
745ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
746
Jungshik Shin87232d82017-05-13 21:10:13 -0700747// Windows & ICU tend to different names for some of these
748// TODO: Windows probably does not need all of these entries, but I don't know how the precedence works.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000749ILCID_POSIX_SUBTABLE(zh) {
750 {0x0004, "zh_Hans"},
751 {0x7804, "zh"},
752 {0x0804, "zh_CN"},
753 {0x0804, "zh_Hans_CN"},
754 {0x0c04, "zh_Hant_HK"},
755 {0x0c04, "zh_HK"},
756 {0x1404, "zh_Hant_MO"},
757 {0x1404, "zh_MO"},
758 {0x1004, "zh_Hans_SG"},
759 {0x1004, "zh_SG"},
760 {0x0404, "zh_Hant_TW"},
761 {0x7c04, "zh_Hant"},
762 {0x0404, "zh_TW"},
763 {0x30404,"zh_Hant_TW"}, /* Bopomofo order */
764 {0x30404,"zh_TW"}, /* Bopomofo order */
765 {0x20004,"zh@collation=stroke"},
766 {0x20404,"zh_Hant@collation=stroke"},
767 {0x20404,"zh_Hant_TW@collation=stroke"},
768 {0x20404,"zh_TW@collation=stroke"},
769 {0x20804,"zh_Hans@collation=stroke"},
770 {0x20804,"zh_Hans_CN@collation=stroke"},
771 {0x20804,"zh_CN@collation=stroke"}
Jungshik Shin87232d82017-05-13 21:10:13 -0700772 // TODO: Alternate collations for other LCIDs are missing, eg: 0x50804
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000773};
774
775ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
776
777/* This must be static and grouped by LCID. */
778static const ILcidPosixMap gPosixIDmap[] = {
779 ILCID_POSIX_MAP(af), /* af Afrikaans 0x36 */
780 ILCID_POSIX_MAP(am), /* am Amharic 0x5e */
781 ILCID_POSIX_MAP(ar), /* ar Arabic 0x01 */
782 ILCID_POSIX_MAP(arn), /* arn Araucanian/Mapudungun 0x7a */
783 ILCID_POSIX_MAP(as), /* as Assamese 0x4d */
784 ILCID_POSIX_MAP(az), /* az Azerbaijani 0x2c */
785 ILCID_POSIX_MAP(ba), /* ba Bashkir 0x6d */
786 ILCID_POSIX_MAP(be), /* be Belarusian 0x23 */
787/* ILCID_POSIX_MAP(ber), ber Berber/Tamazight 0x5f */
788 ILCID_POSIX_MAP(bg), /* bg Bulgarian 0x02 */
789 ILCID_POSIX_MAP(bin), /* bin Edo 0x66 */
790 ILCID_POSIX_MAP(bn), /* bn Bengali; Bangla 0x45 */
791 ILCID_POSIX_MAP(bo), /* bo Tibetan 0x51 */
792 ILCID_POSIX_MAP(br), /* br Breton 0x7e */
793 ILCID_POSIX_MAP(ca), /* ca Catalan 0x03 */
794 ILCID_POSIX_MAP(chr), /* chr Cherokee 0x5c */
795 ILCID_POSIX_MAP(ckb), /* ckb Sorani (Central Kurdish) 0x92 */
796 ILCID_POSIX_MAP(co), /* co Corsican 0x83 */
797 ILCID_POSIX_MAP(cs), /* cs Czech 0x05 */
798 ILCID_POSIX_MAP(cy), /* cy Welsh 0x52 */
799 ILCID_POSIX_MAP(da), /* da Danish 0x06 */
800 ILCID_POSIX_MAP(de), /* de German 0x07 */
801 ILCID_POSIX_MAP(dv), /* dv Divehi 0x65 */
802 ILCID_POSIX_MAP(el), /* el Greek 0x08 */
803 ILCID_POSIX_MAP(en), /* en English 0x09 */
804 ILCID_POSIX_MAP(en_US_POSIX), /* invariant 0x7f */
805 ILCID_POSIX_MAP(es), /* es Spanish 0x0a */
806 ILCID_POSIX_MAP(et), /* et Estonian 0x25 */
807 ILCID_POSIX_MAP(eu), /* eu Basque 0x2d */
808 ILCID_POSIX_MAP(fa), /* fa Persian/Farsi 0x29 */
809 ILCID_POSIX_MAP(fa_AF), /* fa Persian/Dari 0x8c */
810 ILCID_POSIX_MAP(ff), /* ff Fula 0x67 */
811 ILCID_POSIX_MAP(fi), /* fi Finnish 0x0b */
812 ILCID_POSIX_MAP(fil), /* fil Filipino 0x64 */
813 ILCID_POSIX_MAP(fo), /* fo Faroese 0x38 */
814 ILCID_POSIX_MAP(fr), /* fr French 0x0c */
815 ILCID_POSIX_MAP(fuv), /* fuv Fulfulde - Nigeria 0x67 */
816 ILCID_POSIX_MAP(fy), /* fy Frisian 0x62 */
817 ILCID_POSIX_MAP(ga), /* * Gaelic (Ireland,Scotland) 0x3c */
818 ILCID_POSIX_MAP(gd), /* gd Gaelic (United Kingdom) 0x91 */
819 ILCID_POSIX_MAP(gl), /* gl Galician 0x56 */
820 ILCID_POSIX_MAP(gn), /* gn Guarani 0x74 */
821 ILCID_POSIX_MAP(gsw), /* gsw Alemanic/Alsatian/Swiss German 0x84 */
822 ILCID_POSIX_MAP(gu), /* gu Gujarati 0x47 */
823 ILCID_POSIX_MAP(ha), /* ha Hausa 0x68 */
824 ILCID_POSIX_MAP(haw), /* haw Hawaiian 0x75 */
825 ILCID_POSIX_MAP(he), /* he Hebrew (formerly iw) 0x0d */
826 ILCID_POSIX_MAP(hi), /* hi Hindi 0x39 */
827 ILCID_POSIX_MAP(hr), /* * Croatian and others 0x1a */
Jungshik Shin70f82502016-01-29 00:32:36 -0800828 ILCID_POSIX_MAP(hsb), /* hsb Upper Sorbian 0x2e */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000829 ILCID_POSIX_MAP(hu), /* hu Hungarian 0x0e */
830 ILCID_POSIX_MAP(hy), /* hy Armenian 0x2b */
831 ILCID_POSIX_MAP(ibb), /* ibb Ibibio - Nigeria 0x69 */
832 ILCID_POSIX_MAP(id), /* id Indonesian (formerly in) 0x21 */
833 ILCID_POSIX_MAP(ig), /* ig Igbo 0x70 */
834 ILCID_POSIX_MAP(ii), /* ii Sichuan Yi 0x78 */
835 ILCID_POSIX_MAP(is), /* is Icelandic 0x0f */
836 ILCID_POSIX_MAP(it), /* it Italian 0x10 */
837 ILCID_POSIX_MAP(iu), /* iu Inuktitut 0x5d */
838 ILCID_POSIX_MAP(iw), /* iw Hebrew 0x0d */
839 ILCID_POSIX_MAP(ja), /* ja Japanese 0x11 */
840 ILCID_POSIX_MAP(ka), /* ka Georgian 0x37 */
841 ILCID_POSIX_MAP(kk), /* kk Kazakh 0x3f */
842 ILCID_POSIX_MAP(kl), /* kl Kalaallisut 0x6f */
843 ILCID_POSIX_MAP(km), /* km Khmer 0x53 */
844 ILCID_POSIX_MAP(kn), /* kn Kannada 0x4b */
845 ILCID_POSIX_MAP(ko), /* ko Korean 0x12 */
846 ILCID_POSIX_MAP(kok), /* kok Konkani 0x57 */
847 ILCID_POSIX_MAP(kr), /* kr Kanuri 0x71 */
848 ILCID_POSIX_MAP(ks), /* ks Kashmiri 0x60 */
849 ILCID_POSIX_MAP(ky), /* ky Kyrgyz 0x40 */
850 ILCID_POSIX_MAP(lb), /* lb Luxembourgish 0x6e */
851 ILCID_POSIX_MAP(la), /* la Latin 0x76 */
852 ILCID_POSIX_MAP(lo), /* lo Lao 0x54 */
853 ILCID_POSIX_MAP(lt), /* lt Lithuanian 0x27 */
854 ILCID_POSIX_MAP(lv), /* lv Latvian, Lettish 0x26 */
855 ILCID_POSIX_MAP(mi), /* mi Maori 0x81 */
856 ILCID_POSIX_MAP(mk), /* mk Macedonian 0x2f */
857 ILCID_POSIX_MAP(ml), /* ml Malayalam 0x4c */
858 ILCID_POSIX_MAP(mn), /* mn Mongolian 0x50 */
859 ILCID_POSIX_MAP(mni), /* mni Manipuri 0x58 */
860 ILCID_POSIX_MAP(moh), /* moh Mohawk 0x7c */
861 ILCID_POSIX_MAP(mr), /* mr Marathi 0x4e */
862 ILCID_POSIX_MAP(ms), /* ms Malay 0x3e */
863 ILCID_POSIX_MAP(mt), /* mt Maltese 0x3a */
864 ILCID_POSIX_MAP(my), /* my Burmese 0x55 */
865/* ILCID_POSIX_MAP(nb), // no Norwegian 0x14 */
866 ILCID_POSIX_MAP(ne), /* ne Nepali 0x61 */
867 ILCID_POSIX_MAP(nl), /* nl Dutch 0x13 */
868/* ILCID_POSIX_MAP(nn), // no Norwegian 0x14 */
869 ILCID_POSIX_MAP(no), /* * Norwegian 0x14 */
870 ILCID_POSIX_MAP(nso), /* nso Sotho, Northern (Sepedi dialect) 0x6c */
871 ILCID_POSIX_MAP(oc), /* oc Occitan 0x82 */
872 ILCID_POSIX_MAP(om), /* om Oromo 0x72 */
873 ILCID_POSIX_MAP(or_IN), /* or Oriya 0x48 */
874 ILCID_POSIX_MAP(pa), /* pa Punjabi 0x46 */
875 ILCID_POSIX_MAP(pap), /* pap Papiamentu 0x79 */
876 ILCID_POSIX_MAP(pl), /* pl Polish 0x15 */
877 ILCID_POSIX_MAP(ps), /* ps Pashto 0x63 */
878 ILCID_POSIX_MAP(pt), /* pt Portuguese 0x16 */
879 ILCID_POSIX_MAP(qu), /* qu Quechua 0x6B */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700880 ILCID_POSIX_MAP(quc), /* quc K'iche 0x93 */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000881 ILCID_POSIX_MAP(qut), /* qut K'iche 0x86 */
882 ILCID_POSIX_MAP(rm), /* rm Raeto-Romance/Romansh 0x17 */
883 ILCID_POSIX_MAP(ro), /* ro Romanian 0x18 */
884 ILCID_POSIX_MAP(root), /* root 0x00 */
885 ILCID_POSIX_MAP(ru), /* ru Russian 0x19 */
886 ILCID_POSIX_MAP(rw), /* rw Kinyarwanda 0x87 */
887 ILCID_POSIX_MAP(sa), /* sa Sanskrit 0x4f */
888 ILCID_POSIX_MAP(sah), /* sah Yakut 0x85 */
889 ILCID_POSIX_MAP(sd), /* sd Sindhi 0x59 */
890 ILCID_POSIX_MAP(se), /* se Sami 0x3b */
891/* ILCID_POSIX_MAP(sh), // sh Serbo-Croatian 0x1a */
892 ILCID_POSIX_MAP(si), /* si Sinhalese 0x5b */
893 ILCID_POSIX_MAP(sk), /* sk Slovak 0x1b */
894 ILCID_POSIX_MAP(sl), /* sl Slovenian 0x24 */
895 ILCID_POSIX_MAP(so), /* so Somali 0x77 */
896 ILCID_POSIX_MAP(sq), /* sq Albanian 0x1c */
897/* ILCID_POSIX_MAP(sr), // sr Serbian 0x1a */
898 ILCID_POSIX_MAP(st), /* st Sutu 0x30 */
899 ILCID_POSIX_MAP(sv), /* sv Swedish 0x1d */
900 ILCID_POSIX_MAP(sw), /* sw Swahili 0x41 */
901 ILCID_POSIX_MAP(syr), /* syr Syriac 0x5A */
902 ILCID_POSIX_MAP(ta), /* ta Tamil 0x49 */
903 ILCID_POSIX_MAP(te), /* te Telugu 0x4a */
904 ILCID_POSIX_MAP(tg), /* tg Tajik 0x28 */
905 ILCID_POSIX_MAP(th), /* th Thai 0x1e */
906 ILCID_POSIX_MAP(ti), /* ti Tigrigna 0x73 */
907 ILCID_POSIX_MAP(tk), /* tk Turkmen 0x42 */
908 ILCID_POSIX_MAP(tn), /* tn Tswana 0x32 */
909 ILCID_POSIX_MAP(tr), /* tr Turkish 0x1f */
910 ILCID_POSIX_MAP(ts), /* ts Tsonga 0x31 */
911 ILCID_POSIX_MAP(tt), /* tt Tatar 0x44 */
912 ILCID_POSIX_MAP(tzm), /* tzm Tamazight 0x5f */
913 ILCID_POSIX_MAP(ug), /* ug Uighur 0x80 */
914 ILCID_POSIX_MAP(uk), /* uk Ukrainian 0x22 */
915 ILCID_POSIX_MAP(ur), /* ur Urdu 0x20 */
916 ILCID_POSIX_MAP(uz), /* uz Uzbek 0x43 */
917 ILCID_POSIX_MAP(ve), /* ve Venda 0x33 */
918 ILCID_POSIX_MAP(vi), /* vi Vietnamese 0x2a */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000919 ILCID_POSIX_MAP(wo), /* wo Wolof 0x88 */
920 ILCID_POSIX_MAP(xh), /* xh Xhosa 0x34 */
921 ILCID_POSIX_MAP(yi), /* yi Yiddish 0x3d */
922 ILCID_POSIX_MAP(yo), /* yo Yoruba 0x6a */
923 ILCID_POSIX_MAP(zh), /* zh Chinese 0x04 */
924 ILCID_POSIX_MAP(zu), /* zu Zulu 0x35 */
925};
926
/* Number of per-language entries in gPosixIDmap; used as the bound for every scan of that table. */
static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000928
/**
 * Measures how many leading characters two NUL-terminated IDs have in common.
 *
 * File-local helper; getHostID() uses it to rank candidate region entries.
 *
 * @param id1 first NUL-terminated ID.
 * @param id2 second NUL-terminated ID.
 * @return the length of the longest common prefix of id1 and id2
 *         (0 when the first characters differ or id1 is empty).
 */
static int32_t
idCmp(const char* id1, const char* id2)
{
    int32_t prefixLen;

    /* Advance while both strings agree and id1 has not ended; a matching
       character that is not NUL in id1 cannot be NUL in id2 either. */
    for (prefixLen = 0;
         id1[prefixLen] != 0 && id1[prefixLen] == id2[prefixLen];
         prefixLen++) {
    }
    return prefixLen;
}
945
946/**
947 * Searches for a Windows LCID
948 *
949 * @param posixid the Posix style locale id.
950 * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
951 * no equivalent Windows LCID.
952 * @return the LCID
953 */
954static uint32_t
955getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
956{
957 int32_t bestIdx = 0;
958 int32_t bestIdxDiff = 0;
959 int32_t posixIDlen = (int32_t)uprv_strlen(posixID);
960 uint32_t idx;
961
962 for (idx = 0; idx < this_0->numRegions; idx++ ) {
963 int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
964 if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
965 if (posixIDlen == sameChars) {
966 /* Exact match */
967 return this_0->regionMaps[idx].hostID;
968 }
969 bestIdxDiff = sameChars;
970 bestIdx = idx;
971 }
972 }
973 /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
974 /* We also have to make sure that sid and si and similar string subsets don't match. */
975 if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
976 && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
977 {
978 *status = U_USING_FALLBACK_WARNING;
979 return this_0->regionMaps[bestIdx].hostID;
980 }
981
982 /*no match found */
983 *status = U_ILLEGAL_ARGUMENT_ERROR;
984 return this_0->regionMaps->hostID;
985}
986
987static const char*
988getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
989{
990 uint32_t i;
991 for (i = 0; i <= this_0->numRegions; i++)
992 {
993 if (this_0->regionMaps[i].hostID == hostID)
994 {
995 return this_0->regionMaps[i].posixID;
996 }
997 }
998
999 /* If you get here, then no matching region was found,
1000 so return the language id with the wild card region. */
1001 return this_0->regionMaps[0].posixID;
1002}
1003
1004/*
1005//////////////////////////////////////
1006//
1007// LCID --> POSIX
1008//
1009/////////////////////////////////////
1010*/
#ifdef USE_WINDOWS_LCID_MAPPING_API
/*
 * Rewrites, in place, language tags that need to be changed:
 *      quz -> qu
 *      prs -> fa
 *
 * buffer must be a writable, NUL-terminated char array and len the number of
 * characters available in it; nothing happens when len is below 3.
 *
 * The tail of the string is shifted down by one character with
 * uprv_memmove(): source and destination overlap, which string-append
 * routines do not permit. The body is wrapped in do { } while (0) so the
 * macro behaves like a single statement at the call site.
 */
#define FIX_LANGUAGE_ID_TAG(buffer, len) \
    do { \
        if ((len) >= 3) { \
            if ((buffer)[0] == 'q' && (buffer)[1] == 'u' && (buffer)[2] == 'z') { \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } else if ((buffer)[0] == 'p' && (buffer)[1] == 'r' && (buffer)[2] == 's') { \
                (buffer)[0] = 'f'; \
                (buffer)[1] = 'a'; \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } \
        } \
    } while (0)

#endif
1029U_CAPI int32_t
1030uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)
1031{
1032 uint16_t langID;
1033 uint32_t localeIndex;
1034 UBool bLookup = TRUE;
1035 const char *pPosixID = NULL;
1036
Jungshik Shin87232d82017-05-13 21:10:13 -07001037#ifdef USE_WINDOWS_LCID_MAPPING_API
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001038 // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and
1039 // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for
1040 // Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot
1041 // use the Windows API to resolve locale ID for this specific case.
Jungshik Shin87232d82017-05-13 21:10:13 -07001042 if ((hostid & 0x3FF) != 0x92) {
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001043 int32_t tmpLen = 0;
Jungshik Shin87232d82017-05-13 21:10:13 -07001044 UChar windowsLocaleName[LOCALE_NAME_MAX_LENGTH]; // ULOC_FULLNAME_CAPACITY > LOCALE_NAME_MAX_LENGTH
1045 char locName[LOCALE_NAME_MAX_LENGTH]; // ICU name can't be longer than Windows name
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001046
Jungshik Shin87232d82017-05-13 21:10:13 -07001047 // Note: LOCALE_ALLOW_NEUTRAL_NAMES was enabled in Windows7+, prior versions did not handle neutral (no-region) locale names.
1048 tmpLen = LCIDToLocaleName(hostid, (PWSTR)windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName), LOCALE_ALLOW_NEUTRAL_NAMES);
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001049 if (tmpLen > 1) {
Jungshik Shin87232d82017-05-13 21:10:13 -07001050 int32_t i = 0;
1051 // Only need to look up in table if have _, eg for de-de_phoneb type alternate sort.
1052 bLookup = FALSE;
1053 for (i = 0; i < UPRV_LENGTHOF(locName); i++)
1054 {
1055 locName[i] = (char)(windowsLocaleName[i]);
1056
1057 // Windows locale name may contain sorting variant, such as "es-ES_tradnl".
1058 // In such cases, we need special mapping data found in the hardcoded table
1059 // in this source file.
1060 if (windowsLocaleName[i] == L'_')
1061 {
1062 // Keep the base locale, without variant
1063 // TODO: Should these be mapped from _phoneb to @collation=phonebook, etc.?
1064 locName[i] = '\0';
1065 tmpLen = i;
1066 bLookup = TRUE;
1067 break;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001068 }
Jungshik Shin87232d82017-05-13 21:10:13 -07001069 else if (windowsLocaleName[i] == L'-')
1070 {
1071 // Windows names use -, ICU uses _
1072 locName[i] = '_';
1073 }
1074 else if (windowsLocaleName[i] == L'\0')
1075 {
1076 // No point in doing more work than necessary
1077 break;
1078 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001079 }
Jungshik Shin87232d82017-05-13 21:10:13 -07001080 // TODO: Need to understand this better, why isn't it an alias?
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001081 FIX_LANGUAGE_ID_TAG(locName, tmpLen);
1082 pPosixID = locName;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001083 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001084 }
Jungshik Shin87232d82017-05-13 21:10:13 -07001085#endif // USE_WINDOWS_LCID_MAPPING_API
1086
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001087 if (bLookup) {
1088 const char *pCandidate = NULL;
1089 langID = LANGUAGE_LCID(hostid);
1090
1091 for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) {
1092 if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) {
1093 pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid);
1094 break;
1095 }
1096 }
1097
1098 /* On Windows, when locale name has a variant, we still look up the hardcoded table.
1099 If a match in the hardcoded table is longer than the Windows locale name without
1100 variant, we use the one as the result */
1101 if (pCandidate && (pPosixID == NULL || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) {
1102 pPosixID = pCandidate;
1103 }
1104 }
1105
1106 if (pPosixID) {
1107 int32_t resLen = uprv_strlen(pPosixID);
1108 int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity;
1109 uprv_memcpy(posixID, pPosixID, copyLen);
1110 if (resLen < posixIDCapacity) {
1111 posixID[resLen] = 0;
1112 if (*status == U_STRING_NOT_TERMINATED_WARNING) {
1113 *status = U_ZERO_ERROR;
1114 }
1115 } else if (resLen == posixIDCapacity) {
1116 *status = U_STRING_NOT_TERMINATED_WARNING;
1117 } else {
1118 *status = U_BUFFER_OVERFLOW_ERROR;
1119 }
1120 return resLen;
1121 }
1122
1123 /* no match found */
1124 *status = U_ILLEGAL_ARGUMENT_ERROR;
1125 return -1;
1126}
1127
1128/*
1129//////////////////////////////////////
1130//
1131// POSIX --> LCID
1132// This should only be called from uloc_getLCID.
1133// The locale ID must be in canonical form.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001134//
1135/////////////////////////////////////
1136*/
Jungshik Shin87232d82017-05-13 21:10:13 -07001137U_CAPI uint32_t
1138uprv_convertToLCIDPlatform(const char* localeID)
1139{
1140 // The purpose of this function is to leverage native platform name->lcid
1141 // conversion functionality when available.
1142#ifdef USE_WINDOWS_LCID_MAPPING_API
1143 DWORD nameLCIDFlags = 0;
1144 UErrorCode myStatus = U_ZERO_ERROR;
1145
1146 // First check for a Windows name->LCID match, fall through to catch
1147 // ICU special cases, but Windows may know it already.
1148#if LOCALE_ALLOW_NEUTRAL_NAMES
1149 nameLCIDFlags = LOCALE_ALLOW_NEUTRAL_NAMES;
1150#endif /* LOCALE_ALLOW_NEUTRAL_NAMES */
1151
1152 int32_t len;
1153 char collVal[ULOC_KEYWORDS_CAPACITY] = {};
1154 char baseName[ULOC_FULLNAME_CAPACITY] = {};
1155 const char * mylocaleID = localeID;
1156
1157 // Check any for keywords.
1158 if (uprv_strchr(localeID, '@'))
1159 {
1160 len = uloc_getKeywordValue(localeID, "collation", collVal, UPRV_LENGTHOF(collVal) - 1, &myStatus);
1161 if (U_SUCCESS(myStatus) && len > 0)
1162 {
1163 // If it contains the keyword collation, return 0 so that the LCID lookup table will be used.
1164 return 0;
1165 }
1166 else
1167 {
1168 // If the locale ID contains keywords other than collation, just use the base name.
1169 len = uloc_getBaseName(localeID, baseName, UPRV_LENGTHOF(baseName) - 1, &myStatus);
1170
1171 if (U_SUCCESS(myStatus) && len > 0)
1172 {
1173 baseName[len] = 0;
1174 mylocaleID = baseName;
1175 }
1176 }
1177 }
1178
1179 char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {};
1180 // this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form
1181 int32_t bcp47Len = uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, &myStatus);
1182
1183 if (U_SUCCESS(myStatus))
1184 {
1185 // Need it to be UTF-16, not 8-bit
1186 wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {};
1187 int32_t i;
1188 for (i = 0; i < UPRV_LENGTHOF(bcp47Tag); i++)
1189 {
1190 if (asciiBCP47Tag[i] == '\0')
1191 {
1192 break;
1193 }
1194 else
1195 {
1196 // Copy the character
1197 bcp47Tag[i] = static_cast<wchar_t>(asciiBCP47Tag[i]);
1198 }
1199 }
1200
1201 if (i < (UPRV_LENGTHOF(bcp47Tag) - 1))
1202 {
1203 // Ensure it's null terminated
1204 bcp47Tag[i] = L'\0';
1205 LCID lcid = LocaleNameToLCID(bcp47Tag, nameLCIDFlags);
1206 if (lcid > 0)
1207 {
1208 // Found LCID from windows, return that one, unless its completely ambiguous
1209 // LOCALE_USER_DEFAULT and transients are OK because they will round trip
1210 // for this process.
1211 if (lcid != LOCALE_CUSTOM_UNSPECIFIED)
1212 {
1213 return lcid;
1214 }
1215 }
1216 }
1217 }
1218#endif /* USE_WINDOWS_LCID_MAPPING_API */
1219
1220 // No found, or not implemented on platforms without native name->lcid conversion
1221 return 0;
1222}
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001223
1224U_CAPI uint32_t
1225uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
1226{
Jungshik Shin87232d82017-05-13 21:10:13 -07001227 // This function does the table lookup when native platform name->lcid conversion isn't available,
1228 // or for locales that don't follow patterns the platform expects.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001229 uint32_t low = 0;
1230 uint32_t high = gLocaleCount;
1231 uint32_t mid;
1232 uint32_t oldmid = 0;
1233 int32_t compVal;
1234
1235 uint32_t value = 0;
1236 uint32_t fallbackValue = (uint32_t)-1;
1237 UErrorCode myStatus;
1238 uint32_t idx;
1239
1240 /* Check for incomplete id. */
1241 if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
1242 return 0;
1243 }
1244
1245 /*Binary search for the map entry for normal cases */
1246
1247 while (high > low) /*binary search*/{
1248
1249 mid = (high+low) >> 1; /*Finds median*/
1250
1251 if (mid == oldmid)
1252 break;
1253
1254 compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);
1255 if (compVal < 0){
1256 high = mid;
1257 }
1258 else if (compVal > 0){
1259 low = mid;
1260 }
1261 else /*we found it*/{
1262 return getHostID(&gPosixIDmap[mid], posixID, status);
1263 }
1264 oldmid = mid;
1265 }
1266
1267 /*
1268 * Sometimes we can't do a binary search on posixID because some LCIDs
1269 * go to different locales. We hit one of those special cases.
1270 */
1271 for (idx = 0; idx < gLocaleCount; idx++ ) {
1272 myStatus = U_ZERO_ERROR;
1273 value = getHostID(&gPosixIDmap[idx], posixID, &myStatus);
1274 if (myStatus == U_ZERO_ERROR) {
1275 return value;
1276 }
1277 else if (myStatus == U_USING_FALLBACK_WARNING) {
1278 fallbackValue = value;
1279 }
1280 }
1281
1282 if (fallbackValue != (uint32_t)-1) {
1283 *status = U_USING_FALLBACK_WARNING;
1284 return fallbackValue;
1285 }
1286
1287 /* no match found */
1288 *status = U_ILLEGAL_ARGUMENT_ERROR;
1289 return 0; /* return international (root) */
1290}