blob: 4c89118a34a3714a549cb02ba607917a36f93254 [file] [log] [blame]
Jungshik Shin87232d82017-05-13 21:10:13 -07001// © 2016 and later: Unicode, Inc. and others.
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002// License & terms of use: http://www.unicode.org/copyright.html
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00003/*
4 **********************************************************************
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07005 * Copyright (C) 1996-2016, International Business Machines
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00006 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 *
9 * Provides functionality for mapping between
10 * LCID and Posix IDs or ICU locale to codepage
11 *
12 * Note: All classes and code in this file are
13 * intended for internal use only.
14 *
15 * Methods of interest:
16 * unsigned long convertToLCID(const char*);
17 * const char* convertToPosix(unsigned long);
18 *
19 * Kathleen Wilson, 4/30/96
20 *
21 * Date Name Description
22 * 3/11/97 aliu Fixed off-by-one bug in assignment operator. Added
23 * setId() method and safety check against
24 * MAX_ID_LENGTH.
25 * 04/23/99 stephen Added C wrapper for convertToPosix.
26 * 09/18/00 george Removed the memory leaks.
27 * 08/23/01 george Convert to C
28 */
29
30#include "locmap.h"
31#include "cstring.h"
32#include "cmemory.h"
Jungshik Shin87232d82017-05-13 21:10:13 -070033#include "unicode/uloc.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000034
Jungshik Shine0d9b902016-10-28 12:56:54 -070035#if 0
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000036#if U_PLATFORM == U_PF_WINDOWS && defined(_MSC_VER) && (_MSC_VER >= 1500)
37/*
38 * TODO: It seems like we should widen this to
39 * either U_PLATFORM_USES_ONLY_WIN32_API (includes MinGW)
40 * or U_PLATFORM_HAS_WIN32_API (includes MinGW and Cygwin)
41 * but those use gcc and won't have defined(_MSC_VER).
42 * We might need to #include some Windows header and test for some version macro from there.
43 * Or call some Windows function and see what it returns.
44 */
Jungshik Shin87232d82017-05-13 21:10:13 -070045#define USE_WINDOWS_LCID_MAPPING_API
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000046#include <windows.h>
47#include <winnls.h>
48#endif
Jungshik Shin87232d82017-05-13 21:10:13 -070049#endif
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000050
51/*
52 * Note:
53 * The mapping from Win32 locale ID numbers to POSIX locale strings should
54 * be the faster one.
55 *
Jungshik Shin87232d82017-05-13 21:10:13 -070056 * Windows LCIDs are defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
57 * [MS-LCID] Windows Language Code Identifier (LCID) Reference
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000058 */
59
60/*
61////////////////////////////////////////////////
62//
63// Internal Classes for LCID <--> POSIX Mapping
64//
65/////////////////////////////////////////////////
66*/
67
/**
 * One row of an LCID <--> POSIX mapping table: a host locale ID paired
 * with the POSIX locale ID string it corresponds to.
 * Both members are const; rows are only ever created by static initializers.
 */
typedef struct ILcidPosixElement
{
    const uint32_t hostID;          /* LCID in host format, such as 0x044d */
    const char * const posixID;     /* POSIX ID, such as "de" or "de_CH" */
} ILcidPosixElement;
73
/**
 * Descriptor for one subtable of ILcidPosixElement rows:
 * the number of rows and a pointer to the first row.
 */
typedef struct ILcidPosixMap
{
    const uint32_t numRegions;                          /* number of rows in regionMaps */
    const struct ILcidPosixElement* const regionMaps;   /* first row of the subtable */
} ILcidPosixMap;
79
80
81/*
82/////////////////////////////////////////////////
83//
84// Easy macros to make the LCID <--> POSIX Mapping
85//
86/////////////////////////////////////////////////
87*/
88
/**
 * The standard one language/one country mapping for LCID.
 * The first element must be the language, and the following
 * elements are the language with the country.
 * The expansion defines a two-row array named locmap_<languageID> and already
 * ends with "};", so invocations take no trailing semicolon.
 * @param hostID LCID in host format such as 0x044d
 * @param languageID posix ID of just the language such as 'de'
 * @param posixID posix ID of the language_TERRITORY such as 'de_CH'
 */
#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
static const ILcidPosixElement locmap_ ## languageID [] = { \
    {LANGUAGE_LCID(hostID), #languageID},     /* parent locale */ \
    {hostID, #posixID}, \
};
102
/**
 * Define a subtable by ID.
 * Expands to the declaration of the array locmap_<id> up to and including
 * the '='; the caller supplies the brace-enclosed rows and the closing ';'.
 * @param id the POSIX ID, either a language or language_TERRITORY
 */
#define ILCID_POSIX_SUBTABLE(id) \
static const ILcidPosixElement locmap_ ## id [] =
109
110
/**
 * Create the map for the posixID. This macro supposes that the language string
 * name is the same as the global variable name, and that the first element
 * in the ILcidPosixElement is just the language.
 * Expands to an ILcidPosixMap initializer: {row count of locmap_<_posixID>, locmap_<_posixID>}.
 * @param _posixID the full POSIX ID for this entry.
 */
#define ILCID_POSIX_MAP(_posixID) \
    {UPRV_LENGTHOF(locmap_ ## _posixID), locmap_ ## _posixID}
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000119
/*
////////////////////////////////////////////
//
// Create the table of LCID to POSIX Mapping
// None of it should be dynamically created.
//
// Keep static locale variables inside the function so that
// it can be created properly during static init.
//
// Note: This table should be updated periodically. Check the [MS-LCID] Windows Language Code Identifier
//       (LCID) Reference defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
//
//       Microsoft is moving away from LCID in favor of locale name as of Vista.  This table needs to be
//       maintained for support of older Windows versions.
//       Update: Windows 7 (091130)
//
// Note: Microsoft assigns a different LCID if a locale has a sorting variant. POSIX IDs below may contain
//       @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is
//       called from uloc_getLCID(), keywords other than collation are already removed. If we really need
//       to support other keywords in this mapping data, we must update the implementation.
////////////////////////////////////////////
*/
142
Jungshik Shin87232d82017-05-13 21:10:13 -0700143// TODO: For Windows ideally this table would be a list of exceptions rather than a complete list as
144// LocaleNameToLCID and LCIDToLocaleName provide 90% of these.
145
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000146ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
147
148ILCID_POSIX_SUBTABLE(ar) {
149 {0x01, "ar"},
150 {0x3801, "ar_AE"},
151 {0x3c01, "ar_BH"},
152 {0x1401, "ar_DZ"},
153 {0x0c01, "ar_EG"},
154 {0x0801, "ar_IQ"},
155 {0x2c01, "ar_JO"},
156 {0x3401, "ar_KW"},
157 {0x3001, "ar_LB"},
158 {0x1001, "ar_LY"},
159 {0x1801, "ar_MA"},
160 {0x1801, "ar_MO"},
161 {0x2001, "ar_OM"},
162 {0x4001, "ar_QA"},
163 {0x0401, "ar_SA"},
164 {0x2801, "ar_SY"},
165 {0x1c01, "ar_TN"},
166 {0x2401, "ar_YE"}
167};
168
169ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
170ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
171ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)
172
173ILCID_POSIX_SUBTABLE(az) {
174 {0x2c, "az"},
175 {0x082c, "az_Cyrl_AZ"}, /* Cyrillic based */
176 {0x742c, "az_Cyrl"}, /* Cyrillic based */
177 {0x042c, "az_Latn_AZ"}, /* Latin based */
178 {0x782c, "az_Latn"}, /* Latin based */
179 {0x042c, "az_AZ"} /* Latin based */
180};
181
182ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)
183ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
184
185/*ILCID_POSIX_SUBTABLE(ber) {
186 {0x5f, "ber"},
187 {0x045f, "ber_Arab_DZ"},
188 {0x045f, "ber_Arab"},
189 {0x085f, "ber_Latn_DZ"},
190 {0x085f, "ber_Latn"}
191};*/
192
193ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
194
Jungshik Shinb3189662017-11-07 11:18:34 -0800195ILCID_POSIX_SUBTABLE(bin) {
196 {0x66, "bin"},
197 {0x0466, "bin_NG"}
198};
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000199
200ILCID_POSIX_SUBTABLE(bn) {
201 {0x45, "bn"},
202 {0x0845, "bn_BD"},
203 {0x0445, "bn_IN"}
204};
205
206ILCID_POSIX_SUBTABLE(bo) {
207 {0x51, "bo"},
208 {0x0851, "bo_BT"},
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700209 {0x0451, "bo_CN"},
210 {0x0c51, "dz_BT"}
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000211};
212
213ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)
214
215ILCID_POSIX_SUBTABLE(ca) {
216 {0x03, "ca"},
217 {0x0403, "ca_ES"},
218 {0x0803, "ca_ES_VALENCIA"}
219};
220
221ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
Jungshik Shinb3189662017-11-07 11:18:34 -0800222
223ILCID_POSIX_SUBTABLE(chr) {
224 {0x05c, "chr"},
225 {0x7c5c, "chr_Cher"},
226 {0x045c, "chr_Cher_US"},
227 {0x045c, "chr_US"}
228};
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000229
Jungshik Shin87232d82017-05-13 21:10:13 -0700230// ICU has chosen different names for these.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000231ILCID_POSIX_SUBTABLE(ckb) {
232 {0x92, "ckb"},
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000233 {0x7c92, "ckb_Arab"},
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700234 {0x0492, "ckb_Arab_IQ"}
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000235};
236
237/* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */
238ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ)
239
240ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
241ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
242
Jungshik Shin87232d82017-05-13 21:10:13 -0700243// Windows doesn't know POSIX or BCP47 Unicode phonebook sort names
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000244ILCID_POSIX_SUBTABLE(de) {
245 {0x07, "de"},
246 {0x0c07, "de_AT"},
247 {0x0807, "de_CH"},
248 {0x0407, "de_DE"},
249 {0x1407, "de_LI"},
250 {0x1007, "de_LU"},
251 {0x10407,"de_DE@collation=phonebook"}, /*This is really de_DE_PHONEBOOK on Windows*/
252 {0x10407,"de@collation=phonebook"} /*This is really de_DE_PHONEBOOK on Windows*/
253};
254
255ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
256ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
257
Jungshik Shin87232d82017-05-13 21:10:13 -0700258// Windows uses an empty string for 'invariant'
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000259ILCID_POSIX_SUBTABLE(en) {
260 {0x09, "en"},
261 {0x0c09, "en_AU"},
262 {0x2809, "en_BZ"},
263 {0x1009, "en_CA"},
264 {0x0809, "en_GB"},
265 {0x3c09, "en_HK"},
266 {0x3809, "en_ID"},
267 {0x1809, "en_IE"},
268 {0x4009, "en_IN"},
269 {0x2009, "en_JM"},
270 {0x4409, "en_MY"},
271 {0x1409, "en_NZ"},
272 {0x3409, "en_PH"},
273 {0x4809, "en_SG"},
274 {0x2C09, "en_TT"},
275 {0x0409, "en_US"},
Jungshik Shin87232d82017-05-13 21:10:13 -0700276 {0x007f, "en_US_POSIX"}, /* duplicate for round-tripping */
Jungshik Shinb3189662017-11-07 11:18:34 -0800277 {0x2409, "en_029"},
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000278 {0x1c09, "en_ZA"},
279 {0x3009, "en_ZW"},
Jungshik Shinb3189662017-11-07 11:18:34 -0800280 {0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */
Jungshik Shin87232d82017-05-13 21:10:13 -0700281 {0x0409, "en_AS"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
282 {0x0409, "en_GU"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
283 {0x0409, "en_MH"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
284 {0x0409, "en_MP"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
285 {0x0409, "en_UM"} /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000286};
287
288ILCID_POSIX_SUBTABLE(en_US_POSIX) {
289 {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
290};
291
Jungshik Shin87232d82017-05-13 21:10:13 -0700292// Windows doesn't know POSIX or BCP47 Unicode traditional sort names
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000293ILCID_POSIX_SUBTABLE(es) {
294 {0x0a, "es"},
295 {0x2c0a, "es_AR"},
296 {0x400a, "es_BO"},
297 {0x340a, "es_CL"},
298 {0x240a, "es_CO"},
299 {0x140a, "es_CR"},
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700300 {0x5c0a, "es_CU"},
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000301 {0x1c0a, "es_DO"},
302 {0x300a, "es_EC"},
303 {0x0c0a, "es_ES"}, /*Modern sort.*/
304 {0x100a, "es_GT"},
305 {0x480a, "es_HN"},
306 {0x080a, "es_MX"},
307 {0x4c0a, "es_NI"},
308 {0x180a, "es_PA"},
309 {0x280a, "es_PE"},
310 {0x500a, "es_PR"},
311 {0x3c0a, "es_PY"},
312 {0x440a, "es_SV"},
313 {0x540a, "es_US"},
314 {0x380a, "es_UY"},
315 {0x200a, "es_VE"},
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800316 {0x580a, "es_419"},
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000317 {0x040a, "es_ES@collation=traditional"},
Jungshik Shin87232d82017-05-13 21:10:13 -0700318 {0x040a, "es@collation=traditional"} // Windows will treat this as es-ES@collation=traditional
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000319};
320
321ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
322ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
323
324/* ISO-639 doesn't distinguish between Persian and Dari.*/
325ILCID_POSIX_SUBTABLE(fa) {
326 {0x29, "fa"},
327 {0x0429, "fa_IR"}, /* Persian/Farsi (Iran) */
328 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
329};
330
Jungshik Shin87232d82017-05-13 21:10:13 -0700331
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000332/* duplicate for roundtripping */
333ILCID_POSIX_SUBTABLE(fa_AF) {
334 {0x8c, "fa_AF"}, /* Persian/Dari (Afghanistan) */
335 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
336};
337
338ILCID_POSIX_SUBTABLE(ff) {
339 {0x67, "ff"},
340 {0x7c67, "ff_Latn"},
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700341 {0x0867, "ff_Latn_SN"},
342 {0x0467, "ff_NG"}
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000343};
344
345ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
346ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)
347ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
348
349ILCID_POSIX_SUBTABLE(fr) {
350 {0x0c, "fr"},
351 {0x080c, "fr_BE"},
352 {0x0c0c, "fr_CA"},
353 {0x240c, "fr_CD"},
354 {0x240c, "fr_CG"},
355 {0x100c, "fr_CH"},
356 {0x300c, "fr_CI"},
357 {0x2c0c, "fr_CM"},
358 {0x040c, "fr_FR"},
359 {0x3c0c, "fr_HT"},
360 {0x140c, "fr_LU"},
361 {0x380c, "fr_MA"},
362 {0x180c, "fr_MC"},
363 {0x340c, "fr_ML"},
364 {0x200c, "fr_RE"},
365 {0x280c, "fr_SN"},
366 {0xe40c, "fr_015"},
367 {0x1c0c, "fr_029"}
368};
369
370ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG)
371
372ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)
373
374ILCID_POSIX_SUBTABLE(ga) { /* Gaelic (Ireland) */
375 {0x3c, "ga"},
376 {0x083c, "ga_IE"},
377 {0x043c, "gd_GB"}
378};
379
380ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland) */
381 {0x91, "gd"},
382 {0x0491, "gd_GB"}
383};
384
385ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
386ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
387ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
388ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)
389
390ILCID_POSIX_SUBTABLE(ha) {
391 {0x68, "ha"},
392 {0x7c68, "ha_Latn"},
393 {0x0468, "ha_Latn_NG"},
394};
395
396ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
397ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
398ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
399
400/* This LCID is really four different locales.*/
401ILCID_POSIX_SUBTABLE(hr) {
402 {0x1a, "hr"},
403 {0x141a, "bs_Latn_BA"}, /* Bosnian, Bosnia and Herzegovina */
404 {0x681a, "bs_Latn"}, /* Bosnian, Bosnia and Herzegovina */
405 {0x141a, "bs_BA"}, /* Bosnian, Bosnia and Herzegovina */
406 {0x781a, "bs"}, /* Bosnian */
407 {0x201a, "bs_Cyrl_BA"}, /* Bosnian, Bosnia and Herzegovina */
408 {0x641a, "bs_Cyrl"}, /* Bosnian, Bosnia and Herzegovina */
409 {0x101a, "hr_BA"}, /* Croatian in Bosnia */
410 {0x041a, "hr_HR"}, /* Croatian*/
411 {0x2c1a, "sr_Latn_ME"},
412 {0x241a, "sr_Latn_RS"},
413 {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
414 {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
415 {0x701a, "sr_Latn"}, /* It's 0x1a or 0x081a, pick one to make the test program happy. */
416 {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
417 {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
418 {0x301a, "sr_Cyrl_ME"},
419 {0x281a, "sr_Cyrl_RS"},
420 {0x6c1a, "sr_Cyrl"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
421 {0x7c1a, "sr"} /* In CLDR sr is sr_Cyrl. */
422};
423
Jungshik Shin70f82502016-01-29 00:32:36 -0800424ILCID_POSIX_SUBTABLE(hsb) {
425 {0x2E, "hsb"},
426 {0x042E, "hsb_DE"},
427 {0x082E, "dsb_DE"},
428 {0x7C2E, "dsb"},
429};
430
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000431ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
432ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
Jungshik Shinb3189662017-11-07 11:18:34 -0800433
434ILCID_POSIX_SUBTABLE(ibb) {
435 {0x69, "ibb"},
436 {0x0469, "ibb_NG"}
437};
438
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000439ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
440ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
441ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
442ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
443
444ILCID_POSIX_SUBTABLE(it) {
445 {0x10, "it"},
446 {0x0810, "it_CH"},
447 {0x0410, "it_IT"}
448};
449
450ILCID_POSIX_SUBTABLE(iu) {
451 {0x5d, "iu"},
452 {0x045d, "iu_Cans_CA"},
453 {0x785d, "iu_Cans"},
454 {0x085d, "iu_Latn_CA"},
455 {0x7c5d, "iu_Latn"}
456};
457
458ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL) /*Left in for compatibility*/
459ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
460ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
461ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
462ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)
463ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
464ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
465
466ILCID_POSIX_SUBTABLE(ko) {
467 {0x12, "ko"},
468 {0x0812, "ko_KP"},
469 {0x0412, "ko_KR"}
470};
471
472ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
473ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr, kr_NG)
474
475ILCID_POSIX_SUBTABLE(ks) { /* We could add PK and CN too */
476 {0x60, "ks"},
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000477 {0x0460, "ks_Arab_IN"},
478 {0x0860, "ks_Deva_IN"}
479};
480
481ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */
Jungshik Shinb3189662017-11-07 11:18:34 -0800482
483ILCID_POSIX_SUBTABLE(la) {
484 {0x76, "la"},
485 {0x0476, "la_001"},
486 {0x0476, "la_IT"} /*Left in for compatibility*/
487};
488
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000489ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
490ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
491ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
492ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
493ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
494ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
495ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
496
497ILCID_POSIX_SUBTABLE(mn) {
498 {0x50, "mn"},
499 {0x0450, "mn_MN"},
500 {0x7c50, "mn_Mong"},
501 {0x0850, "mn_Mong_CN"},
502 {0x0850, "mn_CN"},
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800503 {0x7850, "mn_Cyrl"},
504 {0x0c50, "mn_Mong_MN"}
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000505};
506
507ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
508ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
509ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
510
511ILCID_POSIX_SUBTABLE(ms) {
512 {0x3e, "ms"},
513 {0x083e, "ms_BN"}, /* Brunei Darussalam*/
514 {0x043e, "ms_MY"} /* Malaysia*/
515};
516
517ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
518ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)
519
520ILCID_POSIX_SUBTABLE(ne) {
521 {0x61, "ne"},
522 {0x0861, "ne_IN"}, /* India*/
523 {0x0461, "ne_NP"} /* Nepal*/
524};
525
526ILCID_POSIX_SUBTABLE(nl) {
527 {0x13, "nl"},
528 {0x0813, "nl_BE"},
529 {0x0413, "nl_NL"}
530};
531
532/* The "no" locale split into nb and nn. By default in ICU, "no" is nb.*/
Jungshik Shin87232d82017-05-13 21:10:13 -0700533// TODO: Not all of these are needed on Windows, but I don't know how ICU treats preferred ones here.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000534ILCID_POSIX_SUBTABLE(no) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700535 {0x14, "no"}, /* really nb_NO - actually Windows differentiates between neutral (no region) and specific (with region) */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000536 {0x7c14, "nb"}, /* really nb */
537 {0x0414, "nb_NO"}, /* really nb_NO. Keep first in the 414 list. */
538 {0x0414, "no_NO"}, /* really nb_NO */
539 {0x0814, "nn_NO"}, /* really nn_NO. Keep first in the 814 list. */
540 {0x7814, "nn"}, /* It's 0x14 or 0x814, pick one to make the test program happy. */
541 {0x0814, "no_NO_NY"}/* really nn_NO */
542};
543
544ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA) /* TODO: Verify the ISO-639 code */
545ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)
546
547ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */
548 {0x72, "om"},
549 {0x0472, "om_ET"},
550 {0x0472, "gaz_ET"}
551};
552
553/* Declared as or_IN to get around compiler errors*/
554ILCID_POSIX_SUBTABLE(or_IN) {
555 {0x48, "or"},
556 {0x0448, "or_IN"},
557};
558
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000559ILCID_POSIX_SUBTABLE(pa) {
560 {0x46, "pa"},
561 {0x0446, "pa_IN"},
Jungshik Shinb3189662017-11-07 11:18:34 -0800562 {0x0846, "pa_Arab_PK"},
563 {0x0846, "pa_PK"}
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000564};
565
Jungshik Shinb3189662017-11-07 11:18:34 -0800566ILCID_POSIX_SUBTABLE(pap) {
567 {0x79, "pap"},
568 {0x0479, "pap_029"},
569 {0x0479, "pap_AN"} /*Left in for compatibility*/
570};
571
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000572ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
573ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
574
575ILCID_POSIX_SUBTABLE(pt) {
576 {0x16, "pt"},
577 {0x0416, "pt_BR"},
578 {0x0816, "pt_PT"}
579};
580
581ILCID_POSIX_SUBTABLE(qu) {
582 {0x6b, "qu"},
583 {0x046b, "qu_BO"},
584 {0x086b, "qu_EC"},
585 {0x0C6b, "qu_PE"},
586 {0x046b, "quz_BO"},
587 {0x086b, "quz_EC"},
588 {0x0C6b, "quz_PE"}
589};
590
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700591ILCID_POSIX_SUBTABLE(quc) {
592 {0x93, "quc"},
593 {0x0493, "quc_CO"},
594 /*
595 "quc_Latn_GT" is an exceptional case. Language ID of "quc"
596 is 0x93, but LCID of "quc_Latn_GT" is 0x486, which should be
597 under the group of "qut". "qut" is a retired ISO 639-3 language
598 code for West Central Quiche, and merged to "quc".
599 It looks Windows previously reserved "qut" for K'iche', but,
600 decided to use "quc" when adding a locale for K'iche' (Guatemala).
601
602 This data structure used here assumes language ID bits in
603 LCID is unique for alphabetic language code. But this is not true
604 for "quc_Latn_GT". If we don't have the data below, LCID look up
605 by alphabetic locale ID (POSIX) will fail. The same entry is found
606 under "qut" below, which is required for reverse look up.
607 */
608 {0x0486, "quc_Latn_GT"}
609};
610
611ILCID_POSIX_SUBTABLE(qut) {
612 {0x86, "qut"},
613 {0x0486, "qut_GT"},
614 /*
615 See the note in "quc" above.
616 */
617 {0x0486, "quc_Latn_GT"}
618};
619
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000620ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)
621
622ILCID_POSIX_SUBTABLE(ro) {
623 {0x18, "ro"},
624 {0x0418, "ro_RO"},
625 {0x0818, "ro_MD"}
626};
627
Jungshik Shin87232d82017-05-13 21:10:13 -0700628// TODO: This is almost certainly 'wrong'. 0 in Windows is a synonym for LOCALE_USER_DEFAULT.
629// More likely this is a similar concept to the Windows 0x7f Invariant locale ""
630// (Except that it's not invariant in ICU)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000631ILCID_POSIX_SUBTABLE(root) {
632 {0x00, "root"}
633};
634
635ILCID_POSIX_SUBTABLE(ru) {
636 {0x19, "ru"},
637 {0x0419, "ru_RU"},
638 {0x0819, "ru_MD"}
639};
640
641ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)
642ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
643ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
644
645ILCID_POSIX_SUBTABLE(sd) {
646 {0x59, "sd"},
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700647 {0x0459, "sd_Deva_IN"},
Jungshik Shinb3189662017-11-07 11:18:34 -0800648 {0x0459, "sd_IN"},
649 {0x0859, "sd_Arab_PK"},
650 {0x0859, "sd_PK"},
651 {0x7c59, "sd_Arab"}
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000652};
653
654ILCID_POSIX_SUBTABLE(se) {
655 {0x3b, "se"},
656 {0x0c3b, "se_FI"},
657 {0x043b, "se_NO"},
658 {0x083b, "se_SE"},
659 {0x783b, "sma"},
660 {0x183b, "sma_NO"},
661 {0x1c3b, "sma_SE"},
662 {0x7c3b, "smj"},
663 {0x703b, "smn"},
664 {0x743b, "sms"},
665 {0x103b, "smj_NO"},
666 {0x143b, "smj_SE"},
667 {0x243b, "smn_FI"},
668 {0x203b, "sms_FI"},
669};
670
671ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
672ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
673ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
674
Jungshik Shinb3189662017-11-07 11:18:34 -0800675ILCID_POSIX_SUBTABLE(so) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000676 {0x77, "so"},
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000677 {0x0477, "so_SO"}
678};
679
680ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
681ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA)
682
683ILCID_POSIX_SUBTABLE(sv) {
684 {0x1d, "sv"},
685 {0x081d, "sv_FI"},
686 {0x041d, "sv_SE"}
687};
688
689ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
690ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
691
692ILCID_POSIX_SUBTABLE(ta) {
693 {0x49, "ta"},
694 {0x0449, "ta_IN"},
695 {0x0849, "ta_LK"}
696};
697
698ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
699
700/* Cyrillic based by default */
701ILCID_POSIX_SUBTABLE(tg) {
702 {0x28, "tg"},
703 {0x7c28, "tg_Cyrl"},
704 {0x0428, "tg_Cyrl_TJ"}
705};
706
707ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
708
709ILCID_POSIX_SUBTABLE(ti) {
710 {0x73, "ti"},
711 {0x0873, "ti_ER"},
712 {0x0473, "ti_ET"}
713};
714
715ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)
716
717ILCID_POSIX_SUBTABLE(tn) {
718 {0x32, "tn"},
719 {0x0832, "tn_BW"},
720 {0x0432, "tn_ZA"}
721};
722
723ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
724ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA)
725ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
726
727ILCID_POSIX_SUBTABLE(tzm) {
728 {0x5f, "tzm"},
729 {0x7c5f, "tzm_Latn"},
730 {0x085f, "tzm_Latn_DZ"},
731 {0x105f, "tzm_Tfng_MA"},
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700732 {0x045f, "tzm_Arab_MA"},
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000733 {0x045f, "tmz"}
734};
735
736ILCID_POSIX_SUBTABLE(ug) {
737 {0x80, "ug"},
738 {0x0480, "ug_CN"},
739 {0x0480, "ug_Arab_CN"}
740};
741
742ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
743
744ILCID_POSIX_SUBTABLE(ur) {
745 {0x20, "ur"},
746 {0x0820, "ur_IN"},
747 {0x0420, "ur_PK"}
748};
749
750ILCID_POSIX_SUBTABLE(uz) {
751 {0x43, "uz"},
752 {0x0843, "uz_Cyrl_UZ"}, /* Cyrillic based */
753 {0x7843, "uz_Cyrl"}, /* Cyrillic based */
754 {0x0843, "uz_UZ"}, /* Cyrillic based */
755 {0x0443, "uz_Latn_UZ"}, /* Latin based */
756 {0x7c43, "uz_Latn"} /* Latin based */
757};
758
759ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */
760 {0x33, "ve"},
761 {0x0433, "ve_ZA"},
762 {0x0433, "ven_ZA"}
763};
764
765ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000766ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
767ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
Jungshik Shinb3189662017-11-07 11:18:34 -0800768
769ILCID_POSIX_SUBTABLE(yi) {
770 {0x003d, "yi"},
771 {0x043d, "yi_001"}
772};
773
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000774ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
775
Jungshik Shin87232d82017-05-13 21:10:13 -0700776// Windows & ICU tend to different names for some of these
777// TODO: Windows probably does not need all of these entries, but I don't know how the precedence works.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000778ILCID_POSIX_SUBTABLE(zh) {
779 {0x0004, "zh_Hans"},
780 {0x7804, "zh"},
781 {0x0804, "zh_CN"},
782 {0x0804, "zh_Hans_CN"},
783 {0x0c04, "zh_Hant_HK"},
784 {0x0c04, "zh_HK"},
785 {0x1404, "zh_Hant_MO"},
786 {0x1404, "zh_MO"},
787 {0x1004, "zh_Hans_SG"},
788 {0x1004, "zh_SG"},
789 {0x0404, "zh_Hant_TW"},
790 {0x7c04, "zh_Hant"},
791 {0x0404, "zh_TW"},
792 {0x30404,"zh_Hant_TW"}, /* Bopomofo order */
793 {0x30404,"zh_TW"}, /* Bopomofo order */
794 {0x20004,"zh@collation=stroke"},
795 {0x20404,"zh_Hant@collation=stroke"},
796 {0x20404,"zh_Hant_TW@collation=stroke"},
797 {0x20404,"zh_TW@collation=stroke"},
798 {0x20804,"zh_Hans@collation=stroke"},
799 {0x20804,"zh_Hans_CN@collation=stroke"},
800 {0x20804,"zh_CN@collation=stroke"}
Jungshik Shin87232d82017-05-13 21:10:13 -0700801 // TODO: Alternate collations for other LCIDs are missing, eg: 0x50804
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000802};
803
804ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
805
806/* This must be static and grouped by LCID. */
807static const ILcidPosixMap gPosixIDmap[] = {
808 ILCID_POSIX_MAP(af), /* af Afrikaans 0x36 */
809 ILCID_POSIX_MAP(am), /* am Amharic 0x5e */
810 ILCID_POSIX_MAP(ar), /* ar Arabic 0x01 */
811 ILCID_POSIX_MAP(arn), /* arn Araucanian/Mapudungun 0x7a */
812 ILCID_POSIX_MAP(as), /* as Assamese 0x4d */
813 ILCID_POSIX_MAP(az), /* az Azerbaijani 0x2c */
814 ILCID_POSIX_MAP(ba), /* ba Bashkir 0x6d */
815 ILCID_POSIX_MAP(be), /* be Belarusian 0x23 */
816/* ILCID_POSIX_MAP(ber), ber Berber/Tamazight 0x5f */
817 ILCID_POSIX_MAP(bg), /* bg Bulgarian 0x02 */
818 ILCID_POSIX_MAP(bin), /* bin Edo 0x66 */
819 ILCID_POSIX_MAP(bn), /* bn Bengali; Bangla 0x45 */
820 ILCID_POSIX_MAP(bo), /* bo Tibetan 0x51 */
821 ILCID_POSIX_MAP(br), /* br Breton 0x7e */
822 ILCID_POSIX_MAP(ca), /* ca Catalan 0x03 */
823 ILCID_POSIX_MAP(chr), /* chr Cherokee 0x5c */
824 ILCID_POSIX_MAP(ckb), /* ckb Sorani (Central Kurdish) 0x92 */
825 ILCID_POSIX_MAP(co), /* co Corsican 0x83 */
826 ILCID_POSIX_MAP(cs), /* cs Czech 0x05 */
827 ILCID_POSIX_MAP(cy), /* cy Welsh 0x52 */
828 ILCID_POSIX_MAP(da), /* da Danish 0x06 */
829 ILCID_POSIX_MAP(de), /* de German 0x07 */
830 ILCID_POSIX_MAP(dv), /* dv Divehi 0x65 */
831 ILCID_POSIX_MAP(el), /* el Greek 0x08 */
832 ILCID_POSIX_MAP(en), /* en English 0x09 */
833 ILCID_POSIX_MAP(en_US_POSIX), /* invariant 0x7f */
834 ILCID_POSIX_MAP(es), /* es Spanish 0x0a */
835 ILCID_POSIX_MAP(et), /* et Estonian 0x25 */
836 ILCID_POSIX_MAP(eu), /* eu Basque 0x2d */
837 ILCID_POSIX_MAP(fa), /* fa Persian/Farsi 0x29 */
838 ILCID_POSIX_MAP(fa_AF), /* fa Persian/Dari 0x8c */
839 ILCID_POSIX_MAP(ff), /* ff Fula 0x67 */
840 ILCID_POSIX_MAP(fi), /* fi Finnish 0x0b */
841 ILCID_POSIX_MAP(fil), /* fil Filipino 0x64 */
842 ILCID_POSIX_MAP(fo), /* fo Faroese 0x38 */
843 ILCID_POSIX_MAP(fr), /* fr French 0x0c */
844 ILCID_POSIX_MAP(fuv), /* fuv Fulfulde - Nigeria 0x67 */
845 ILCID_POSIX_MAP(fy), /* fy Frisian 0x62 */
846 ILCID_POSIX_MAP(ga), /* * Gaelic (Ireland,Scotland) 0x3c */
847 ILCID_POSIX_MAP(gd), /* gd Gaelic (United Kingdom) 0x91 */
848 ILCID_POSIX_MAP(gl), /* gl Galician 0x56 */
849 ILCID_POSIX_MAP(gn), /* gn Guarani 0x74 */
850 ILCID_POSIX_MAP(gsw), /* gsw Alemanic/Alsatian/Swiss German 0x84 */
851 ILCID_POSIX_MAP(gu), /* gu Gujarati 0x47 */
852 ILCID_POSIX_MAP(ha), /* ha Hausa 0x68 */
853 ILCID_POSIX_MAP(haw), /* haw Hawaiian 0x75 */
854 ILCID_POSIX_MAP(he), /* he Hebrew (formerly iw) 0x0d */
855 ILCID_POSIX_MAP(hi), /* hi Hindi 0x39 */
856 ILCID_POSIX_MAP(hr), /* * Croatian and others 0x1a */
Jungshik Shin70f82502016-01-29 00:32:36 -0800857 ILCID_POSIX_MAP(hsb), /* hsb Upper Sorbian 0x2e */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000858 ILCID_POSIX_MAP(hu), /* hu Hungarian 0x0e */
859 ILCID_POSIX_MAP(hy), /* hy Armenian 0x2b */
860 ILCID_POSIX_MAP(ibb), /* ibb Ibibio - Nigeria 0x69 */
861 ILCID_POSIX_MAP(id), /* id Indonesian (formerly in) 0x21 */
862 ILCID_POSIX_MAP(ig), /* ig Igbo 0x70 */
863 ILCID_POSIX_MAP(ii), /* ii Sichuan Yi 0x78 */
864 ILCID_POSIX_MAP(is), /* is Icelandic 0x0f */
865 ILCID_POSIX_MAP(it), /* it Italian 0x10 */
866 ILCID_POSIX_MAP(iu), /* iu Inuktitut 0x5d */
867 ILCID_POSIX_MAP(iw), /* iw Hebrew 0x0d */
868 ILCID_POSIX_MAP(ja), /* ja Japanese 0x11 */
869 ILCID_POSIX_MAP(ka), /* ka Georgian 0x37 */
870 ILCID_POSIX_MAP(kk), /* kk Kazakh 0x3f */
871 ILCID_POSIX_MAP(kl), /* kl Kalaallisut 0x6f */
872 ILCID_POSIX_MAP(km), /* km Khmer 0x53 */
873 ILCID_POSIX_MAP(kn), /* kn Kannada 0x4b */
874 ILCID_POSIX_MAP(ko), /* ko Korean 0x12 */
875 ILCID_POSIX_MAP(kok), /* kok Konkani 0x57 */
876 ILCID_POSIX_MAP(kr), /* kr Kanuri 0x71 */
877 ILCID_POSIX_MAP(ks), /* ks Kashmiri 0x60 */
878 ILCID_POSIX_MAP(ky), /* ky Kyrgyz 0x40 */
879 ILCID_POSIX_MAP(lb), /* lb Luxembourgish 0x6e */
880 ILCID_POSIX_MAP(la), /* la Latin 0x76 */
881 ILCID_POSIX_MAP(lo), /* lo Lao 0x54 */
882 ILCID_POSIX_MAP(lt), /* lt Lithuanian 0x27 */
883 ILCID_POSIX_MAP(lv), /* lv Latvian, Lettish 0x26 */
884 ILCID_POSIX_MAP(mi), /* mi Maori 0x81 */
885 ILCID_POSIX_MAP(mk), /* mk Macedonian 0x2f */
886 ILCID_POSIX_MAP(ml), /* ml Malayalam 0x4c */
887 ILCID_POSIX_MAP(mn), /* mn Mongolian 0x50 */
888 ILCID_POSIX_MAP(mni), /* mni Manipuri 0x58 */
889 ILCID_POSIX_MAP(moh), /* moh Mohawk 0x7c */
890 ILCID_POSIX_MAP(mr), /* mr Marathi 0x4e */
891 ILCID_POSIX_MAP(ms), /* ms Malay 0x3e */
892 ILCID_POSIX_MAP(mt), /* mt Maltese 0x3a */
893 ILCID_POSIX_MAP(my), /* my Burmese 0x55 */
894/* ILCID_POSIX_MAP(nb), // no Norwegian 0x14 */
895 ILCID_POSIX_MAP(ne), /* ne Nepali 0x61 */
896 ILCID_POSIX_MAP(nl), /* nl Dutch 0x13 */
897/* ILCID_POSIX_MAP(nn), // no Norwegian 0x14 */
898 ILCID_POSIX_MAP(no), /* * Norwegian 0x14 */
899 ILCID_POSIX_MAP(nso), /* nso Sotho, Northern (Sepedi dialect) 0x6c */
900 ILCID_POSIX_MAP(oc), /* oc Occitan 0x82 */
901 ILCID_POSIX_MAP(om), /* om Oromo 0x72 */
902 ILCID_POSIX_MAP(or_IN), /* or Oriya 0x48 */
903 ILCID_POSIX_MAP(pa), /* pa Punjabi 0x46 */
904 ILCID_POSIX_MAP(pap), /* pap Papiamentu 0x79 */
905 ILCID_POSIX_MAP(pl), /* pl Polish 0x15 */
906 ILCID_POSIX_MAP(ps), /* ps Pashto 0x63 */
907 ILCID_POSIX_MAP(pt), /* pt Portuguese 0x16 */
908 ILCID_POSIX_MAP(qu), /* qu Quechua 0x6B */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700909 ILCID_POSIX_MAP(quc), /* quc K'iche 0x93 */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000910 ILCID_POSIX_MAP(qut), /* qut K'iche 0x86 */
911 ILCID_POSIX_MAP(rm), /* rm Raeto-Romance/Romansh 0x17 */
912 ILCID_POSIX_MAP(ro), /* ro Romanian 0x18 */
913 ILCID_POSIX_MAP(root), /* root 0x00 */
914 ILCID_POSIX_MAP(ru), /* ru Russian 0x19 */
915 ILCID_POSIX_MAP(rw), /* rw Kinyarwanda 0x87 */
916 ILCID_POSIX_MAP(sa), /* sa Sanskrit 0x4f */
917 ILCID_POSIX_MAP(sah), /* sah Yakut 0x85 */
918 ILCID_POSIX_MAP(sd), /* sd Sindhi 0x59 */
919 ILCID_POSIX_MAP(se), /* se Sami 0x3b */
920/* ILCID_POSIX_MAP(sh), // sh Serbo-Croatian 0x1a */
921 ILCID_POSIX_MAP(si), /* si Sinhalese 0x5b */
922 ILCID_POSIX_MAP(sk), /* sk Slovak 0x1b */
923 ILCID_POSIX_MAP(sl), /* sl Slovenian 0x24 */
924 ILCID_POSIX_MAP(so), /* so Somali 0x77 */
925 ILCID_POSIX_MAP(sq), /* sq Albanian 0x1c */
926/* ILCID_POSIX_MAP(sr), // sr Serbian 0x1a */
927 ILCID_POSIX_MAP(st), /* st Sutu 0x30 */
928 ILCID_POSIX_MAP(sv), /* sv Swedish 0x1d */
929 ILCID_POSIX_MAP(sw), /* sw Swahili 0x41 */
930 ILCID_POSIX_MAP(syr), /* syr Syriac 0x5A */
931 ILCID_POSIX_MAP(ta), /* ta Tamil 0x49 */
932 ILCID_POSIX_MAP(te), /* te Telugu 0x4a */
933 ILCID_POSIX_MAP(tg), /* tg Tajik 0x28 */
934 ILCID_POSIX_MAP(th), /* th Thai 0x1e */
935 ILCID_POSIX_MAP(ti), /* ti Tigrigna 0x73 */
936 ILCID_POSIX_MAP(tk), /* tk Turkmen 0x42 */
937 ILCID_POSIX_MAP(tn), /* tn Tswana 0x32 */
938 ILCID_POSIX_MAP(tr), /* tr Turkish 0x1f */
939 ILCID_POSIX_MAP(ts), /* ts Tsonga 0x31 */
940 ILCID_POSIX_MAP(tt), /* tt Tatar 0x44 */
941 ILCID_POSIX_MAP(tzm), /* tzm Tamazight 0x5f */
942 ILCID_POSIX_MAP(ug), /* ug Uighur 0x80 */
943 ILCID_POSIX_MAP(uk), /* uk Ukrainian 0x22 */
944 ILCID_POSIX_MAP(ur), /* ur Urdu 0x20 */
945 ILCID_POSIX_MAP(uz), /* uz Uzbek 0x43 */
946 ILCID_POSIX_MAP(ve), /* ve Venda 0x33 */
947 ILCID_POSIX_MAP(vi), /* vi Vietnamese 0x2a */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000948 ILCID_POSIX_MAP(wo), /* wo Wolof 0x88 */
949 ILCID_POSIX_MAP(xh), /* xh Xhosa 0x34 */
950 ILCID_POSIX_MAP(yi), /* yi Yiddish 0x3d */
951 ILCID_POSIX_MAP(yo), /* yo Yoruba 0x6a */
952 ILCID_POSIX_MAP(zh), /* zh Chinese 0x04 */
953 ILCID_POSIX_MAP(zu), /* zu Zulu 0x35 */
954};
955
/* Number of entries in gPosixIDmap; used as the upper bound by the table searches below. */
static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000957
/**
 * Counts how many leading characters two NUL-terminated IDs have in common.
 *
 * @param id1 first ID string.
 * @param id2 second ID string.
 * @return the length of the longest common prefix of id1 and id2
 *         (0 when the very first characters differ or id1 is empty).
 */
static int32_t
idCmp(const char* id1, const char* id2)
{
    int32_t matched = 0;
    /* Stop at the end of id1 or at the first mismatching character. */
    for (; id1[matched] != 0 && id1[matched] == id2[matched]; ++matched) {
    }
    return matched;
}
974
975/**
976 * Searches for a Windows LCID
977 *
978 * @param posixid the Posix style locale id.
979 * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
980 * no equivalent Windows LCID.
981 * @return the LCID
982 */
983static uint32_t
984getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
985{
986 int32_t bestIdx = 0;
987 int32_t bestIdxDiff = 0;
988 int32_t posixIDlen = (int32_t)uprv_strlen(posixID);
989 uint32_t idx;
990
991 for (idx = 0; idx < this_0->numRegions; idx++ ) {
992 int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
993 if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
994 if (posixIDlen == sameChars) {
995 /* Exact match */
996 return this_0->regionMaps[idx].hostID;
997 }
998 bestIdxDiff = sameChars;
999 bestIdx = idx;
1000 }
1001 }
1002 /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
1003 /* We also have to make sure that sid and si and similar string subsets don't match. */
1004 if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
1005 && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
1006 {
1007 *status = U_USING_FALLBACK_WARNING;
1008 return this_0->regionMaps[bestIdx].hostID;
1009 }
1010
1011 /*no match found */
1012 *status = U_ILLEGAL_ARGUMENT_ERROR;
1013 return this_0->regionMaps->hostID;
1014}
1015
1016static const char*
1017getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
1018{
1019 uint32_t i;
1020 for (i = 0; i <= this_0->numRegions; i++)
1021 {
1022 if (this_0->regionMaps[i].hostID == hostID)
1023 {
1024 return this_0->regionMaps[i].posixID;
1025 }
1026 }
1027
1028 /* If you get here, then no matching region was found,
1029 so return the language id with the wild card region. */
1030 return this_0->regionMaps[0].posixID;
1031}
1032
1033/*
1034//////////////////////////////////////
1035//
1036// LCID --> POSIX
1037//
1038/////////////////////////////////////
1039*/
Jungshik Shin87232d82017-05-13 21:10:13 -07001040#ifdef USE_WINDOWS_LCID_MAPPING_API
/*
 * Various language tags need to be changed:
 * quz -> qu
 * prs -> fa
 *
 * buffer must be a writable, NUL-terminated char array and len its length.
 * The leading three-letter subtag is rewritten in place and the remainder of
 * the string (including the terminating NUL) is shifted down by one char.
 * The source and destination ranges overlap, so the shift uses memmove;
 * strcat on overlapping strings is undefined behavior.
 * Wrapped in do { } while (0) so the macro behaves as a single statement.
 */
#define FIX_LANGUAGE_ID_TAG(buffer, len) \
    do { \
        if ((len) >= 3) { \
            if ((buffer)[0] == 'q' && (buffer)[1] == 'u' && (buffer)[2] == 'z') { \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } else if ((buffer)[0] == 'p' && (buffer)[1] == 'r' && (buffer)[2] == 's') { \
                (buffer)[0] = 'f'; (buffer)[1] = 'a'; \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } \
        } \
    } while (0)
1056
1057#endif
1058U_CAPI int32_t
1059uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)
1060{
1061 uint16_t langID;
1062 uint32_t localeIndex;
1063 UBool bLookup = TRUE;
1064 const char *pPosixID = NULL;
1065
Jungshik Shin87232d82017-05-13 21:10:13 -07001066#ifdef USE_WINDOWS_LCID_MAPPING_API
Jungshik Shinb3189662017-11-07 11:18:34 -08001067 char locName[LOCALE_NAME_MAX_LENGTH] = {}; // ICU name can't be longer than Windows name
1068
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001069 // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and
1070 // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for
1071 // Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot
1072 // use the Windows API to resolve locale ID for this specific case.
Jungshik Shin87232d82017-05-13 21:10:13 -07001073 if ((hostid & 0x3FF) != 0x92) {
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001074 int32_t tmpLen = 0;
Jungshik Shin87232d82017-05-13 21:10:13 -07001075 UChar windowsLocaleName[LOCALE_NAME_MAX_LENGTH]; // ULOC_FULLNAME_CAPACITY > LOCALE_NAME_MAX_LENGTH
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001076
Jungshik Shin87232d82017-05-13 21:10:13 -07001077 // Note: LOCALE_ALLOW_NEUTRAL_NAMES was enabled in Windows7+, prior versions did not handle neutral (no-region) locale names.
1078 tmpLen = LCIDToLocaleName(hostid, (PWSTR)windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName), LOCALE_ALLOW_NEUTRAL_NAMES);
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001079 if (tmpLen > 1) {
Jungshik Shin87232d82017-05-13 21:10:13 -07001080 int32_t i = 0;
1081 // Only need to look up in table if have _, eg for de-de_phoneb type alternate sort.
1082 bLookup = FALSE;
1083 for (i = 0; i < UPRV_LENGTHOF(locName); i++)
1084 {
1085 locName[i] = (char)(windowsLocaleName[i]);
1086
1087 // Windows locale name may contain sorting variant, such as "es-ES_tradnl".
1088 // In such cases, we need special mapping data found in the hardcoded table
1089 // in this source file.
1090 if (windowsLocaleName[i] == L'_')
1091 {
1092 // Keep the base locale, without variant
1093 // TODO: Should these be mapped from _phoneb to @collation=phonebook, etc.?
1094 locName[i] = '\0';
1095 tmpLen = i;
1096 bLookup = TRUE;
1097 break;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001098 }
Jungshik Shin87232d82017-05-13 21:10:13 -07001099 else if (windowsLocaleName[i] == L'-')
1100 {
1101 // Windows names use -, ICU uses _
1102 locName[i] = '_';
1103 }
1104 else if (windowsLocaleName[i] == L'\0')
1105 {
1106 // No point in doing more work than necessary
1107 break;
1108 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001109 }
Jungshik Shin87232d82017-05-13 21:10:13 -07001110 // TODO: Need to understand this better, why isn't it an alias?
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07001111 FIX_LANGUAGE_ID_TAG(locName, tmpLen);
1112 pPosixID = locName;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001113 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001114 }
Jungshik Shin87232d82017-05-13 21:10:13 -07001115#endif // USE_WINDOWS_LCID_MAPPING_API
1116
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001117 if (bLookup) {
1118 const char *pCandidate = NULL;
1119 langID = LANGUAGE_LCID(hostid);
1120
1121 for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) {
1122 if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) {
1123 pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid);
1124 break;
1125 }
1126 }
1127
1128 /* On Windows, when locale name has a variant, we still look up the hardcoded table.
1129 If a match in the hardcoded table is longer than the Windows locale name without
1130 variant, we use the one as the result */
1131 if (pCandidate && (pPosixID == NULL || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) {
1132 pPosixID = pCandidate;
1133 }
1134 }
1135
1136 if (pPosixID) {
Jungshik Shinb3189662017-11-07 11:18:34 -08001137 int32_t resLen = static_cast<int32_t>(uprv_strlen(pPosixID));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001138 int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity;
1139 uprv_memcpy(posixID, pPosixID, copyLen);
1140 if (resLen < posixIDCapacity) {
1141 posixID[resLen] = 0;
1142 if (*status == U_STRING_NOT_TERMINATED_WARNING) {
1143 *status = U_ZERO_ERROR;
1144 }
1145 } else if (resLen == posixIDCapacity) {
1146 *status = U_STRING_NOT_TERMINATED_WARNING;
1147 } else {
1148 *status = U_BUFFER_OVERFLOW_ERROR;
1149 }
1150 return resLen;
1151 }
1152
1153 /* no match found */
1154 *status = U_ILLEGAL_ARGUMENT_ERROR;
1155 return -1;
1156}
1157
1158/*
1159//////////////////////////////////////
1160//
1161// POSIX --> LCID
1162// This should only be called from uloc_getLCID.
1163// The locale ID must be in canonical form.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001164//
1165/////////////////////////////////////
1166*/
Jungshik Shin87232d82017-05-13 21:10:13 -07001167U_CAPI uint32_t
1168uprv_convertToLCIDPlatform(const char* localeID)
1169{
1170 // The purpose of this function is to leverage native platform name->lcid
1171 // conversion functionality when available.
1172#ifdef USE_WINDOWS_LCID_MAPPING_API
1173 DWORD nameLCIDFlags = 0;
1174 UErrorCode myStatus = U_ZERO_ERROR;
1175
1176 // First check for a Windows name->LCID match, fall through to catch
1177 // ICU special cases, but Windows may know it already.
1178#if LOCALE_ALLOW_NEUTRAL_NAMES
1179 nameLCIDFlags = LOCALE_ALLOW_NEUTRAL_NAMES;
1180#endif /* LOCALE_ALLOW_NEUTRAL_NAMES */
1181
1182 int32_t len;
1183 char collVal[ULOC_KEYWORDS_CAPACITY] = {};
1184 char baseName[ULOC_FULLNAME_CAPACITY] = {};
1185 const char * mylocaleID = localeID;
1186
1187 // Check any for keywords.
1188 if (uprv_strchr(localeID, '@'))
1189 {
1190 len = uloc_getKeywordValue(localeID, "collation", collVal, UPRV_LENGTHOF(collVal) - 1, &myStatus);
1191 if (U_SUCCESS(myStatus) && len > 0)
1192 {
1193 // If it contains the keyword collation, return 0 so that the LCID lookup table will be used.
1194 return 0;
1195 }
1196 else
1197 {
1198 // If the locale ID contains keywords other than collation, just use the base name.
1199 len = uloc_getBaseName(localeID, baseName, UPRV_LENGTHOF(baseName) - 1, &myStatus);
1200
1201 if (U_SUCCESS(myStatus) && len > 0)
1202 {
1203 baseName[len] = 0;
1204 mylocaleID = baseName;
1205 }
1206 }
1207 }
1208
1209 char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {};
1210 // this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form
Jungshik Shinb3189662017-11-07 11:18:34 -08001211 (void)uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, &myStatus);
Jungshik Shin87232d82017-05-13 21:10:13 -07001212
1213 if (U_SUCCESS(myStatus))
1214 {
1215 // Need it to be UTF-16, not 8-bit
1216 wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {};
1217 int32_t i;
1218 for (i = 0; i < UPRV_LENGTHOF(bcp47Tag); i++)
1219 {
1220 if (asciiBCP47Tag[i] == '\0')
1221 {
1222 break;
1223 }
1224 else
1225 {
1226 // Copy the character
1227 bcp47Tag[i] = static_cast<wchar_t>(asciiBCP47Tag[i]);
1228 }
1229 }
1230
1231 if (i < (UPRV_LENGTHOF(bcp47Tag) - 1))
1232 {
1233 // Ensure it's null terminated
1234 bcp47Tag[i] = L'\0';
1235 LCID lcid = LocaleNameToLCID(bcp47Tag, nameLCIDFlags);
1236 if (lcid > 0)
1237 {
1238 // Found LCID from windows, return that one, unless its completely ambiguous
1239 // LOCALE_USER_DEFAULT and transients are OK because they will round trip
1240 // for this process.
1241 if (lcid != LOCALE_CUSTOM_UNSPECIFIED)
1242 {
1243 return lcid;
1244 }
1245 }
1246 }
1247 }
Jungshik Shinb3189662017-11-07 11:18:34 -08001248#else
1249 (void)localeID; // Suppress unused variable warning.
Jungshik Shin87232d82017-05-13 21:10:13 -07001250#endif /* USE_WINDOWS_LCID_MAPPING_API */
1251
1252 // No found, or not implemented on platforms without native name->lcid conversion
1253 return 0;
1254}
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001255
1256U_CAPI uint32_t
1257uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
1258{
Jungshik Shin87232d82017-05-13 21:10:13 -07001259 // This function does the table lookup when native platform name->lcid conversion isn't available,
1260 // or for locales that don't follow patterns the platform expects.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001261 uint32_t low = 0;
1262 uint32_t high = gLocaleCount;
1263 uint32_t mid;
1264 uint32_t oldmid = 0;
1265 int32_t compVal;
1266
1267 uint32_t value = 0;
1268 uint32_t fallbackValue = (uint32_t)-1;
1269 UErrorCode myStatus;
1270 uint32_t idx;
1271
1272 /* Check for incomplete id. */
1273 if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
1274 return 0;
1275 }
1276
1277 /*Binary search for the map entry for normal cases */
1278
1279 while (high > low) /*binary search*/{
1280
1281 mid = (high+low) >> 1; /*Finds median*/
1282
1283 if (mid == oldmid)
1284 break;
1285
1286 compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);
1287 if (compVal < 0){
1288 high = mid;
1289 }
1290 else if (compVal > 0){
1291 low = mid;
1292 }
1293 else /*we found it*/{
1294 return getHostID(&gPosixIDmap[mid], posixID, status);
1295 }
1296 oldmid = mid;
1297 }
1298
1299 /*
1300 * Sometimes we can't do a binary search on posixID because some LCIDs
1301 * go to different locales. We hit one of those special cases.
1302 */
1303 for (idx = 0; idx < gLocaleCount; idx++ ) {
1304 myStatus = U_ZERO_ERROR;
1305 value = getHostID(&gPosixIDmap[idx], posixID, &myStatus);
1306 if (myStatus == U_ZERO_ERROR) {
1307 return value;
1308 }
1309 else if (myStatus == U_USING_FALLBACK_WARNING) {
1310 fallbackValue = value;
1311 }
1312 }
1313
1314 if (fallbackValue != (uint32_t)-1) {
1315 *status = U_USING_FALLBACK_WARNING;
1316 return fallbackValue;
1317 }
1318
1319 /* no match found */
1320 *status = U_ILLEGAL_ARGUMENT_ERROR;
1321 return 0; /* return international (root) */
1322}