Blame - source/common/uloc.cpp - chromium.googlesource.com/chromium/deps/icu

blob: 88a221c6024fe370aa8cab459d0509450da22a1b [file] [log] [blame]

jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1	/*
				2	**********************************************************************
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame^]	3	* Copyright (C) 1997-2014, International Business Machines
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	4	* Corporation and others. All Rights Reserved.
				5	**********************************************************************
				6	*
				7	* File ULOC.CPP
				8	*
				9	* Modification History:
				10	*
				11	* Date Name Description
				12	* 04/01/97 aliu Creation.
				13	* 08/21/98 stephen JDK 1.2 sync
				14	* 12/08/98 rtg New Locale implementation and C API
				15	* 03/15/99 damiba overhaul.
				16	* 04/06/99 stephen changed setDefault() to realloc and copy
				17	* 06/14/99 stephen Changed calls to ures_open for new params
				18	* 07/21/99 stephen Modified setDefault() to propagate to C++
				19	* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
				20	* brought canonicalization code into line with spec
				21	*****************************************************************************/
				22
				23	/*
				24	POSIX's locale format, from putil.c: [no spaces]
				25
				26	ll [ _CC ] [ . MM ] [ @ VV]
				27
				28	l = lang, C = ctry, M = charmap, V = variant
				29	*/
				30
				31	#include "unicode/utypes.h"
				32	#include "unicode/ustring.h"
				33	#include "unicode/uloc.h"
				34
				35	#include "putilimp.h"
				36	#include "ustr_imp.h"
				37	#include "ulocimp.h"
				38	#include "umutex.h"
				39	#include "cstring.h"
				40	#include "cmemory.h"
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	41	#include "locmap.h"
				42	#include "uarrsort.h"
				43	#include "uenumimp.h"
				44	#include "uassert.h"
				45
				46	#include <stdio.h> /* for sprintf */
				47
				48	/* ### Declarations **************************************************/
				49
				50	/* Locale stuff from locid.cpp */
				51	U_CFUNC void locale_set_default(const char *id);
				52	U_CFUNC const char *locale_get_default(void);
				53	U_CFUNC int32_t
				54	locale_getKeywords(const char *localeID,
				55	char prev,
				56	char *keywords, int32_t keywordCapacity,
				57	char values, int32_t valuesCapacity, int32_t valLen,
				58	UBool valuesToo,
				59	UErrorCode *status);
				60
				61	/* ### Data tables **************************************************/
				62
				63	/**
				64	* Table of language codes, both 2- and 3-letter, with preference
				65	* given to 2-letter codes where possible. Includes 3-letter codes
				66	* that lack a 2-letter equivalent.
				67	*
				68	* This list must be in sorted order. This list is returned directly
				69	* to the user by some API.
				70	*
				71	* This list must be kept in sync with LANGUAGES_3, with corresponding
				72	* entries matched.
				73	*
				74	* This table should be terminated with a NULL entry, followed by a
				75	* second list, and another NULL entry. The first list is visible to
				76	* user code when this array is returned by API. The second list
				77	* contains codes we support, but do not expose through user API.
				78	*
				79	* Notes
				80	*
				81	* Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
				82	* include the revisions up to 2001/7/27 CWB
				83	*
				84	* The 3 character codes are the terminology codes like RFC 3066. This
				85	* is compatible with prior ICU codes
				86	*
				87	* "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
				88	* table but now at the end of the table because 3 character codes are
				89	* duplicates. This avoids bad searches going from 3 to 2 character
				90	* codes.
				91	*
				92	* The range qaa-qtz is reserved for local use
				93	*/
				94	/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
				95	/* ISO639 table version is 20130531 */
				96	static const char * const LANGUAGES[] = {
				97	"aa", "ab", "ace", "ach", "ada", "ady", "ae", "af",
				98	"afa", "afh", "agq", "ain", "ak", "akk", "ale", "alg",
				99	"alt", "am", "an", "ang", "anp", "apa", "ar", "arc",
				100	"arn", "arp", "art", "arw", "as", "asa", "ast", "ath",
				101	"aus", "av", "awa", "ay", "az",
				102	"ba", "bad", "bai", "bal", "ban", "bas", "bat", "bax",
				103	"bbj", "be", "bej", "bem", "ber", "bez", "bfd", "bg",
				104	"bh", "bho", "bi", "bik", "bin", "bkm", "bla", "bm",
				105	"bn", "bnt", "bo", "br", "bra", "brx", "bs", "bss",
				106	"btk", "bua", "bug", "bum", "byn", "byv",
				107	"ca", "cad", "cai", "car", "cau", "cay", "cch", "ce",
				108	"ceb", "cel", "cgg", "ch", "chb", "chg", "chk", "chm",
				109	"chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "co",
				110	"cop", "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs",
				111	"csb", "cu", "cus", "cv", "cy",
				112	"da", "dak", "dar", "dav", "day", "de", "del", "den",
				113	"dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
				114	"dv", "dyo", "dyu", "dz", "dzg",
				115	"ebu", "ee", "efi", "egy", "eka", "el", "elx", "en",
				116	"enm", "eo", "es", "et", "eu", "ewo",
				117	"fa", "fan", "fat", "ff", "fi", "fil", "fiu", "fj",
				118	"fo", "fon", "fr", "frm", "fro", "frr", "frs", "fur",
				119	"fy",
				120	"ga", "gaa", "gay", "gba", "gd", "gem", "gez", "gil",
				121	"gl", "gmh", "gn", "goh", "gon", "gor", "got", "grb",
				122	"grc", "gsw", "gu", "guz", "gv", "gwi",
				123	"ha", "hai", "haw", "he", "hi", "hil", "him", "hit",
				124	"hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy",
				125	"hz",
				126	"ia", "iba", "ibb", "id", "ie", "ig", "ii", "ijo",
				127	"ik", "ilo", "inc", "ine", "inh", "io", "ira", "iro",
				128	"is", "it", "iu",
				129	"ja", "jbo", "jgo", "jmc", "jpr", "jrb", "jv",
				130	"ka", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
				131	"kbd", "kbl", "kcg", "kde", "kea", "kfo", "kg", "kha",
				132	"khi", "kho", "khq", "ki", "kj", "kk", "kkj", "kl",
				133	"kln", "km", "kmb", "kn", "ko", "kok", "kos", "kpe",
				134	"kr", "krc", "krl", "kro", "kru", "ks", "ksb", "ksf",
				135	"ksh", "ku", "kum", "kut", "kv", "kw", "ky",
				136	"la", "lad", "lag", "lah", "lam", "lb", "lez", "lg",
				137	"li", "lkt", "ln", "lo", "lol", "loz", "lt", "lu",
				138	"lua", "lui", "lun", "luo", "lus", "luy", "lv",
				139	"mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
				140	"mde", "mdf", "mdr", "men", "mer", "mfe", "mg", "mga",
				141	"mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
				142	"mkh", "ml", "mn", "mnc", "mni", "mno", "mo", "moh",
				143	"mos", "mr", "ms", "mt", "mua", "mul", "mun", "mus",
				144	"mwl", "mwr", "my", "mye", "myn", "myv",
				145	"na", "nah", "nai", "nap", "naq", "nb", "nd", "nds",
				146	"ne", "new", "ng", "nia", "nic", "niu", "nl", "nmg",
				147	"nn", "nnh", "no", "nog", "non", "nqo", "nr", "nso",
				148	"nub", "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo",
				149	"nzi",
				150	"oc", "oj", "om", "or", "os", "osa", "ota", "oto",
				151	"pa", "paa", "pag", "pal", "pam", "pap", "pau", "peo",
				152	"phi", "phn", "pi", "pl", "pon", "pra", "pro", "ps",
				153	"pt",
				154	"qu",
				155	"raj", "rap", "rar", "rm", "rn", "ro", "roa", "rof",
				156	"rom", "ru", "rup", "rw", "rwk",
				157	"sa", "sad", "sah", "sai", "sal", "sam", "saq", "sas",
				158	"sat", "sba", "sbp", "sc", "scn", "sco", "sd", "se",
				159	"see", "seh", "sel", "sem", "ses", "sg", "sga", "sgn",
				160	"shi", "shn", "shu", "si", "sid", "sio", "sit",
				161	"sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn",
				162	"sms", "sn", "snk", "so", "sog", "son", "sq", "sr",
				163	"srn", "srr", "ss", "ssa", "ssy", "st", "su", "suk",
				164	"sus", "sux", "sv", "sw", "swb", "swc", "syc", "syr",
				165	"ta", "tai", "te", "tem", "teo", "ter", "tet", "tg",
				166	"th", "ti", "tig", "tiv", "tk", "tkl", "tl", "tlh",
				167	"tli", "tmh", "tn", "to", "tog", "tpi", "tr", "trv",
				168	"ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw",
				169	"twq", "ty", "tyv", "tzm",
				170	"udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
				171	"vai", "ve", "vi", "vo", "vot", "vun",
				172	"wa", "wae", "wak", "wal", "war", "was", "wen", "wo",
				173	"xal", "xh", "xog",
				174	"yao", "yap", "yav", "ybb", "yi", "yo", "ypk", "yue",
				175	"za", "zap", "zbl", "zen", "zgh", "zh", "znd", "zu",
				176	"zun", "zxx", "zza",
				177	NULL,
				178	"in", "iw", "ji", "jw", "sh", /* obsolete language codes */
				179	NULL
				180	};
				181
				182	static const char* const DEPRECATED_LANGUAGES[]={
				183	"in", "iw", "ji", "jw", NULL, NULL
				184	};
				185	static const char* const REPLACEMENT_LANGUAGES[]={
				186	"id", "he", "yi", "jv", NULL, NULL
				187	};
				188
				189	/**
				190	* Table of 3-letter language codes.
				191	*
				192	* This is a lookup table used to convert 3-letter language codes to
				193	* their 2-letter equivalent, where possible. It must be kept in sync
				194	* with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
				195	* same language as LANGUAGES_3[i]. The commented-out lines are
				196	* copied from LANGUAGES to make eyeballing this baby easier.
				197	*
				198	* Where a 3-letter language code has no 2-letter equivalent, the
				199	* 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
				200	*
				201	* This table should be terminated with a NULL entry, followed by a
				202	* second list, and another NULL entry. The two lists correspond to
				203	* the two lists in LANGUAGES.
				204	*/
				205	/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
				206	/* ISO639 table version is 20130531 */
				207	static const char * const LANGUAGES_3[] = {
				208	"aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr",
				209	"afa", "afh", "agq", "ain", "aka", "akk", "ale", "alg",
				210	"alt", "amh", "arg", "ang", "anp", "apa", "ara", "arc",
				211	"arn", "arp", "art", "arw", "asm", "asa", "ast", "ath",
				212	"aus", "ava", "awa", "aym", "aze",
				213	"bak", "bad", "bai", "bal", "ban", "bas", "bat", "bax",
				214	"bbj", "bel", "bej", "bem", "ber", "bez", "bfd", "bul",
				215	"bih", "bho", "bis", "bik", "bin", "bkm", "bla", "bam",
				216	"ben", "bnt", "bod", "bre", "bra", "brx", "bos", "bss",
				217	"btk", "bua", "bug", "bum", "byn", "byv",
				218	"cat", "cad", "cai", "car", "cau", "cay", "cch", "che",
				219	"ceb", "cel", "cgg", "cha", "chb", "chg", "chk", "chm",
				220	"chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "cos",
				221	"cop", "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces",
				222	"csb", "chu", "cus", "chv", "cym",
				223	"dan", "dak", "dar", "dav", "day", "deu", "del", "den",
				224	"dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
				225	"div", "dyo", "dyu", "dzo", "dzg",
				226	"ebu", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
				227	"enm", "epo", "spa", "est", "eus", "ewo",
				228	"fas", "fan", "fat", "ful", "fin", "fil", "fiu", "fij",
				229	"fao", "fon", "fra", "frm", "fro", "frr", "frs", "fur",
				230	"fry",
				231	"gle", "gaa", "gay", "gba", "gla", "gem", "gez", "gil",
				232	"glg", "gmh", "grn", "goh", "gon", "gor", "got", "grb",
				233	"grc", "gsw", "guj", "guz", "glv", "gwi",
				234	"hau", "hai", "haw", "heb", "hin", "hil", "him", "hit",
				235	"hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye",
				236	"her",
				237	"ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ijo",
				238	"ipk", "ilo", "inc", "ine", "inh", "ido", "ira", "iro",
				239	"isl", "ita", "iku",
				240	"jpn", "jbo", "jgo", "jmc", "jpr", "jrb", "jav",
				241	"kat", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
				242	"kbd", "kbl", "kcg", "kde", "kea", "kfo", "kon", "kha",
				243	"khi", "kho", "khq", "kik", "kua", "kaz", "kkj", "kal",
				244	"kln", "khm", "kmb", "kan", "kor", "kok", "kos", "kpe",
				245	"kau", "krc", "krl", "kro", "kru", "kas", "ksb", "ksf",
				246	"ksh", "kur", "kum", "kut", "kom", "cor", "kir",
				247	"lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lug",
				248	"lim", "lkt", "lin", "lao", "lol", "loz", "lit", "lub",
				249	"lua", "lui", "lun", "luo", "lus", "luy", "lav",
				250	"mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
				251	"mde", "mdf", "mdr", "men", "mer", "mfe", "mlg", "mga",
				252	"mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
				253	"mkh", "mal", "mon", "mnc", "mni", "mno", "mol", "moh",
				254	"mos", "mar", "msa", "mlt", "mua", "mul", "mun", "mus",
				255	"mwl", "mwr", "mya", "mye", "myn", "myv",
				256	"nau", "nah", "nai", "nap", "naq", "nob", "nde", "nds",
				257	"nep", "new", "ndo", "nia", "nic", "niu", "nld", "nmg",
				258	"nno", "nnh", "nor", "nog", "non", "nqo", "nbl", "nso",
				259	"nub", "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo",
				260	"nzi",
				261	"oci", "oji", "orm", "ori", "oss", "osa", "ota", "oto",
				262	"pan", "paa", "pag", "pal", "pam", "pap", "pau", "peo",
				263	"phi", "phn", "pli", "pol", "pon", "pra", "pro", "pus",
				264	"por",
				265	"que",
				266	"raj", "rap", "rar", "roh", "run", "ron", "roa", "rof",
				267	"rom", "rus", "rup", "kin", "rwk",
				268	"san", "sad", "sah", "sai", "sal", "sam", "saq", "sas",
				269	"sat", "sba", "sbp", "srd", "scn", "sco", "snd", "sme",
				270	"see", "seh", "sel", "sem", "ses", "sag", "sga", "sgn",
				271	"shi", "shn", "shu", "sin", "sid", "sio", "sit",
				272	"slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
				273	"sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
				274	"srn", "srr", "ssw", "ssa", "ssy", "sot", "sun", "suk",
				275	"sus", "sux", "swe", "swa", "swb", "swc", "syc", "syr",
				276	"tam", "tai", "tel", "tem", "teo", "ter", "tet", "tgk",
				277	"tha", "tir", "tig", "tiv", "tuk", "tkl", "tgl", "tlh",
				278	"tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
				279	"tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
				280	"twq", "tah", "tyv", "tzm",
				281	"udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
				282	"vai", "ven", "vie", "vol", "vot", "vun",
				283	"wln", "wae", "wak", "wal", "war", "was", "wen", "wol",
				284	"xal", "xho", "xog",
				285	"yao", "yap", "yav", "ybb", "yid", "yor", "ypk", "yue",
				286	"zha", "zap", "zbl", "zen", "zgh", "zho", "znd", "zul",
				287	"zun", "zxx", "zza",
				288	NULL,
				289	/* "in", "iw", "ji", "jw", "sh", */
				290	"ind", "heb", "yid", "jaw", "srp",
				291	NULL
				292	};
				293
				294	/**
				295	* Table of 2-letter country codes.
				296	*
				297	* This list must be in sorted order. This list is returned directly
				298	* to the user by some API.
				299	*
				300	* This list must be kept in sync with COUNTRIES_3, with corresponding
				301	* entries matched.
				302	*
				303	* This table should be terminated with a NULL entry, followed by a
				304	* second list, and another NULL entry. The first list is visible to
				305	* user code when this array is returned by API. The second list
				306	* contains codes we support, but do not expose through user API.
				307	*
				308	* Notes:
				309	*
				310	* ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
				311	* http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
				312	* new codes keeping the old ones for compatibility updated to include
				313	* 1999/12/03 revisions CWB
				314	*
				315	* RO(ROM) is now RO(ROU) according to
				316	* http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
				317	*/
				318	static const char * const COUNTRIES[] = {
				319	"AD", "AE", "AF", "AG", "AI", "AL", "AM",
				320	"AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
				321	"BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
				322	"BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
				323	"BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
				324	"CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
				325	"CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK",
				326	"DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
				327	"ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
				328	"GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
				329	"GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
				330	"GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
				331	"ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
				332	"IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
				333	"KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
				334	"LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
				335	"LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
				336	"ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
				337	"MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
				338	"NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
				339	"NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
				340	"PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
				341	"PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
				342	"SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
				343	"SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
				344	"SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",
				345	"TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
				346	"TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
				347	"VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
				348	"WS", "YE", "YT", "ZA", "ZM", "ZW",
				349	NULL,
				350	"AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
				351	NULL
				352	};
				353
				354	static const char* const DEPRECATED_COUNTRIES[] = {
				355	"AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
				356	};
				357	static const char* const REPLACEMENT_COUNTRIES[] = {
				358	/* "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
				359	"CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL /* replacement country codes */
				360	};
				361
				362	/**
				363	* Table of 3-letter country codes.
				364	*
				365	* This is a lookup table used to convert 3-letter country codes to
				366	* their 2-letter equivalent. It must be kept in sync with COUNTRIES.
				367	* For all valid i, COUNTRIES[i] must refer to the same country as
				368	* COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
				369	* to make eyeballing this baby easier.
				370	*
				371	* This table should be terminated with a NULL entry, followed by a
				372	* second list, and another NULL entry. The two lists correspond to
				373	* the two lists in COUNTRIES.
				374	*/
				375	static const char * const COUNTRIES_3[] = {
				376	/* "AD", "AE", "AF", "AG", "AI", "AL", "AM", */
				377	"AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
				378	/* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
				379	"AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
				380	/* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
				381	"BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
				382	/* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */
				383	"BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
				384	/* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
				385	"BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
				386	/* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */
				387	"CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
				388	/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */
				389	"CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
				390	/* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */
				391	"DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
				392	/* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
				393	"ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
				394	/* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
				395	"GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
				396	/* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
				397	"GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
				398	/* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
				399	"GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
				400	/* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
				401	"IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
				402	/* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
				403	"ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
				404	/* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
				405	"COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
				406	/* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
				407	"LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
				408	/* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
				409	"LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
				410	/* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
				411	"MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
				412	/* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
				413	"MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
				414	/* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
				415	"NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
				416	/* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
				417	"NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
				418	/* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
				419	"PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
				420	/* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
				421	"PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
				422	/* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
				423	"SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
				424	/* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */
				425	"SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
				426	/* "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */
				427	"SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
				428	/* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
				429	"TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
				430	/* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
				431	"TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
				432	/* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
				433	"VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
				434	/* "WS", "YE", "YT", "ZA", "ZM", "ZW", */
				435	"WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
				436	NULL,
				437	/* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
				438	"ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
				439	NULL
				440	};
				441
				442	typedef struct CanonicalizationMap {
				443	const char id; / input ID */
				444	const char canonicalID; / canonicalized output ID */
				445	const char keyword; / keyword, or NULL if none */
				446	const char value; / keyword value, or NULL if kw==NULL */
				447	} CanonicalizationMap;
				448
				449	/**
				450	* A map to canonicalize locale IDs. This handles a variety of
				451	* different semantic kinds of transformations.
				452	*/
				453	static const CanonicalizationMap CANONICALIZE_MAP[] = {
				454	{ "", "en_US_POSIX", NULL, NULL }, /* .NET name */
				455	{ "c", "en_US_POSIX", NULL, NULL }, /* POSIX name */
				456	{ "posix", "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
				457	{ "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */
				458	{ "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
				459	{ "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */
				460	{ "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },
				461	{ "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */
				462	{ "de_AT_PREEURO", "de_AT", "currency", "ATS" },
				463	{ "de_DE_PREEURO", "de_DE", "currency", "DEM" },
				464	{ "de_LU_PREEURO", "de_LU", "currency", "LUF" },
				465	{ "el_GR_PREEURO", "el_GR", "currency", "GRD" },
				466	{ "en_BE_PREEURO", "en_BE", "currency", "BEF" },
				467	{ "en_IE_PREEURO", "en_IE", "currency", "IEP" },
				468	{ "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
				469	{ "es_ES_PREEURO", "es_ES", "currency", "ESP" },
				470	{ "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },
				471	{ "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },
				472	{ "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },
				473	{ "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },
				474	{ "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },
				475	{ "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },
				476	{ "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },
				477	{ "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */
				478	{ "it_IT_PREEURO", "it_IT", "currency", "ITL" },
				479	{ "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
				480	{ "nb_NO_NY", "nn_NO", NULL, NULL }, /* "markus said this was ok" :-) */
				481	{ "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },
				482	{ "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },
				483	{ "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },
				484	{ "sr_SP_CYRL", "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
				485	{ "sr_SP_LATN", "sr_Latn_RS", NULL, NULL }, /* .NET name */
				486	{ "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
				487	{ "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
				488	{ "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
				489	{ "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
				490	{ "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */
				491	{ "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */
				492	{ "zh_CHT", "zh_Hant", NULL, NULL }, /* .NET name */
				493	{ "zh_GAN", "gan", NULL, NULL }, /* registered name */
				494	{ "zh_GUOYU", "zh", NULL, NULL }, /* registered name */
				495	{ "zh_HAKKA", "hak", NULL, NULL }, /* registered name */
				496	{ "zh_MIN_NAN", "nan", NULL, NULL }, /* registered name */
				497	{ "zh_WUU", "wuu", NULL, NULL }, /* registered name */
				498	{ "zh_XIANG", "hsn", NULL, NULL }, /* registered name */
				499	{ "zh_YUE", "yue", NULL, NULL }, /* registered name */
				500	};
				501
				502	typedef struct VariantMap {
				503	const char variant; / input ID */
				504	const char keyword; / keyword, or NULL if none */
				505	const char value; / keyword value, or NULL if kw==NULL */
				506	} VariantMap;
				507
				508	static const VariantMap VARIANT_MAP[] = {
				509	{ "EURO", "currency", "EUR" },
				510	{ "PINYIN", "collation", "pinyin" }, /* Solaris variant */
				511	{ "STROKE", "collation", "stroke" } /* Solaris variant */
				512	};
				513
				514	/* ### BCP47 Conversion *******************************************/
				515	/* Test if the locale id has BCP47 u extension and does not have '@' */
				516	#define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1)
				517	/* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
				518	#define _ConvertBCP47(finalID, id, buffer, length,err) \
				519	if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 \|\| U_FAILURE(*err)) { \
				520	finalID=id; \
				521	} else { \
				522	finalID=buffer; \
				523	}
				524	/* Gets the size of the shortest subtag in the given localeID. */
				525	static int32_t getShortestSubtagLength(const char *localeID) {
				526	int32_t localeIDLength = uprv_strlen(localeID);
				527	int32_t length = localeIDLength;
				528	int32_t tmpLength = 0;
				529	int32_t i;
				530	UBool reset = TRUE;
				531
				532	for (i = 0; i < localeIDLength; i++) {
				533	if (localeID[i] != '_' && localeID[i] != '-') {
				534	if (reset) {
				535	tmpLength = 0;
				536	reset = FALSE;
				537	}
				538	tmpLength++;
				539	} else {
				540	if (tmpLength != 0 && tmpLength < length) {
				541	length = tmpLength;
				542	}
				543	reset = TRUE;
				544	}
				545	}
				546
				547	return length;
				548	}
				549
				550	/* ### Keywords **************************************************/
				551
				552	#define ULOC_KEYWORD_BUFFER_LEN 25
				553	#define ULOC_MAX_NO_KEYWORDS 25
				554
				555	U_CAPI const char * U_EXPORT2
				556	locale_getKeywordsStart(const char *localeID) {
				557	const char *result = NULL;
				558	if((result = uprv_strchr(localeID, '@')) != NULL) {
				559	return result;
				560	}
				561	#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
				562	else {
				563	/* We do this because the @ sign is variant, and the @ sign used on one
				564	EBCDIC machine won't be compiled the same way on other EBCDIC based
				565	machines. */
				566	static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
				567	const uint8_t *charToFind = ebcdicSigns;
				568	while(*charToFind) {
				569	if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
				570	return result;
				571	}
				572	charToFind++;
				573	}
				574	}
				575	#endif
				576	return NULL;
				577	}
				578
				579	/**
				580	* @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
				581	* @param keywordName incoming name to be canonicalized
				582	* @param status return status (keyword too long)
				583	* @return length of the keyword name
				584	*/
				585	static int32_t locale_canonKeywordName(char buf, const char keywordName, UErrorCode *status)
				586	{
				587	int32_t i;
				588	int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
				589
				590	if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
				591	/* keyword name too long for internal buffer */
				592	*status = U_INTERNAL_PROGRAM_ERROR;
				593	return 0;
				594	}
				595
				596	/* normalize the keyword name */
				597	for(i = 0; i < keywordNameLen; i++) {
				598	buf[i] = uprv_tolower(keywordName[i]);
				599	}
				600	buf[i] = 0;
				601
				602	return keywordNameLen;
				603	}
				604
				605	typedef struct {
				606	char keyword[ULOC_KEYWORD_BUFFER_LEN];
				607	int32_t keywordLen;
				608	const char *valueStart;
				609	int32_t valueLen;
				610	} KeywordStruct;
				611
				612	static int32_t U_CALLCONV
				613	compareKeywordStructs(const void * /context/, const void left, const void right) {
				614	const char* leftString = ((const KeywordStruct *)left)->keyword;
				615	const char* rightString = ((const KeywordStruct *)right)->keyword;
				616	return uprv_strcmp(leftString, rightString);
				617	}
				618
				619	/**
				620	* Both addKeyword and addValue must already be in canonical form.
				621	* Either both addKeyword and addValue are NULL, or neither is NULL.
				622	* If they are not NULL they must be zero terminated.
				623	* If addKeyword is not NULL it must have length small enough to fit in KeywordStruct.keyword.
				624	*/
				625	static int32_t
				626	_getKeywords(const char *localeID,
				627	char prev,
				628	char *keywords, int32_t keywordCapacity,
				629	char values, int32_t valuesCapacity, int32_t valLen,
				630	UBool valuesToo,
				631	const char* addKeyword,
				632	const char* addValue,
				633	UErrorCode *status)
				634	{
				635	KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
				636
				637	int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
				638	int32_t numKeywords = 0;
				639	const char* pos = localeID;
				640	const char* equalSign = NULL;
				641	const char* semicolon = NULL;
				642	int32_t i = 0, j, n;
				643	int32_t keywordsLen = 0;
				644	int32_t valuesLen = 0;
				645
				646	if(prev == '@') { /* start of keyword definition */
				647	/* we will grab pairs, trim spaces, lowercase keywords, sort and return */
				648	do {
				649	UBool duplicate = FALSE;
				650	/* skip leading spaces */
				651	while(*pos == ' ') {
				652	pos++;
				653	}
				654	if (!pos) { / handle trailing "; " */
				655	break;
				656	}
				657	if(numKeywords == maxKeywords) {
				658	*status = U_INTERNAL_PROGRAM_ERROR;
				659	return 0;
				660	}
				661	equalSign = uprv_strchr(pos, '=');
				662	semicolon = uprv_strchr(pos, ';');
				663	/* lack of '=' [foo@currency] is illegal */
				664	/* ';' before '=' [foo@currency;collation=pinyin] is illegal */
				665	if(!equalSign \|\| (semicolon && semicolon<equalSign)) {
				666	*status = U_INVALID_FORMAT_ERROR;
				667	return 0;
				668	}
				669	/* need to normalize both keyword and keyword name */
				670	if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
				671	/* keyword name too long for internal buffer */
				672	*status = U_INTERNAL_PROGRAM_ERROR;
				673	return 0;
				674	}
				675	for(i = 0, n = 0; i < equalSign - pos; ++i) {
				676	if (pos[i] != ' ') {
				677	keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
				678	}
				679	}
				680
				681	/* zero-length keyword is an error. */
				682	if (n == 0) {
				683	*status = U_INVALID_FORMAT_ERROR;
				684	return 0;
				685	}
				686
				687	keywordList[numKeywords].keyword[n] = 0;
				688	keywordList[numKeywords].keywordLen = n;
				689	/* now grab the value part. First we skip the '=' */
				690	equalSign++;
				691	/* then we leading spaces */
				692	while(*equalSign == ' ') {
				693	equalSign++;
				694	}
				695
				696	/* Premature end or zero-length value */
				697	if (!equalSign \|\| equalSign == semicolon) {
				698	*status = U_INVALID_FORMAT_ERROR;
				699	return 0;
				700	}
				701
				702	keywordList[numKeywords].valueStart = equalSign;
				703
				704	pos = semicolon;
				705	i = 0;
				706	if(pos) {
				707	while(*(pos - i - 1) == ' ') {
				708	i++;
				709	}
				710	keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
				711	pos++;
				712	} else {
				713	i = (int32_t)uprv_strlen(equalSign);
				714	while(i && equalSign[i-1] == ' ') {
				715	i--;
				716	}
				717	keywordList[numKeywords].valueLen = i;
				718	}
				719	/* If this is a duplicate keyword, then ignore it */
				720	for (j=0; j<numKeywords; ++j) {
				721	if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
				722	duplicate = TRUE;
				723	break;
				724	}
				725	}
				726	if (!duplicate) {
				727	++numKeywords;
				728	}
				729	} while(pos);
				730
				731	/* Handle addKeyword/addValue. */
				732	if (addKeyword != NULL) {
				733	UBool duplicate = FALSE;
				734	U_ASSERT(addValue != NULL);
				735	/* Search for duplicate; if found, do nothing. Explicit keyword
				736	overrides addKeyword. */
				737	for (j=0; j<numKeywords; ++j) {
				738	if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
				739	duplicate = TRUE;
				740	break;
				741	}
				742	}
				743	if (!duplicate) {
				744	if (numKeywords == maxKeywords) {
				745	*status = U_INTERNAL_PROGRAM_ERROR;
				746	return 0;
				747	}
				748	uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
				749	keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
				750	keywordList[numKeywords].valueStart = addValue;
				751	keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
				752	++numKeywords;
				753	}
				754	} else {
				755	U_ASSERT(addValue == NULL);
				756	}
				757
				758	/* now we have a list of keywords */
				759	/* we need to sort it */
				760	uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
				761
				762	/* Now construct the keyword part */
				763	for(i = 0; i < numKeywords; i++) {
				764	if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
				765	uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
				766	if(valuesToo) {
				767	keywords[keywordsLen + keywordList[i].keywordLen] = '=';
				768	} else {
				769	keywords[keywordsLen + keywordList[i].keywordLen] = 0;
				770	}
				771	}
				772	keywordsLen += keywordList[i].keywordLen + 1;
				773	if(valuesToo) {
				774	if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
				775	uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
				776	}
				777	keywordsLen += keywordList[i].valueLen;
				778
				779	if(i < numKeywords - 1) {
				780	if(keywordsLen < keywordCapacity) {
				781	keywords[keywordsLen] = ';';
				782	}
				783	keywordsLen++;
				784	}
				785	}
				786	if(values) {
				787	if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
				788	uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
				789	values[valuesLen + keywordList[i].valueLen] = 0;
				790	}
				791	valuesLen += keywordList[i].valueLen + 1;
				792	}
				793	}
				794	if(values) {
				795	values[valuesLen] = 0;
				796	if(valLen) {
				797	*valLen = valuesLen;
				798	}
				799	}
				800	return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
				801	} else {
				802	return 0;
				803	}
				804	}
				805
				806	U_CFUNC int32_t
				807	locale_getKeywords(const char *localeID,
				808	char prev,
				809	char *keywords, int32_t keywordCapacity,
				810	char values, int32_t valuesCapacity, int32_t valLen,
				811	UBool valuesToo,
				812	UErrorCode *status) {
				813	return _getKeywords(localeID, prev, keywords, keywordCapacity,
				814	values, valuesCapacity, valLen, valuesToo,
				815	NULL, NULL, status);
				816	}
				817
				818	U_CAPI int32_t U_EXPORT2
				819	uloc_getKeywordValue(const char* localeID,
				820	const char* keywordName,
				821	char* buffer, int32_t bufferCapacity,
				822	UErrorCode* status)
				823	{
				824	const char* startSearchHere = NULL;
				825	const char* nextSeparator = NULL;
				826	char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
				827	char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
				828	int32_t i = 0;
				829	int32_t result = 0;
				830
				831	if(status && U_SUCCESS(*status) && localeID) {
				832	char tempBuffer[ULOC_FULLNAME_CAPACITY];
				833	const char* tmpLocaleID;
				834
				835	if (_hasBCP47Extension(localeID)) {
				836	_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
				837	} else {
				838	tmpLocaleID=localeID;
				839	}
				840
				841	startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
				842	if(startSearchHere == NULL) {
				843	/* no keywords, return at once */
				844	return 0;
				845	}
				846
				847	locale_canonKeywordName(keywordNameBuffer, keywordName, status);
				848	if(U_FAILURE(*status)) {
				849	return 0;
				850	}
				851
				852	/* find the first keyword */
				853	while(startSearchHere) {
				854	startSearchHere++;
				855	/* skip leading spaces (allowed?) */
				856	while(*startSearchHere == ' ') {
				857	startSearchHere++;
				858	}
				859	nextSeparator = uprv_strchr(startSearchHere, '=');
				860	/* need to normalize both keyword and keyword name */
				861	if(!nextSeparator) {
				862	break;
				863	}
				864	if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
				865	/* keyword name too long for internal buffer */
				866	*status = U_INTERNAL_PROGRAM_ERROR;
				867	return 0;
				868	}
				869	for(i = 0; i < nextSeparator - startSearchHere; i++) {
				870	localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
				871	}
				872	/* trim trailing spaces */
				873	while(startSearchHere[i-1] == ' ') {
				874	i--;
				875	U_ASSERT(i>=0);
				876	}
				877	localeKeywordNameBuffer[i] = 0;
				878
				879	startSearchHere = uprv_strchr(nextSeparator, ';');
				880
				881	if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
				882	nextSeparator++;
				883	while(*nextSeparator == ' ') {
				884	nextSeparator++;
				885	}
				886	/* we actually found the keyword. Copy the value */
				887	if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
				888	while(*(startSearchHere-1) == ' ') {
				889	startSearchHere--;
				890	}
				891	uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
				892	result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
				893	} else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
				894	i = (int32_t)uprv_strlen(nextSeparator);
				895	while(nextSeparator[i - 1] == ' ') {
				896	i--;
				897	}
				898	uprv_strncpy(buffer, nextSeparator, i);
				899	result = u_terminateChars(buffer, bufferCapacity, i, status);
				900	} else {
				901	/* give a bigger buffer, please */
				902	*status = U_BUFFER_OVERFLOW_ERROR;
				903	if(startSearchHere) {
				904	result = (int32_t)(startSearchHere - nextSeparator);
				905	} else {
				906	result = (int32_t)uprv_strlen(nextSeparator);
				907	}
				908	}
				909	return result;
				910	}
				911	}
				912	}
				913	return 0;
				914	}
				915
				916	U_CAPI int32_t U_EXPORT2
				917	uloc_setKeywordValue(const char* keywordName,
				918	const char* keywordValue,
				919	char* buffer, int32_t bufferCapacity,
				920	UErrorCode* status)
				921	{
				922	/* TODO: sorting. removal. */
				923	int32_t keywordNameLen;
				924	int32_t keywordValueLen;
				925	int32_t bufLen;
				926	int32_t needLen = 0;
				927	int32_t foundValueLen;
				928	int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
				929	char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
				930	char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
				931	int32_t i = 0;
				932	int32_t rc;
				933	char* nextSeparator = NULL;
				934	char* nextEqualsign = NULL;
				935	char* startSearchHere = NULL;
				936	char* keywordStart = NULL;
				937	char *insertHere = NULL;
				938	if(U_FAILURE(*status)) {
				939	return -1;
				940	}
				941	if(bufferCapacity>1) {
				942	bufLen = (int32_t)uprv_strlen(buffer);
				943	} else {
				944	*status = U_ILLEGAL_ARGUMENT_ERROR;
				945	return 0;
				946	}
				947	if(bufferCapacity<bufLen) {
				948	/* The capacity is less than the length?! Is this NULL terminated? */
				949	*status = U_ILLEGAL_ARGUMENT_ERROR;
				950	return 0;
				951	}
				952	if(keywordValue && !*keywordValue) {
				953	keywordValue = NULL;
				954	}
				955	if(keywordValue) {
				956	keywordValueLen = (int32_t)uprv_strlen(keywordValue);
				957	} else {
				958	keywordValueLen = 0;
				959	}
				960	keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
				961	if(U_FAILURE(*status)) {
				962	return 0;
				963	}
				964	startSearchHere = (char*)locale_getKeywordsStart(buffer);
				965	if(startSearchHere == NULL \|\| (startSearchHere[1]==0)) {
				966	if(!keywordValue) { /* no keywords = nothing to remove */
				967	return bufLen;
				968	}
				969
				970	needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
				971	if(startSearchHere) { /* had a single @ */
				972	needLen--; /* already had the @ */
				973	/* startSearchHere points at the @ */
				974	} else {
				975	startSearchHere=buffer+bufLen;
				976	}
				977	if(needLen >= bufferCapacity) {
				978	*status = U_BUFFER_OVERFLOW_ERROR;
				979	return needLen; /* no change */
				980	}
				981	*startSearchHere = '@';
				982	startSearchHere++;
				983	uprv_strcpy(startSearchHere, keywordNameBuffer);
				984	startSearchHere += keywordNameLen;
				985	*startSearchHere = '=';
				986	startSearchHere++;
				987	uprv_strcpy(startSearchHere, keywordValue);
				988	startSearchHere+=keywordValueLen;
				989	return needLen;
				990	} /* end shortcut - no @ */
				991
				992	keywordStart = startSearchHere;
				993	/* search for keyword */
				994	while(keywordStart) {
				995	keywordStart++;
				996	/* skip leading spaces (allowed?) */
				997	while(*keywordStart == ' ') {
				998	keywordStart++;
				999	}
				1000	nextEqualsign = uprv_strchr(keywordStart, '=');
				1001	/* need to normalize both keyword and keyword name */
				1002	if(!nextEqualsign) {
				1003	break;
				1004	}
				1005	if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
				1006	/* keyword name too long for internal buffer */
				1007	*status = U_INTERNAL_PROGRAM_ERROR;
				1008	return 0;
				1009	}
				1010	for(i = 0; i < nextEqualsign - keywordStart; i++) {
				1011	localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
				1012	}
				1013	/* trim trailing spaces */
				1014	while(keywordStart[i-1] == ' ') {
				1015	i--;
				1016	}
				1017	U_ASSERT(i>=0 && i<ULOC_KEYWORD_BUFFER_LEN);
				1018	localeKeywordNameBuffer[i] = 0;
				1019
				1020	nextSeparator = uprv_strchr(nextEqualsign, ';');
				1021	rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
				1022	if(rc == 0) {
				1023	nextEqualsign++;
				1024	while(*nextEqualsign == ' ') {
				1025	nextEqualsign++;
				1026	}
				1027	/* we actually found the keyword. Change the value */
				1028	if (nextSeparator) {
				1029	keywordAtEnd = 0;
				1030	foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
				1031	} else {
				1032	keywordAtEnd = 1;
				1033	foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
				1034	}
				1035	if(keywordValue) { /* adding a value - not removing */
				1036	if(foundValueLen == keywordValueLen) {
				1037	uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
				1038	return bufLen; /* no change in size */
				1039	} else if(foundValueLen > keywordValueLen) {
				1040	int32_t delta = foundValueLen - keywordValueLen;
				1041	if(nextSeparator) { /* RH side */
				1042	uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
				1043	}
				1044	uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
				1045	bufLen -= delta;
				1046	buffer[bufLen]=0;
				1047	return bufLen;
				1048	} else { /* FVL < KVL */
				1049	int32_t delta = keywordValueLen - foundValueLen;
				1050	if((bufLen+delta) >= bufferCapacity) {
				1051	*status = U_BUFFER_OVERFLOW_ERROR;
				1052	return bufLen+delta;
				1053	}
				1054	if(nextSeparator) { /* RH side */
				1055	uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
				1056	}
				1057	uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
				1058	bufLen += delta;
				1059	buffer[bufLen]=0;
				1060	return bufLen;
				1061	}
				1062	} else { /* removing a keyword */
				1063	if(keywordAtEnd) {
				1064	/* zero out the ';' or '@' just before startSearchhere */
				1065	keywordStart[-1] = 0;
				1066	return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
				1067	} else {
				1068	uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
				1069	keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
				1070	return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
				1071	}
				1072	}
				1073	} else if(rc<0){ /* end match keyword */
				1074	/* could insert at this location. */
				1075	insertHere = keywordStart;
				1076	}
				1077	keywordStart = nextSeparator;
				1078	} /* end loop searching */
				1079
				1080	if(!keywordValue) {
				1081	return bufLen; /* removal of non-extant keyword - no change */
				1082	}
				1083
				1084	/* we know there is at least one keyword. */
				1085	needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
				1086	if(needLen >= bufferCapacity) {
				1087	*status = U_BUFFER_OVERFLOW_ERROR;
				1088	return needLen; /* no change */
				1089	}
				1090
				1091	if(insertHere) {
				1092	uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
				1093	keywordStart = insertHere;
				1094	} else {
				1095	keywordStart = buffer+bufLen;
				1096	*keywordStart = ';';
				1097	keywordStart++;
				1098	}
				1099	uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
				1100	keywordStart += keywordNameLen;
				1101	*keywordStart = '=';
				1102	keywordStart++;
				1103	uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
				1104	keywordStart+=keywordValueLen;
				1105	if(insertHere) {
				1106	*keywordStart = ';';
				1107	keywordStart++;
				1108	}
				1109	buffer[needLen]=0;
				1110	return needLen;
				1111	}
				1112
				1113	/* ### ID parsing implementation **************************************************/
				1114
				1115	#define _isPrefixLetter(a) ((a=='x')\|\|(a=='X')\|\|(a=='i')\|\|(a=='I'))
				1116
				1117	/*returns TRUE if one of the special prefixes is here (s=string)
				1118	'x-' or 'i-' */
				1119	#define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1]))
				1120
				1121	/* Dot terminates it because of POSIX form where dot precedes the codepage
				1122	* except for variant
				1123	*/
				1124	#define _isTerminator(a) ((a==0)\|\|(a=='.')\|\|(a=='@'))
				1125
				1126	static char* _strnchr(const char* str, int32_t len, char c) {
				1127	U_ASSERT(str != 0 && len >= 0);
				1128	while (len-- != 0) {
				1129	char d = *str;
				1130	if (d == c) {
				1131	return (char*) str;
				1132	} else if (d == 0) {
				1133	break;
				1134	}
				1135	++str;
				1136	}
				1137	return NULL;
				1138	}
				1139
				1140	/**
				1141	* Lookup 'key' in the array 'list'. The array 'list' should contain
				1142	* a NULL entry, followed by more entries, and a second NULL entry.
				1143	*
				1144	* The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
				1145	* COUNTRIES_3.
				1146	*/
				1147	static int16_t _findIndex(const char* const* list, const char* key)
				1148	{
				1149	const char* const* anchor = list;
				1150	int32_t pass = 0;
				1151
				1152	/* Make two passes through two NULL-terminated arrays at 'list' */
				1153	while (pass++ < 2) {
				1154	while (*list) {
				1155	if (uprv_strcmp(key, *list) == 0) {
				1156	return (int16_t)(list - anchor);
				1157	}
				1158	list++;
				1159	}
				1160	++list; /* skip final NULL CWB/
				1161	}
				1162	return -1;
				1163	}
				1164
				1165	/* count the length of src while copying it to dest; return strlen(src) */
				1166	static inline int32_t
				1167	_copyCount(char dest, int32_t destCapacity, const char src) {
				1168	const char *anchor;
				1169	char c;
				1170
				1171	anchor=src;
				1172	for(;;) {
				1173	if((c=*src)==0) {
				1174	return (int32_t)(src-anchor);
				1175	}
				1176	if(destCapacity<=0) {
				1177	return (int32_t)((src-anchor)+uprv_strlen(src));
				1178	}
				1179	++src;
				1180	*dest++=c;
				1181	--destCapacity;
				1182	}
				1183	}
				1184
				1185	U_CFUNC const char*
				1186	uloc_getCurrentCountryID(const char* oldID){
				1187	int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
				1188	if (offset >= 0) {
				1189	return REPLACEMENT_COUNTRIES[offset];
				1190	}
				1191	return oldID;
				1192	}
				1193	U_CFUNC const char*
				1194	uloc_getCurrentLanguageID(const char* oldID){
				1195	int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
				1196	if (offset >= 0) {
				1197	return REPLACEMENT_LANGUAGES[offset];
				1198	}
				1199	return oldID;
				1200	}
				1201	/*
				1202	* the internal functions _getLanguage(), _getCountry(), _getVariant()
				1203	* avoid duplicating code to handle the earlier locale ID pieces
				1204	* in the functions for the later ones by
				1205	* setting the *pEnd pointer to where they stopped parsing
				1206	*
				1207	* TODO try to use this in Locale
				1208	*/
				1209	U_CFUNC int32_t
				1210	ulocimp_getLanguage(const char *localeID,
				1211	char *language, int32_t languageCapacity,
				1212	const char **pEnd) {
				1213	int32_t i=0;
				1214	int32_t offset;
				1215	char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
				1216
				1217	/* if it starts with i- or x- then copy that prefix */
				1218	if(_isIDPrefix(localeID)) {
				1219	if(i<languageCapacity) {
				1220	language[i]=(char)uprv_tolower(*localeID);
				1221	}
				1222	if(i<languageCapacity) {
				1223	language[i+1]='-';
				1224	}
				1225	i+=2;
				1226	localeID+=2;
				1227	}
				1228
				1229	/* copy the language as far as possible and count its length */
				1230	while(!_isTerminator(localeID) && !_isIDSeparator(localeID)) {
				1231	if(i<languageCapacity) {
				1232	language[i]=(char)uprv_tolower(*localeID);
				1233	}
				1234	if(i<3) {
				1235	U_ASSERT(i>=0);
				1236	lang[i]=(char)uprv_tolower(*localeID);
				1237	}
				1238	i++;
				1239	localeID++;
				1240	}
				1241
				1242	if(i==3) {
				1243	/* convert 3 character code to 2 character code if possible CWB/
				1244	offset=_findIndex(LANGUAGES_3, lang);
				1245	if(offset>=0) {
				1246	i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
				1247	}
				1248	}
				1249
				1250	if(pEnd!=NULL) {
				1251	*pEnd=localeID;
				1252	}
				1253	return i;
				1254	}
				1255
				1256	U_CFUNC int32_t
				1257	ulocimp_getScript(const char *localeID,
				1258	char *script, int32_t scriptCapacity,
				1259	const char **pEnd)
				1260	{
				1261	int32_t idLen = 0;
				1262
				1263	if (pEnd != NULL) {
				1264	*pEnd = localeID;
				1265	}
				1266
				1267	/* copy the second item as far as possible and count its length */
				1268	while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
				1269	&& uprv_isASCIILetter(localeID[idLen])) {
				1270	idLen++;
				1271	}
				1272
				1273	/* If it's exactly 4 characters long, then it's a script and not a country. */
				1274	if (idLen == 4) {
				1275	int32_t i;
				1276	if (pEnd != NULL) {
				1277	*pEnd = localeID+idLen;
				1278	}
				1279	if(idLen > scriptCapacity) {
				1280	idLen = scriptCapacity;
				1281	}
				1282	if (idLen >= 1) {
				1283	script[0]=(char)uprv_toupper(*(localeID++));
				1284	}
				1285	for (i = 1; i < idLen; i++) {
				1286	script[i]=(char)uprv_tolower(*(localeID++));
				1287	}
				1288	}
				1289	else {
				1290	idLen = 0;
				1291	}
				1292	return idLen;
				1293	}
				1294
				1295	U_CFUNC int32_t
				1296	ulocimp_getCountry(const char *localeID,
				1297	char *country, int32_t countryCapacity,
				1298	const char **pEnd)
				1299	{
				1300	int32_t idLen=0;
				1301	char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
				1302	int32_t offset;
				1303
				1304	/* copy the country as far as possible and count its length */
				1305	while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
				1306	if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /CWB/
				1307	cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
				1308	}
				1309	idLen++;
				1310	}
				1311
				1312	/* the country should be either length 2 or 3 */
				1313	if (idLen == 2 \|\| idLen == 3) {
				1314	UBool gotCountry = FALSE;
				1315	/* convert 3 character code to 2 character code if possible CWB/
				1316	if(idLen==3) {
				1317	offset=_findIndex(COUNTRIES_3, cnty);
				1318	if(offset>=0) {
				1319	idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
				1320	gotCountry = TRUE;
				1321	}
				1322	}
				1323	if (!gotCountry) {
				1324	int32_t i = 0;
				1325	for (i = 0; i < idLen; i++) {
				1326	if (i < countryCapacity) {
				1327	country[i]=(char)uprv_toupper(localeID[i]);
				1328	}
				1329	}
				1330	}
				1331	localeID+=idLen;
				1332	} else {
				1333	idLen = 0;
				1334	}
				1335
				1336	if(pEnd!=NULL) {
				1337	*pEnd=localeID;
				1338	}
				1339
				1340	return idLen;
				1341	}
				1342
				1343	/**
				1344	* @param needSeparator if true, then add leading '_' if any variants
				1345	* are added to 'variant'
				1346	*/
				1347	static int32_t
				1348	_getVariantEx(const char *localeID,
				1349	char prev,
				1350	char *variant, int32_t variantCapacity,
				1351	UBool needSeparator) {
				1352	int32_t i=0;
				1353
				1354	/* get one or more variant tags and separate them with '_' */
				1355	if(_isIDSeparator(prev)) {
				1356	/* get a variant string after a '-' or '_' */
				1357	while(!_isTerminator(*localeID)) {
				1358	if (needSeparator) {
				1359	if (i<variantCapacity) {
				1360	variant[i] = '_';
				1361	}
				1362	++i;
				1363	needSeparator = FALSE;
				1364	}
				1365	if(i<variantCapacity) {
				1366	variant[i]=(char)uprv_toupper(*localeID);
				1367	if(variant[i]=='-') {
				1368	variant[i]='_';
				1369	}
				1370	}
				1371	i++;
				1372	localeID++;
				1373	}
				1374	}
				1375
				1376	/* if there is no variant tag after a '-' or '_' then look for '@' */
				1377	if(i==0) {
				1378	if(prev=='@') {
				1379	/* keep localeID */
				1380	} else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
				1381	++localeID; /* point after the '@' */
				1382	} else {
				1383	return 0;
				1384	}
				1385	while(!_isTerminator(*localeID)) {
				1386	if (needSeparator) {
				1387	if (i<variantCapacity) {
				1388	variant[i] = '_';
				1389	}
				1390	++i;
				1391	needSeparator = FALSE;
				1392	}
				1393	if(i<variantCapacity) {
				1394	variant[i]=(char)uprv_toupper(*localeID);
				1395	if(variant[i]=='-' \|\| variant[i]==',') {
				1396	variant[i]='_';
				1397	}
				1398	}
				1399	i++;
				1400	localeID++;
				1401	}
				1402	}
				1403
				1404	return i;
				1405	}
				1406
				1407	static int32_t
				1408	_getVariant(const char *localeID,
				1409	char prev,
				1410	char *variant, int32_t variantCapacity) {
				1411	return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
				1412	}
				1413
				1414	/**
				1415	* Delete ALL instances of a variant from the given list of one or
				1416	* more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
				1417	* @param variants the source string of one or more variants,
				1418	* separated by '_'. This will be MODIFIED IN PLACE. Not zero
				1419	* terminated; if it is, trailing zero will NOT be maintained.
				1420	* @param variantsLen length of variants
				1421	* @param toDelete variant to delete, without separators, e.g. "EURO"
				1422	* or "PREEURO"; not zero terminated
				1423	* @param toDeleteLen length of toDelete
				1424	* @return number of characters deleted from variants
				1425	*/
				1426	static int32_t
				1427	_deleteVariant(char* variants, int32_t variantsLen,
				1428	const char* toDelete, int32_t toDeleteLen)
				1429	{
				1430	int32_t delta = 0; /* number of chars deleted */
				1431	for (;;) {
				1432	UBool flag = FALSE;
				1433	if (variantsLen < toDeleteLen) {
				1434	return delta;
				1435	}
				1436	if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
				1437	(variantsLen == toDeleteLen \|\|
				1438	(flag=(variants[toDeleteLen] == '_'))))
				1439	{
				1440	int32_t d = toDeleteLen + (flag?1:0);
				1441	variantsLen -= d;
				1442	delta += d;
				1443	if (variantsLen > 0) {
				1444	uprv_memmove(variants, variants+d, variantsLen);
				1445	}
				1446	} else {
				1447	char* p = _strnchr(variants, variantsLen, '_');
				1448	if (p == NULL) {
				1449	return delta;
				1450	}
				1451	++p;
				1452	variantsLen -= (int32_t)(p - variants);
				1453	variants = p;
				1454	}
				1455	}
				1456	}
				1457
				1458	/* Keyword enumeration */
				1459
				1460	typedef struct UKeywordsContext {
				1461	char* keywords;
				1462	char* current;
				1463	} UKeywordsContext;
				1464
				1465	static void U_CALLCONV
				1466	uloc_kw_closeKeywords(UEnumeration *enumerator) {
				1467	uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
				1468	uprv_free(enumerator->context);
				1469	uprv_free(enumerator);
				1470	}
				1471
				1472	static int32_t U_CALLCONV
				1473	uloc_kw_countKeywords(UEnumeration en, UErrorCode /status/) {
				1474	char kw = ((UKeywordsContext )en->context)->keywords;
				1475	int32_t result = 0;
				1476	while(*kw) {
				1477	result++;
				1478	kw += uprv_strlen(kw)+1;
				1479	}
				1480	return result;
				1481	}
				1482
				1483	static const char* U_CALLCONV
				1484	uloc_kw_nextKeyword(UEnumeration* en,
				1485	int32_t* resultLength,
				1486	UErrorCode* /status/) {
				1487	const char* result = ((UKeywordsContext *)en->context)->current;
				1488	int32_t len = 0;
				1489	if(*result) {
				1490	len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
				1491	((UKeywordsContext *)en->context)->current += len+1;
				1492	} else {
				1493	result = NULL;
				1494	}
				1495	if (resultLength) {
				1496	*resultLength = len;
				1497	}
				1498	return result;
				1499	}
				1500
				1501	static void U_CALLCONV
				1502	uloc_kw_resetKeywords(UEnumeration* en,
				1503	UErrorCode* /status/) {
				1504	((UKeywordsContext )en->context)->current = ((UKeywordsContext )en->context)->keywords;
				1505	}
				1506
				1507	static const UEnumeration gKeywordsEnum = {
				1508	NULL,
				1509	NULL,
				1510	uloc_kw_closeKeywords,
				1511	uloc_kw_countKeywords,
				1512	uenum_unextDefault,
				1513	uloc_kw_nextKeyword,
				1514	uloc_kw_resetKeywords
				1515	};
				1516
				1517	U_CAPI UEnumeration* U_EXPORT2
				1518	uloc_openKeywordList(const char keywordList, int32_t keywordListSize, UErrorCode status)
				1519	{
				1520	UKeywordsContext *myContext = NULL;
				1521	UEnumeration *result = NULL;
				1522
				1523	if(U_FAILURE(*status)) {
				1524	return NULL;
				1525	}
				1526	result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
				1527	/* Null pointer test */
				1528	if (result == NULL) {
				1529	*status = U_MEMORY_ALLOCATION_ERROR;
				1530	return NULL;
				1531	}
				1532	uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
				1533	myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
				1534	if (myContext == NULL) {
				1535	*status = U_MEMORY_ALLOCATION_ERROR;
				1536	uprv_free(result);
				1537	return NULL;
				1538	}
				1539	myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
				1540	uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
				1541	myContext->keywords[keywordListSize] = 0;
				1542	myContext->current = myContext->keywords;
				1543	result->context = myContext;
				1544	return result;
				1545	}
				1546
				1547	U_CAPI UEnumeration* U_EXPORT2
				1548	uloc_openKeywords(const char* localeID,
				1549	UErrorCode* status)
				1550	{
				1551	int32_t i=0;
				1552	char keywords[256];
				1553	int32_t keywordsCapacity = 256;
				1554	char tempBuffer[ULOC_FULLNAME_CAPACITY];
				1555	const char* tmpLocaleID;
				1556
				1557	if(status==NULL \|\| U_FAILURE(*status)) {
				1558	return 0;
				1559	}
				1560
				1561	if (_hasBCP47Extension(localeID)) {
				1562	_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
				1563	} else {
				1564	if (localeID==NULL) {
				1565	localeID=uloc_getDefault();
				1566	}
				1567	tmpLocaleID=localeID;
				1568	}
				1569
				1570	/* Skip the language */
				1571	ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
				1572	if(_isIDSeparator(*tmpLocaleID)) {
				1573	const char *scriptID;
				1574	/* Skip the script if available */
				1575	ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
				1576	if(scriptID != tmpLocaleID+1) {
				1577	/* Found optional script */
				1578	tmpLocaleID = scriptID;
				1579	}
				1580	/* Skip the Country */
				1581	if (_isIDSeparator(*tmpLocaleID)) {
				1582	ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
				1583	if(_isIDSeparator(*tmpLocaleID)) {
				1584	_getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
				1585	}
				1586	}
				1587	}
				1588
				1589	/* keywords are located after '@' */
				1590	if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
				1591	i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
				1592	}
				1593
				1594	if(i) {
				1595	return uloc_openKeywordList(keywords, i, status);
				1596	} else {
				1597	return NULL;
				1598	}
				1599	}
				1600
				1601
				1602	/* bit-flags for 'options' parameter of _canonicalize */
				1603	#define _ULOC_STRIP_KEYWORDS 0x2
				1604	#define _ULOC_CANONICALIZE 0x1
				1605
				1606	#define OPTION_SET(options, mask) ((options & mask) != 0)
				1607
				1608	static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
				1609	#define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0])
				1610
				1611	/**
				1612	* Canonicalize the given localeID, to level 1 or to level 2,
				1613	* depending on the options. To specify level 1, pass in options=0.
				1614	* To specify level 2, pass in options=_ULOC_CANONICALIZE.
				1615	*
				1616	* This is the code underlying uloc_getName and uloc_canonicalize.
				1617	*/
				1618	static int32_t
				1619	_canonicalize(const char* localeID,
				1620	char* result,
				1621	int32_t resultCapacity,
				1622	uint32_t options,
				1623	UErrorCode* err) {
				1624	int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
				1625	char localeBuffer[ULOC_FULLNAME_CAPACITY];
				1626	char tempBuffer[ULOC_FULLNAME_CAPACITY];
				1627	const char* origLocaleID;
				1628	const char* tmpLocaleID;
				1629	const char* keywordAssign = NULL;
				1630	const char* separatorIndicator = NULL;
				1631	const char* addKeyword = NULL;
				1632	const char* addValue = NULL;
				1633	char* name;
				1634	char* variant = NULL; /* pointer into name, or NULL */
				1635
				1636	if (U_FAILURE(*err)) {
				1637	return 0;
				1638	}
				1639
				1640	if (_hasBCP47Extension(localeID)) {
				1641	_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
				1642	} else {
				1643	if (localeID==NULL) {
				1644	localeID=uloc_getDefault();
				1645	}
				1646	tmpLocaleID=localeID;
				1647	}
				1648
				1649	origLocaleID=tmpLocaleID;
				1650
				1651	/* if we are doing a full canonicalization, then put results in
				1652	localeBuffer, if necessary; otherwise send them to result. */
				1653	if (/OPTION_SET(options, _ULOC_CANONICALIZE) &&/
				1654	(result == NULL \|\| resultCapacity < (int32_t)sizeof(localeBuffer))) {
				1655	name = localeBuffer;
				1656	nameCapacity = (int32_t)sizeof(localeBuffer);
				1657	} else {
				1658	name = result;
				1659	nameCapacity = resultCapacity;
				1660	}
				1661
				1662	/* get all pieces, one after another, and separate with '_' */
				1663	len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
				1664
				1665	if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
				1666	const char *d = uloc_getDefault();
				1667
				1668	len = (int32_t)uprv_strlen(d);
				1669
				1670	if (name != NULL) {
				1671	uprv_strncpy(name, d, len);
				1672	}
				1673	} else if(_isIDSeparator(*tmpLocaleID)) {
				1674	const char *scriptID;
				1675
				1676	++fieldCount;
				1677	if(len<nameCapacity) {
				1678	name[len]='_';
				1679	}
				1680	++len;
				1681
				1682	scriptSize=ulocimp_getScript(tmpLocaleID+1,
				1683	(len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
				1684	if(scriptSize > 0) {
				1685	/* Found optional script */
				1686	tmpLocaleID = scriptID;
				1687	++fieldCount;
				1688	len+=scriptSize;
				1689	if (_isIDSeparator(*tmpLocaleID)) {
				1690	/* If there is something else, then we add the _ */
				1691	if(len<nameCapacity) {
				1692	name[len]='_';
				1693	}
				1694	++len;
				1695	}
				1696	}
				1697
				1698	if (_isIDSeparator(*tmpLocaleID)) {
				1699	const char *cntryID;
				1700	int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
				1701	(len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
				1702	if (cntrySize > 0) {
				1703	/* Found optional country */
				1704	tmpLocaleID = cntryID;
				1705	len+=cntrySize;
				1706	}
				1707	if(_isIDSeparator(*tmpLocaleID)) {
				1708	/* If there is something else, then we add the _ if we found country before. */
				1709	if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
				1710	++fieldCount;
				1711	if(len<nameCapacity) {
				1712	name[len]='_';
				1713	}
				1714	++len;
				1715	}
				1716
				1717	variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
				1718	(len<nameCapacity ? name+len : NULL), nameCapacity-len);
				1719	if (variantSize > 0) {
				1720	variant = len<nameCapacity ? name+len : NULL;
				1721	len += variantSize;
				1722	tmpLocaleID += variantSize + 1; /* skip '_' and variant */
				1723	}
				1724	}
				1725	}
				1726	}
				1727
				1728	/* Copy POSIX-style charset specifier, if any [mr.utf8] */
				1729	if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
				1730	UBool done = FALSE;
				1731	do {
				1732	char c = *tmpLocaleID;
				1733	switch (c) {
				1734	case 0:
				1735	case '@':
				1736	done = TRUE;
				1737	break;
				1738	default:
				1739	if (len<nameCapacity) {
				1740	name[len] = c;
				1741	}
				1742	++len;
				1743	++tmpLocaleID;
				1744	break;
				1745	}
				1746	} while (!done);
				1747	}
				1748
				1749	/* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
				1750	After this, tmpLocaleID either points to '@' or is NULL */
				1751	if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
				1752	keywordAssign = uprv_strchr(tmpLocaleID, '=');
				1753	separatorIndicator = uprv_strchr(tmpLocaleID, ';');
				1754	}
				1755
				1756	/* Copy POSIX-style variant, if any [mr@FOO] */
				1757	if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
				1758	tmpLocaleID != NULL && keywordAssign == NULL) {
				1759	for (;;) {
				1760	char c = *tmpLocaleID;
				1761	if (c == 0) {
				1762	break;
				1763	}
				1764	if (len<nameCapacity) {
				1765	name[len] = c;
				1766	}
				1767	++len;
				1768	++tmpLocaleID;
				1769	}
				1770	}
				1771
				1772	if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
				1773	/* Handle @FOO variant if @ is present and not followed by = */
				1774	if (tmpLocaleID!=NULL && keywordAssign==NULL) {
				1775	int32_t posixVariantSize;
				1776	/* Add missing '_' if needed */
				1777	if (fieldCount < 2 \|\| (fieldCount < 3 && scriptSize > 0)) {
				1778	do {
				1779	if(len<nameCapacity) {
				1780	name[len]='_';
				1781	}
				1782	++len;
				1783	++fieldCount;
				1784	} while(fieldCount<2);
				1785	}
				1786	posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
				1787	(UBool)(variantSize > 0));
				1788	if (posixVariantSize > 0) {
				1789	if (variant == NULL) {
				1790	variant = name+len;
				1791	}
				1792	len += posixVariantSize;
				1793	variantSize += posixVariantSize;
				1794	}
				1795	}
				1796
				1797	/* Handle generic variants first */
				1798	if (variant) {
				1799	for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) {
				1800	const char* variantToCompare = VARIANT_MAP[j].variant;
				1801	int32_t n = (int32_t)uprv_strlen(variantToCompare);
				1802	int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
				1803	len -= variantLen;
				1804	if (variantLen > 0) {
				1805	if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
				1806	--len;
				1807	}
				1808	addKeyword = VARIANT_MAP[j].keyword;
				1809	addValue = VARIANT_MAP[j].value;
				1810	break;
				1811	}
				1812	}
				1813	if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
				1814	--len;
				1815	}
				1816	}
				1817
				1818	/* Look up the ID in the canonicalization map */
				1819	for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
				1820	const char* id = CANONICALIZE_MAP[j].id;
				1821	int32_t n = (int32_t)uprv_strlen(id);
				1822	if (len == n && uprv_strncmp(name, id, n) == 0) {
				1823	if (n == 0 && tmpLocaleID != NULL) {
				1824	break; /* Don't remap "" if keywords present */
				1825	}
				1826	len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
				1827	if (CANONICALIZE_MAP[j].keyword) {
				1828	addKeyword = CANONICALIZE_MAP[j].keyword;
				1829	addValue = CANONICALIZE_MAP[j].value;
				1830	}
				1831	break;
				1832	}
				1833	}
				1834	}
				1835
				1836	if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
				1837	if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
				1838	(!separatorIndicator \|\| separatorIndicator > keywordAssign)) {
				1839	if(len<nameCapacity) {
				1840	name[len]='@';
				1841	}
				1842	++len;
				1843	++fieldCount;
				1844	len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
				1845	NULL, 0, NULL, TRUE, addKeyword, addValue, err);
				1846	} else if (addKeyword != NULL) {
				1847	U_ASSERT(addValue != NULL && len < nameCapacity);
				1848	/* inelegant but works -- later make _getKeywords do this? */
				1849	len += _copyCount(name+len, nameCapacity-len, "@");
				1850	len += _copyCount(name+len, nameCapacity-len, addKeyword);
				1851	len += _copyCount(name+len, nameCapacity-len, "=");
				1852	len += _copyCount(name+len, nameCapacity-len, addValue);
				1853	}
				1854	}
				1855
				1856	if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
				1857	uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
				1858	}
				1859
				1860	return u_terminateChars(result, resultCapacity, len, err);
				1861	}
				1862
				1863	/* ### ID parsing API **************************************************/
				1864
				1865	U_CAPI int32_t U_EXPORT2
				1866	uloc_getParent(const char* localeID,
				1867	char* parent,
				1868	int32_t parentCapacity,
				1869	UErrorCode* err)
				1870	{
				1871	const char *lastUnderscore;
				1872	int32_t i;
				1873
				1874	if (U_FAILURE(*err))
				1875	return 0;
				1876
				1877	if (localeID == NULL)
				1878	localeID = uloc_getDefault();
				1879
				1880	lastUnderscore=uprv_strrchr(localeID, '_');
				1881	if(lastUnderscore!=NULL) {
				1882	i=(int32_t)(lastUnderscore-localeID);
				1883	} else {
				1884	i=0;
				1885	}
				1886
				1887	if(i>0 && parent != localeID) {
				1888	uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
				1889	}
				1890	return u_terminateChars(parent, parentCapacity, i, err);
				1891	}
				1892
				1893	U_CAPI int32_t U_EXPORT2
				1894	uloc_getLanguage(const char* localeID,
				1895	char* language,
				1896	int32_t languageCapacity,
				1897	UErrorCode* err)
				1898	{
				1899	/* uloc_getLanguage will return a 2 character iso-639 code if one exists. CWB/
				1900	int32_t i=0;
				1901
				1902	if (err==NULL \|\| U_FAILURE(*err)) {
				1903	return 0;
				1904	}
				1905
				1906	if(localeID==NULL) {
				1907	localeID=uloc_getDefault();
				1908	}
				1909
				1910	i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
				1911	return u_terminateChars(language, languageCapacity, i, err);
				1912	}
				1913
				1914	U_CAPI int32_t U_EXPORT2
				1915	uloc_getScript(const char* localeID,
				1916	char* script,
				1917	int32_t scriptCapacity,
				1918	UErrorCode* err)
				1919	{
				1920	int32_t i=0;
				1921
				1922	if(err==NULL \|\| U_FAILURE(*err)) {
				1923	return 0;
				1924	}
				1925
				1926	if(localeID==NULL) {
				1927	localeID=uloc_getDefault();
				1928	}
				1929
				1930	/* skip the language */
				1931	ulocimp_getLanguage(localeID, NULL, 0, &localeID);
				1932	if(_isIDSeparator(*localeID)) {
				1933	i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
				1934	}
				1935	return u_terminateChars(script, scriptCapacity, i, err);
				1936	}
				1937
				1938	U_CAPI int32_t U_EXPORT2
				1939	uloc_getCountry(const char* localeID,
				1940	char* country,
				1941	int32_t countryCapacity,
				1942	UErrorCode* err)
				1943	{
				1944	int32_t i=0;
				1945
				1946	if(err==NULL \|\| U_FAILURE(*err)) {
				1947	return 0;
				1948	}
				1949
				1950	if(localeID==NULL) {
				1951	localeID=uloc_getDefault();
				1952	}
				1953
				1954	/* Skip the language */
				1955	ulocimp_getLanguage(localeID, NULL, 0, &localeID);
				1956	if(_isIDSeparator(*localeID)) {
				1957	const char *scriptID;
				1958	/* Skip the script if available */
				1959	ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
				1960	if(scriptID != localeID+1) {
				1961	/* Found optional script */
				1962	localeID = scriptID;
				1963	}
				1964	if(_isIDSeparator(*localeID)) {
				1965	i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
				1966	}
				1967	}
				1968	return u_terminateChars(country, countryCapacity, i, err);
				1969	}
				1970
				1971	U_CAPI int32_t U_EXPORT2
				1972	uloc_getVariant(const char* localeID,
				1973	char* variant,
				1974	int32_t variantCapacity,
				1975	UErrorCode* err)
				1976	{
				1977	char tempBuffer[ULOC_FULLNAME_CAPACITY];
				1978	const char* tmpLocaleID;
				1979	int32_t i=0;
				1980
				1981	if(err==NULL \|\| U_FAILURE(*err)) {
				1982	return 0;
				1983	}
				1984
				1985	if (_hasBCP47Extension(localeID)) {
				1986	_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
				1987	} else {
				1988	if (localeID==NULL) {
				1989	localeID=uloc_getDefault();
				1990	}
				1991	tmpLocaleID=localeID;
				1992	}
				1993
				1994	/* Skip the language */
				1995	ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
				1996	if(_isIDSeparator(*tmpLocaleID)) {
				1997	const char *scriptID;
				1998	/* Skip the script if available */
				1999	ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
				2000	if(scriptID != tmpLocaleID+1) {
				2001	/* Found optional script */
				2002	tmpLocaleID = scriptID;
				2003	}
				2004	/* Skip the Country */
				2005	if (_isIDSeparator(*tmpLocaleID)) {
				2006	const char *cntryID;
				2007	ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
				2008	if (cntryID != tmpLocaleID+1) {
				2009	/* Found optional country */
				2010	tmpLocaleID = cntryID;
				2011	}
				2012	if(_isIDSeparator(*tmpLocaleID)) {
				2013	/* If there was no country ID, skip a possible extra IDSeparator */
				2014	if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
				2015	tmpLocaleID++;
				2016	}
				2017	i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
				2018	}
				2019	}
				2020	}
				2021
				2022	/* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
				2023	/* if we do not have a variant tag yet then try a POSIX variant after '@' */
				2024	/*
				2025	if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
				2026	i=_getVariant(localeID+1, '@', variant, variantCapacity);
				2027	}
				2028	*/
				2029	return u_terminateChars(variant, variantCapacity, i, err);
				2030	}
				2031
				2032	U_CAPI int32_t U_EXPORT2
				2033	uloc_getName(const char* localeID,
				2034	char* name,
				2035	int32_t nameCapacity,
				2036	UErrorCode* err)
				2037	{
				2038	return _canonicalize(localeID, name, nameCapacity, 0, err);
				2039	}
				2040
				2041	U_CAPI int32_t U_EXPORT2
				2042	uloc_getBaseName(const char* localeID,
				2043	char* name,
				2044	int32_t nameCapacity,
				2045	UErrorCode* err)
				2046	{
				2047	return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
				2048	}
				2049
				2050	U_CAPI int32_t U_EXPORT2
				2051	uloc_canonicalize(const char* localeID,
				2052	char* name,
				2053	int32_t nameCapacity,
				2054	UErrorCode* err)
				2055	{
				2056	return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
				2057	}
				2058
				2059	U_CAPI const char* U_EXPORT2
				2060	uloc_getISO3Language(const char* localeID)
				2061	{
				2062	int16_t offset;
				2063	char lang[ULOC_LANG_CAPACITY];
				2064	UErrorCode err = U_ZERO_ERROR;
				2065
				2066	if (localeID == NULL)
				2067	{
				2068	localeID = uloc_getDefault();
				2069	}
				2070	uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
				2071	if (U_FAILURE(err))
				2072	return "";
				2073	offset = _findIndex(LANGUAGES, lang);
				2074	if (offset < 0)
				2075	return "";
				2076	return LANGUAGES_3[offset];
				2077	}
				2078
				2079	U_CAPI const char* U_EXPORT2
				2080	uloc_getISO3Country(const char* localeID)
				2081	{
				2082	int16_t offset;
				2083	char cntry[ULOC_LANG_CAPACITY];
				2084	UErrorCode err = U_ZERO_ERROR;
				2085
				2086	if (localeID == NULL)
				2087	{
				2088	localeID = uloc_getDefault();
				2089	}
				2090	uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
				2091	if (U_FAILURE(err))
				2092	return "";
				2093	offset = _findIndex(COUNTRIES, cntry);
				2094	if (offset < 0)
				2095	return "";
				2096
				2097	return COUNTRIES_3[offset];
				2098	}
				2099
				2100	U_CAPI uint32_t U_EXPORT2
				2101	uloc_getLCID(const char* localeID)
				2102	{
				2103	UErrorCode status = U_ZERO_ERROR;
				2104	char langID[ULOC_FULLNAME_CAPACITY];
				2105
				2106	uloc_getLanguage(localeID, langID, sizeof(langID), &status);
				2107	if (U_FAILURE(status)) {
				2108	return 0;
				2109	}
				2110
				2111	if (uprv_strchr(localeID, '@')) {
				2112	// uprv_convertToLCID does not support keywords other than collation.
				2113	// Remove all keywords except collation.
				2114	int32_t len;
				2115	char collVal[ULOC_KEYWORDS_CAPACITY];
				2116	char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
				2117
				2118	len = uloc_getKeywordValue(localeID, "collation", collVal,
				2119	sizeof(collVal)/sizeof(collVal[0]) - 1, &status);
				2120
				2121	if (U_SUCCESS(status) && len > 0) {
				2122	collVal[len] = 0;
				2123
				2124	len = uloc_getBaseName(localeID, tmpLocaleID,
				2125	sizeof(tmpLocaleID)/sizeof(tmpLocaleID[0]) - 1, &status);
				2126
				2127	if (U_SUCCESS(status)) {
				2128	tmpLocaleID[len] = 0;
				2129
				2130	len = uloc_setKeywordValue("collation", collVal, tmpLocaleID,
				2131	sizeof(tmpLocaleID)/sizeof(tmpLocaleID[0]) - len - 1, &status);
				2132
				2133	if (U_SUCCESS(status)) {
				2134	tmpLocaleID[len] = 0;
				2135	return uprv_convertToLCID(langID, tmpLocaleID, &status);
				2136	}
				2137	}
				2138	}
				2139
				2140	// fall through - all keywords are simply ignored
				2141	status = U_ZERO_ERROR;
				2142	}
				2143
				2144	return uprv_convertToLCID(langID, localeID, &status);
				2145	}
				2146
				2147	U_CAPI int32_t U_EXPORT2
				2148	uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
				2149	UErrorCode *status)
				2150	{
				2151	return uprv_convertToPosix(hostid, locale, localeCapacity, status);
				2152	}
				2153
				2154	/* ### Default locale **************************************************/
				2155
				2156	U_CAPI const char* U_EXPORT2
				2157	uloc_getDefault()
				2158	{
				2159	return locale_get_default();
				2160	}
				2161
				2162	U_CAPI void U_EXPORT2
				2163	uloc_setDefault(const char* newDefaultLocale,
				2164	UErrorCode* err)
				2165	{
				2166	if (U_FAILURE(*err))
				2167	return;
				2168	/* the error code isn't currently used for anything by this function*/
				2169
				2170	/* propagate change to C++ */
				2171	locale_set_default(newDefaultLocale);
				2172	}
				2173
				2174	/**
				2175	* Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
				2176	* to an array of pointers to arrays of char. All of these pointers are owned
				2177	* by ICU-- do not delete them, and do not write through them. The array is
				2178	* terminated with a null pointer.
				2179	*/
				2180	U_CAPI const char* const* U_EXPORT2
				2181	uloc_getISOLanguages()
				2182	{
				2183	return LANGUAGES;
				2184	}
				2185
				2186	/**
				2187	* Returns a list of all 2-letter country codes defined in ISO 639. This is a
				2188	* pointer to an array of pointers to arrays of char. All of these pointers are
				2189	* owned by ICU-- do not delete them, and do not write through them. The array is
				2190	* terminated with a null pointer.
				2191	*/
				2192	U_CAPI const char* const* U_EXPORT2
				2193	uloc_getISOCountries()
				2194	{
				2195	return COUNTRIES;
				2196	}
				2197
				2198
				2199	/* this function to be moved into cstring.c later */
				2200	static char gDecimal = 0;
				2201
				2202	static /* U_CAPI */
				2203	double
				2204	/* U_EXPORT2 */
				2205	_uloc_strtod(const char start, char *end) {
				2206	char *decimal;
				2207	char *myEnd;
				2208	char buf[30];
				2209	double rv;
				2210	if (!gDecimal) {
				2211	char rep[5];
				2212	/* For machines that decide to change the decimal on you,
				2213	and try to be too smart with localization.
				2214	This normally should be just a '.'. */
				2215	sprintf(rep, "%+1.1f", 1.0);
				2216	gDecimal = rep[2];
				2217	}
				2218
				2219	if(gDecimal == '.') {
				2220	return uprv_strtod(start, end); /* fall through to OS */
				2221	} else {
				2222	uprv_strncpy(buf, start, 29);
				2223	buf[29]=0;
				2224	decimal = uprv_strchr(buf, '.');
				2225	if(decimal) {
				2226	*decimal = gDecimal;
				2227	} else {
				2228	return uprv_strtod(start, end); /* no decimal point */
				2229	}
				2230	rv = uprv_strtod(buf, &myEnd);
				2231	if(end) {
				2232	end = (char)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
				2233	}
				2234	return rv;
				2235	}
				2236	}
				2237
				2238	typedef struct {
				2239	float q;
				2240	int32_t dummy; /* to avoid uninitialized memory copy from qsort */
				2241	char *locale;
				2242	} _acceptLangItem;
				2243
				2244	static int32_t U_CALLCONV
				2245	uloc_acceptLanguageCompare(const void * /context/, const void a, const void b)
				2246	{
				2247	const _acceptLangItem aa = (const _acceptLangItem)a;
				2248	const _acceptLangItem bb = (const _acceptLangItem)b;
				2249
				2250	int32_t rc = 0;
				2251	if(bb->q < aa->q) {
				2252	rc = -1; /* A > B */
				2253	} else if(bb->q > aa->q) {
				2254	rc = 1; /* A < B */
				2255	} else {
				2256	rc = 0; /* A = B */
				2257	}
				2258
				2259	if(rc==0) {
				2260	rc = uprv_stricmp(aa->locale, bb->locale);
				2261	}
				2262
				2263	#if defined(ULOC_DEBUG)
				2264	/* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
				2265	aa->locale, aa->q,
				2266	bb->locale, bb->q,
				2267	rc);*/
				2268	#endif
				2269
				2270	return rc;
				2271	}
				2272
				2273	/*
				2274	mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
				2275	*/
				2276
				2277	U_CAPI int32_t U_EXPORT2
				2278	uloc_acceptLanguageFromHTTP(char result, int32_t resultAvailable, UAcceptResult outResult,
				2279	const char *httpAcceptLanguage,
				2280	UEnumeration* availableLocales,
				2281	UErrorCode *status)
				2282	{
				2283	_acceptLangItem *j;
				2284	_acceptLangItem smallBuffer[30];
				2285	char **strs;
				2286	char tmp[ULOC_FULLNAME_CAPACITY +1];
				2287	int32_t n = 0;
				2288	const char *itemEnd;
				2289	const char *paramEnd;
				2290	const char *s;
				2291	const char *t;
				2292	int32_t res;
				2293	int32_t i;
				2294	int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
				2295	int32_t jSize;
				2296	char tempstr; / Use for null pointer check */
				2297
				2298	j = smallBuffer;
				2299	jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
				2300	if(U_FAILURE(*status)) {
				2301	return -1;
				2302	}
				2303
				2304	for(s=httpAcceptLanguage;s&&*s;) {
				2305	while(isspace(s)) / eat space at the beginning */
				2306	s++;
				2307	itemEnd=uprv_strchr(s,',');
				2308	paramEnd=uprv_strchr(s,';');
				2309	if(!itemEnd) {
				2310	itemEnd = httpAcceptLanguage+l; /* end of string */
				2311	}
				2312	if(paramEnd && paramEnd<itemEnd) {
				2313	/* semicolon (;) is closer than end (,) */
				2314	t = paramEnd+1;
				2315	if(*t=='q') {
				2316	t++;
				2317	}
				2318	while(isspace(*t)) {
				2319	t++;
				2320	}
				2321	if(*t=='=') {
				2322	t++;
				2323	}
				2324	while(isspace(*t)) {
				2325	t++;
				2326	}
				2327	j[n].q = (float)_uloc_strtod(t,NULL);
				2328	} else {
				2329	/* no semicolon - it's 1.0 */
				2330	j[n].q = 1.0f;
				2331	paramEnd = itemEnd;
				2332	}
				2333	j[n].dummy=0;
				2334	/* eat spaces prior to semi */
				2335	for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
				2336	;
				2337	/* Check for null pointer from uprv_strndup */
				2338	tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
				2339	if (tempstr == NULL) {
				2340	*status = U_MEMORY_ALLOCATION_ERROR;
				2341	return -1;
				2342	}
				2343	j[n].locale = tempstr;
				2344	uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
				2345	if(strcmp(j[n].locale,tmp)) {
				2346	uprv_free(j[n].locale);
				2347	j[n].locale=uprv_strdup(tmp);
				2348	}
				2349	#if defined(ULOC_DEBUG)
				2350	/fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);/
				2351	#endif
				2352	n++;
				2353	s = itemEnd;
				2354	while(s==',') { / eat duplicate commas */
				2355	s++;
				2356	}
				2357	if(n>=jSize) {
				2358	if(j==smallBuffer) { /* overflowed the small buffer. */
				2359	j = static_cast<_acceptLangItem >(uprv_malloc(sizeof(j[0])(jSize*2)));
				2360	if(j!=NULL) {
				2361	uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
				2362	}
				2363	#if defined(ULOC_DEBUG)
				2364	fprintf(stderr,"malloced at size %d\n", jSize);
				2365	#endif
				2366	} else {
				2367	j = static_cast<_acceptLangItem >(uprv_realloc(j, sizeof(j[0])jSize*2));
				2368	#if defined(ULOC_DEBUG)
				2369	fprintf(stderr,"re-alloced at size %d\n", jSize);
				2370	#endif
				2371	}
				2372	jSize *= 2;
				2373	if(j==NULL) {
				2374	*status = U_MEMORY_ALLOCATION_ERROR;
				2375	return -1;
				2376	}
				2377	}
				2378	}
				2379	uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
				2380	if(U_FAILURE(*status)) {
				2381	if(j != smallBuffer) {
				2382	#if defined(ULOC_DEBUG)
				2383	fprintf(stderr,"freeing j %p\n", j);
				2384	#endif
				2385	uprv_free(j);
				2386	}
				2387	return -1;
				2388	}
				2389	strs = static_cast<char *>(uprv_malloc((size_t)(sizeof(strs[0])n)));
				2390	/* Check for null pointer */
				2391	if (strs == NULL) {
				2392	uprv_free(j); /* Free to avoid memory leak */
				2393	*status = U_MEMORY_ALLOCATION_ERROR;
				2394	return -1;
				2395	}
				2396	for(i=0;i<n;i++) {
				2397	#if defined(ULOC_DEBUG)
				2398	/fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);/
				2399	#endif
				2400	strs[i]=j[i].locale;
				2401	}
				2402	res = uloc_acceptLanguage(result, resultAvailable, outResult,
				2403	(const char**)strs, n, availableLocales, status);
				2404	for(i=0;i<n;i++) {
				2405	uprv_free(strs[i]);
				2406	}
				2407	uprv_free(strs);
				2408	if(j != smallBuffer) {
				2409	#if defined(ULOC_DEBUG)
				2410	fprintf(stderr,"freeing j %p\n", j);
				2411	#endif
				2412	uprv_free(j);
				2413	}
				2414	return res;
				2415	}
				2416
				2417
				2418	U_CAPI int32_t U_EXPORT2
				2419	uloc_acceptLanguage(char *result, int32_t resultAvailable,
				2420	UAcceptResult outResult, const char *acceptList,
				2421	int32_t acceptListCount,
				2422	UEnumeration* availableLocales,
				2423	UErrorCode *status)
				2424	{
				2425	int32_t i,j;
				2426	int32_t len;
				2427	int32_t maxLen=0;
				2428	char tmp[ULOC_FULLNAME_CAPACITY+1];
				2429	const char *l;
				2430	char **fallbackList;
				2431	if(U_FAILURE(*status)) {
				2432	return -1;
				2433	}
				2434	fallbackList = static_cast<char *>(uprv_malloc((size_t)(sizeof(fallbackList[0])acceptListCount)));
				2435	if(fallbackList==NULL) {
				2436	*status = U_MEMORY_ALLOCATION_ERROR;
				2437	return -1;
				2438	}
				2439	for(i=0;i<acceptListCount;i++) {
				2440	#if defined(ULOC_DEBUG)
				2441	fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
				2442	#endif
				2443	while((l=uenum_next(availableLocales, NULL, status))) {
				2444	#if defined(ULOC_DEBUG)
				2445	fprintf(stderr," %s\n", l);
				2446	#endif
				2447	len = (int32_t)uprv_strlen(l);
				2448	if(!uprv_strcmp(acceptList[i], l)) {
				2449	if(outResult) {
				2450	*outResult = ULOC_ACCEPT_VALID;
				2451	}
				2452	#if defined(ULOC_DEBUG)
				2453	fprintf(stderr, "MATCH! %s\n", l);
				2454	#endif
				2455	if(len>0) {
				2456	uprv_strncpy(result, l, uprv_min(len, resultAvailable));
				2457	}
				2458	for(j=0;j<i;j++) {
				2459	uprv_free(fallbackList[j]);
				2460	}
				2461	uprv_free(fallbackList);
				2462	return u_terminateChars(result, resultAvailable, len, status);
				2463	}
				2464	if(len>maxLen) {
				2465	maxLen = len;
				2466	}
				2467	}
				2468	uenum_reset(availableLocales, status);
				2469	/* save off parent info */
				2470	if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
				2471	fallbackList[i] = uprv_strdup(tmp);
				2472	} else {
				2473	fallbackList[i]=0;
				2474	}
				2475	}
				2476
				2477	for(maxLen--;maxLen>0;maxLen--) {
				2478	for(i=0;i<acceptListCount;i++) {
				2479	if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
				2480	#if defined(ULOC_DEBUG)
				2481	fprintf(stderr,"Try: [%s]", fallbackList[i]);
				2482	#endif
				2483	while((l=uenum_next(availableLocales, NULL, status))) {
				2484	#if defined(ULOC_DEBUG)
				2485	fprintf(stderr," %s\n", l);
				2486	#endif
				2487	len = (int32_t)uprv_strlen(l);
				2488	if(!uprv_strcmp(fallbackList[i], l)) {
				2489	if(outResult) {
				2490	*outResult = ULOC_ACCEPT_FALLBACK;
				2491	}
				2492	#if defined(ULOC_DEBUG)
				2493	fprintf(stderr, "fallback MATCH! %s\n", l);
				2494	#endif
				2495	if(len>0) {
				2496	uprv_strncpy(result, l, uprv_min(len, resultAvailable));
				2497	}
				2498	for(j=0;j<acceptListCount;j++) {
				2499	uprv_free(fallbackList[j]);
				2500	}
				2501	uprv_free(fallbackList);
				2502	return u_terminateChars(result, resultAvailable, len, status);
				2503	}
				2504	}
				2505	uenum_reset(availableLocales, status);
				2506
				2507	if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
				2508	uprv_free(fallbackList[i]);
				2509	fallbackList[i] = uprv_strdup(tmp);
				2510	} else {
				2511	uprv_free(fallbackList[i]);
				2512	fallbackList[i]=0;
				2513	}
				2514	}
				2515	}
				2516	if(outResult) {
				2517	*outResult = ULOC_ACCEPT_FAILED;
				2518	}
				2519	}
				2520	for(i=0;i<acceptListCount;i++) {
				2521	uprv_free(fallbackList[i]);
				2522	}
				2523	uprv_free(fallbackList);
				2524	return -1;
				2525	}
				2526
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame^]	2527	U_CAPI const char* U_EXPORT2
				2528	uloc_toUnicodeLocaleKey(const char* keyword)
				2529	{
				2530	const char* bcpKey = ulocimp_toBcpKey(keyword);
				2531	if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {
				2532	// unknown keyword, but syntax is fine..
				2533	return keyword;
				2534	}
				2535	return bcpKey;
				2536	}
				2537
				2538	U_CAPI const char* U_EXPORT2
				2539	uloc_toUnicodeLocaleType(const char* keyword, const char* value)
				2540	{
				2541	const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
				2542	if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {
				2543	// unknown keyword, but syntax is fine..
				2544	return value;
				2545	}
				2546	return bcpType;
				2547	}
				2548
				2549	#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
				2550	#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) \|\| UPRV_ISDIGIT(c) )
				2551
				2552	static UBool
				2553	isWellFormedLegacyKey(const char* legacyKey)
				2554	{
				2555	const char* p = legacyKey;
				2556	while (*p) {
				2557	if (!UPRV_ISALPHANUM(*p)) {
				2558	return FALSE;
				2559	}
				2560	p++;
				2561	}
				2562	return TRUE;
				2563	}
				2564
				2565	static UBool
				2566	isWellFormedLegacyType(const char* legacyType)
				2567	{
				2568	const char* p = legacyType;
				2569	int32_t alphaNumLen = 0;
				2570	while (*p) {
				2571	if (p == '_' \|\| p == '/' \|\| *p == '-') {
				2572	if (alphaNumLen == 0) {
				2573	return FALSE;
				2574	}
				2575	alphaNumLen = 0;
				2576	} else if (UPRV_ISALPHANUM(*p)) {
				2577	alphaNumLen++;
				2578	} else {
				2579	return FALSE;
				2580	}
				2581	p++;
				2582	}
				2583	return (alphaNumLen != 0);
				2584	}
				2585
				2586	U_CAPI const char* U_EXPORT2
				2587	uloc_toLegacyKey(const char* keyword)
				2588	{
				2589	const char* legacyKey = ulocimp_toLegacyKey(keyword);
				2590	if (legacyKey == NULL) {
				2591	// Checks if the specified locale key is well-formed with the legacy locale syntax.
				2592	//
				2593	// Note:
				2594	// Neither ICU nor LDML/CLDR provides the definition of keyword syntax.
				2595	// However, a key should not contain '=' obviously. For now, all existing
				2596	// keys are using ASCII alphabetic letters only. We won't add any new key
				2597	// that is not compatible with the BCP 47 syntax. Therefore, we assume
				2598	// a valid key consist from [0-9a-zA-Z], no symbols.
				2599	if (isWellFormedLegacyKey(keyword)) {
				2600	return keyword;
				2601	}
				2602	}
				2603	return legacyKey;
				2604	}
				2605
				2606	U_CAPI const char* U_EXPORT2
				2607	uloc_toLegacyType(const char* keyword, const char* value)
				2608	{
				2609	const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
				2610	if (legacyType == NULL) {
				2611	// Checks if the specified locale type is well-formed with the legacy locale syntax.
				2612	//
				2613	// Note:
				2614	// Neither ICU nor LDML/CLDR provides the definition of keyword syntax.
				2615	// However, a type should not contain '=' obviously. For now, all existing
				2616	// types are using ASCII alphabetic letters with a few symbol letters. We won't
				2617	// add any new type that is not compatible with the BCP 47 syntax except timezone
				2618	// IDs. For now, we assume a valid type start with [0-9a-zA-Z], but may contain
				2619	// '-' '_' '/' in the middle.
				2620	if (isWellFormedLegacyType(value)) {
				2621	return value;
				2622	}
				2623	}
				2624	return legacyType;
				2625	}
				2626
/*eof*/