Blame - source/common/uloc.cpp - chromium.googlesource.com/chromium/deps/icu

blob: 81b6e0f68ab88be97d59dcec345e8d612ebd45e4 [file] [log] [blame]

Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1	// © 2016 and later: Unicode, Inc. and others.
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2	// License & terms of use: http://www.unicode.org/copyright.html
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	3	/*
				4	**********************************************************************
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	5	* Copyright (C) 1997-2016, International Business Machines
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	6	* Corporation and others. All Rights Reserved.
				7	**********************************************************************
				8	*
				9	* File ULOC.CPP
				10	*
				11	* Modification History:
				12	*
				13	* Date Name Description
				14	* 04/01/97 aliu Creation.
				15	* 08/21/98 stephen JDK 1.2 sync
				16	* 12/08/98 rtg New Locale implementation and C API
				17	* 03/15/99 damiba overhaul.
				18	* 04/06/99 stephen changed setDefault() to realloc and copy
				19	* 06/14/99 stephen Changed calls to ures_open for new params
				20	* 07/21/99 stephen Modified setDefault() to propagate to C++
				21	* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
				22	* brought canonicalization code into line with spec
				23	*****************************************************************************/
				24
				25	/*
				26	POSIX's locale format, from putil.c: [no spaces]
				27
				28	ll [ _CC ] [ . MM ] [ @ VV]
				29
				30	l = lang, C = ctry, M = charmap, V = variant
				31	*/
				32
				33	#include "unicode/utypes.h"
				34	#include "unicode/ustring.h"
				35	#include "unicode/uloc.h"
				36
				37	#include "putilimp.h"
				38	#include "ustr_imp.h"
				39	#include "ulocimp.h"
				40	#include "umutex.h"
				41	#include "cstring.h"
				42	#include "cmemory.h"
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	43	#include "locmap.h"
				44	#include "uarrsort.h"
				45	#include "uenumimp.h"
				46	#include "uassert.h"
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	47	#include "charstr.h"
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	48
				49	#include <stdio.h> /* for sprintf */
				50
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	51	U_NAMESPACE_USE
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	52
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	53	/* ### Declarations **************************************************/
				54
				55	/* Locale stuff from locid.cpp */
				56	U_CFUNC void locale_set_default(const char *id);
				57	U_CFUNC const char *locale_get_default(void);
				58	U_CFUNC int32_t
				59	locale_getKeywords(const char *localeID,
				60	char prev,
				61	char *keywords, int32_t keywordCapacity,
				62	char values, int32_t valuesCapacity, int32_t valLen,
				63	UBool valuesToo,
				64	UErrorCode *status);
				65
				66	/* ### Data tables **************************************************/
				67
				68	/**
				69	* Table of language codes, both 2- and 3-letter, with preference
				70	* given to 2-letter codes where possible. Includes 3-letter codes
				71	* that lack a 2-letter equivalent.
				72	*
				73	* This list must be in sorted order. This list is returned directly
				74	* to the user by some API.
				75	*
				76	* This list must be kept in sync with LANGUAGES_3, with corresponding
				77	* entries matched.
				78	*
				79	* This table should be terminated with a NULL entry, followed by a
				80	* second list, and another NULL entry. The first list is visible to
				81	* user code when this array is returned by API. The second list
				82	* contains codes we support, but do not expose through user API.
				83	*
				84	* Notes
				85	*
				86	* Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
				87	* include the revisions up to 2001/7/27 CWB
				88	*
				89	* The 3 character codes are the terminology codes like RFC 3066. This
				90	* is compatible with prior ICU codes
				91	*
				92	* "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
				93	* table but now at the end of the table because 3 character codes are
				94	* duplicates. This avoids bad searches going from 3 to 2 character
				95	* codes.
				96	*
				97	* The range qaa-qtz is reserved for local use
				98	*/
				99	/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	100	/* ISO639 table version is 20150505 */
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame]	101	/* Subsequent hand addition of selected languages */
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	102	static const char * const LANGUAGES[] = {
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	103	"aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb",
				104	"af", "afh", "agq", "ain", "ak", "akk", "akz", "ale",
				105	"aln", "alt", "am", "an", "ang", "anp", "ar", "arc",
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	106	"arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	107	"asa", "ase", "ast", "av", "avk", "awa", "ay", "az",
				108	"ba", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
				109	"be", "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
				110	"bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla",
				111	"bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh",
				112	"brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv",
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame]	113	"ca", "cad", "car", "cay", "cch", "ccp", "ce", "ceb", "cgg",
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	114	"ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
				115	"chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh",
				116	"cs", "csb", "cu", "cv", "cy",
				117	"da", "dak", "dar", "dav", "de", "del", "den", "dgr",
				118	"din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
				119	"dyo", "dyu", "dz", "dzg",
				120	"ebu", "ee", "efi", "egl", "egy", "eka", "el", "elx",
				121	"en", "enm", "eo", "es", "esu", "et", "eu", "ewo",
				122	"ext",
				123	"fa", "fan", "fat", "ff", "fi", "fil", "fit", "fj",
				124	"fo", "fon", "fr", "frc", "frm", "fro", "frp", "frr",
				125	"frs", "fur", "fy",
				126	"ga", "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
				127	"gez", "gil", "gl", "glk", "gmh", "gn", "goh", "gom",
				128	"gon", "gor", "got", "grb", "grc", "gsw", "gu", "guc",
				129	"gur", "guz", "gv", "gwi",
				130	"ha", "hai", "hak", "haw", "he", "hi", "hif", "hil",
				131	"hit", "hmn", "ho", "hr", "hsb", "hsn", "ht", "hu",
				132	"hup", "hy", "hz",
				133	"ia", "iba", "ibb", "id", "ie", "ig", "ii", "ik",
				134	"ilo", "inh", "io", "is", "it", "iu", "izh",
				135	"ja", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
				136	"jv",
				137	"ka", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
				138	"kbl", "kcg", "kde", "kea", "ken", "kfo", "kg", "kgp",
				139	"kha", "kho", "khq", "khw", "ki", "kiu", "kj", "kk",
				140	"kkj", "kl", "kln", "km", "kmb", "kn", "ko", "koi",
				141	"kok", "kos", "kpe", "kr", "krc", "kri", "krj", "krl",
				142	"kru", "ks", "ksb", "ksf", "ksh", "ku", "kum", "kut",
				143	"kv", "kw", "ky",
				144	"la", "lad", "lag", "lah", "lam", "lb", "lez", "lfn",
				145	"lg", "li", "lij", "liv", "lkt", "lmo", "ln", "lo",
				146	"lol", "loz", "lrc", "lt", "ltg", "lu", "lua", "lui",
				147	"lun", "luo", "lus", "luy", "lv", "lzh", "lzz",
				148	"mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
				149	"mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga",
				150	"mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
				151	"ml", "mn", "mnc", "mni", "moh", "mos", "mr", "mrj",
				152	"ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
				153	"my", "mye", "myv", "mzn",
				154	"na", "nan", "nap", "naq", "nb", "nd", "nds", "ne",
				155	"new", "ng", "nia", "niu", "njo", "nl", "nmg", "nn",
				156	"nnh", "no", "nog", "non", "nov", "nqo", "nr", "nso",
				157	"nus", "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi",
				158	"oc", "oj", "om", "or", "os", "osa", "ota",
				159	"pa", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
				160	"pdt", "peo", "pfl", "phn", "pi", "pl", "pms", "pnt",
				161	"pon", "prg", "pro", "ps", "pt",
				162	"qu", "quc", "qug",
				163	"raj", "rap", "rar", "rgn", "rif", "rm", "rn", "ro",
				164	"rof", "rom", "rtm", "ru", "rue", "rug", "rup",
				165	"rw", "rwk",
				166	"sa", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
				167	"sba", "sbp", "sc", "scn", "sco", "sd", "sdc", "sdh",
				168	"se", "see", "seh", "sei", "sel", "ses", "sg", "sga",
				169	"sgs", "shi", "shn", "shu", "si", "sid", "sk",
				170	"sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms",
				171	"sn", "snk", "so", "sog", "sq", "sr", "srn", "srr",
				172	"ss", "ssy", "st", "stq", "su", "suk", "sus", "sux",
				173	"sv", "sw", "swb", "swc", "syc", "syr", "szl",
				174	"ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg",
				175	"th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl",
				176	"tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi",
				177	"tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt",
				178	"tum", "tvl", "tw", "twq", "ty", "tyv", "tzm",
				179	"udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
				180	"vai", "ve", "vec", "vep", "vi", "vls", "vmf", "vo",
				181	"vot", "vro", "vun",
				182	"wa", "wae", "wal", "war", "was", "wbp", "wo", "wuu",
				183	"xal", "xh", "xmf", "xog",
				184	"yao", "yap", "yav", "ybb", "yi", "yo", "yrl", "yue",
				185	"za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu",
				186	"zun", "zxx", "zza",
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	187	NULL,
				188	"in", "iw", "ji", "jw", "sh", /* obsolete language codes */
				189	NULL
				190	};
				191
				192	static const char* const DEPRECATED_LANGUAGES[]={
				193	"in", "iw", "ji", "jw", NULL, NULL
				194	};
				195	static const char* const REPLACEMENT_LANGUAGES[]={
				196	"id", "he", "yi", "jv", NULL, NULL
				197	};
				198
				199	/**
				200	* Table of 3-letter language codes.
				201	*
				202	* This is a lookup table used to convert 3-letter language codes to
				203	* their 2-letter equivalent, where possible. It must be kept in sync
				204	* with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
				205	* same language as LANGUAGES_3[i]. The commented-out lines are
				206	* copied from LANGUAGES to make eyeballing this baby easier.
				207	*
				208	* Where a 3-letter language code has no 2-letter equivalent, the
				209	* 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
				210	*
				211	* This table should be terminated with a NULL entry, followed by a
				212	* second list, and another NULL entry. The two lists correspond to
				213	* the two lists in LANGUAGES.
				214	*/
				215	/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	216	/* ISO639 table version is 20150505 */
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame]	217	/* Subsequent hand addition of selected languages */
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	218	static const char * const LANGUAGES_3[] = {
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	219	"aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
				220	"afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
				221	"aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	222	"arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	223	"asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
				224	"bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
				225	"bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
				226	"bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
				227	"bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
				228	"brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame]	229	"cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	230	"cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
				231	"chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
				232	"ces", "csb", "chu", "chv", "cym",
				233	"dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
				234	"din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
				235	"dyo", "dyu", "dzo", "dzg",
				236	"ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
				237	"eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
				238	"ext",
				239	"fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
				240	"fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
				241	"frs", "fur", "fry",
				242	"gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
				243	"gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
				244	"gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
				245	"gur", "guz", "glv", "gwi",
				246	"hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
				247	"hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
				248	"hup", "hye", "her",
				249	"ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
				250	"ilo", "inh", "ido", "isl", "ita", "iku", "izh",
				251	"jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
				252	"jav",
				253	"kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
				254	"kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
				255	"kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
				256	"kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
				257	"kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
				258	"kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
				259	"kom", "cor", "kir",
				260	"lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
				261	"lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
				262	"lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
				263	"lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
				264	"mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
				265	"mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
				266	"mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
				267	"mal", "mon", "mnc", "mni", "moh", "mos", "mar", "mrj",
				268	"msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
				269	"mya", "mye", "myv", "mzn",
				270	"nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
				271	"new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
				272	"nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
				273	"nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
				274	"oci", "oji", "orm", "ori", "oss", "osa", "ota",
				275	"pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
				276	"pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
				277	"pon", "prg", "pro", "pus", "por",
				278	"que", "quc", "qug",
				279	"raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
				280	"rof", "rom", "rtm", "rus", "rue", "rug", "rup",
				281	"kin", "rwk",
				282	"san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
				283	"sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
				284	"sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
				285	"sgs", "shi", "shn", "shu", "sin", "sid", "slk",
				286	"slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
				287	"sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
				288	"ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
				289	"swe", "swa", "swb", "swc", "syc", "syr", "szl",
				290	"tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
				291	"tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
				292	"tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
				293	"tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
				294	"tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
				295	"udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
				296	"vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
				297	"vot", "vro", "vun",
				298	"wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
				299	"xal", "xho", "xmf", "xog",
				300	"yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
				301	"zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
				302	"zun", "zxx", "zza",
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	303	NULL,
				304	/* "in", "iw", "ji", "jw", "sh", */
				305	"ind", "heb", "yid", "jaw", "srp",
				306	NULL
				307	};
				308
				309	/**
				310	* Table of 2-letter country codes.
				311	*
				312	* This list must be in sorted order. This list is returned directly
				313	* to the user by some API.
				314	*
				315	* This list must be kept in sync with COUNTRIES_3, with corresponding
				316	* entries matched.
				317	*
				318	* This table should be terminated with a NULL entry, followed by a
				319	* second list, and another NULL entry. The first list is visible to
				320	* user code when this array is returned by API. The second list
				321	* contains codes we support, but do not expose through user API.
				322	*
				323	* Notes:
				324	*
				325	* ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
				326	* http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
				327	* new codes keeping the old ones for compatibility updated to include
				328	* 1999/12/03 revisions CWB
				329	*
				330	* RO(ROM) is now RO(ROU) according to
				331	* http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
				332	*/
				333	static const char * const COUNTRIES[] = {
				334	"AD", "AE", "AF", "AG", "AI", "AL", "AM",
				335	"AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
				336	"BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
				337	"BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
				338	"BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
				339	"CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
				340	"CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK",
				341	"DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
				342	"ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
				343	"GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
				344	"GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
				345	"GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
				346	"ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
				347	"IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
				348	"KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
				349	"LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
				350	"LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
				351	"ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
				352	"MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
				353	"NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
				354	"NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
				355	"PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
				356	"PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
				357	"SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
				358	"SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
				359	"SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",
				360	"TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
				361	"TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
				362	"VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
				363	"WS", "YE", "YT", "ZA", "ZM", "ZW",
				364	NULL,
				365	"AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
				366	NULL
				367	};
				368
				369	static const char* const DEPRECATED_COUNTRIES[] = {
				370	"AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
				371	};
				372	static const char* const REPLACEMENT_COUNTRIES[] = {
				373	/* "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	374	"CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL /* replacement country codes */
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	375	};
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	376
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	377	/**
				378	* Table of 3-letter country codes.
				379	*
				380	* This is a lookup table used to convert 3-letter country codes to
				381	* their 2-letter equivalent. It must be kept in sync with COUNTRIES.
				382	* For all valid i, COUNTRIES[i] must refer to the same country as
				383	* COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
				384	* to make eyeballing this baby easier.
				385	*
				386	* This table should be terminated with a NULL entry, followed by a
				387	* second list, and another NULL entry. The two lists correspond to
				388	* the two lists in COUNTRIES.
				389	*/
				390	static const char * const COUNTRIES_3[] = {
				391	/* "AD", "AE", "AF", "AG", "AI", "AL", "AM", */
				392	"AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
				393	/* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
				394	"AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
				395	/* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
				396	"BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
				397	/* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */
				398	"BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
				399	/* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
				400	"BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
				401	/* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */
				402	"CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
				403	/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */
				404	"CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
				405	/* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */
				406	"DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
				407	/* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
				408	"ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
				409	/* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
				410	"GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
				411	/* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
				412	"GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
				413	/* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
				414	"GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
				415	/* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
				416	"IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
				417	/* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
				418	"ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
				419	/* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
				420	"COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
				421	/* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
				422	"LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
				423	/* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
				424	"LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
				425	/* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
				426	"MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
				427	/* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
				428	"MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
				429	/* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
				430	"NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
				431	/* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
				432	"NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
				433	/* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
				434	"PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
				435	/* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
				436	"PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
				437	/* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
				438	"SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
				439	/* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */
				440	"SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
				441	/* "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */
				442	"SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
				443	/* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
				444	"TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
				445	/* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
				446	"TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
				447	/* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
				448	"VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
				449	/* "WS", "YE", "YT", "ZA", "ZM", "ZW", */
				450	"WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
				451	NULL,
				452	/* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
				453	"ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
				454	NULL
				455	};
				456
				457	typedef struct CanonicalizationMap {
				458	const char id; / input ID */
				459	const char canonicalID; / canonicalized output ID */
				460	const char keyword; / keyword, or NULL if none */
				461	const char value; / keyword value, or NULL if kw==NULL */
				462	} CanonicalizationMap;
				463
				464	/**
				465	* A map to canonicalize locale IDs. This handles a variety of
				466	* different semantic kinds of transformations.
				467	*/
				468	static const CanonicalizationMap CANONICALIZE_MAP[] = {
				469	{ "", "en_US_POSIX", NULL, NULL }, /* .NET name */
				470	{ "c", "en_US_POSIX", NULL, NULL }, /* POSIX name */
				471	{ "posix", "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
				472	{ "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */
				473	{ "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
				474	{ "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */
				475	{ "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },
				476	{ "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */
				477	{ "de_AT_PREEURO", "de_AT", "currency", "ATS" },
				478	{ "de_DE_PREEURO", "de_DE", "currency", "DEM" },
				479	{ "de_LU_PREEURO", "de_LU", "currency", "LUF" },
				480	{ "el_GR_PREEURO", "el_GR", "currency", "GRD" },
				481	{ "en_BE_PREEURO", "en_BE", "currency", "BEF" },
				482	{ "en_IE_PREEURO", "en_IE", "currency", "IEP" },
				483	{ "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
				484	{ "es_ES_PREEURO", "es_ES", "currency", "ESP" },
				485	{ "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },
				486	{ "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },
				487	{ "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },
				488	{ "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },
				489	{ "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },
				490	{ "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },
				491	{ "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },
				492	{ "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */
				493	{ "it_IT_PREEURO", "it_IT", "currency", "ITL" },
				494	{ "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
				495	{ "nb_NO_NY", "nn_NO", NULL, NULL }, /* "markus said this was ok" :-) */
				496	{ "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },
				497	{ "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },
				498	{ "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },
				499	{ "sr_SP_CYRL", "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
				500	{ "sr_SP_LATN", "sr_Latn_RS", NULL, NULL }, /* .NET name */
				501	{ "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
				502	{ "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
				503	{ "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
				504	{ "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
				505	{ "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */
				506	{ "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */
				507	{ "zh_CHT", "zh_Hant", NULL, NULL }, /* .NET name */
				508	{ "zh_GAN", "gan", NULL, NULL }, /* registered name */
				509	{ "zh_GUOYU", "zh", NULL, NULL }, /* registered name */
				510	{ "zh_HAKKA", "hak", NULL, NULL }, /* registered name */
				511	{ "zh_MIN_NAN", "nan", NULL, NULL }, /* registered name */
				512	{ "zh_WUU", "wuu", NULL, NULL }, /* registered name */
				513	{ "zh_XIANG", "hsn", NULL, NULL }, /* registered name */
				514	{ "zh_YUE", "yue", NULL, NULL }, /* registered name */
				515	};
				516
				517	typedef struct VariantMap {
				518	const char variant; / input ID */
				519	const char keyword; / keyword, or NULL if none */
				520	const char value; / keyword value, or NULL if kw==NULL */
				521	} VariantMap;
				522
				523	static const VariantMap VARIANT_MAP[] = {
				524	{ "EURO", "currency", "EUR" },
				525	{ "PINYIN", "collation", "pinyin" }, /* Solaris variant */
				526	{ "STROKE", "collation", "stroke" } /* Solaris variant */
				527	};
				528
				529	/* ### BCP47 Conversion *******************************************/
				530	/* Test if the locale id has BCP47 u extension and does not have '@' */
				531	#define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1)
				532	/* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
				533	#define _ConvertBCP47(finalID, id, buffer, length,err) \
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame]	534	if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 \|\| \
				535	U_FAILURE(err) \|\| err == U_STRING_NOT_TERMINATED_WARNING) { \
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	536	finalID=id; \
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame]	537	if (err == U_STRING_NOT_TERMINATED_WARNING) { err = U_BUFFER_OVERFLOW_ERROR; } \
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	538	} else { \
				539	finalID=buffer; \
				540	}
				541	/* Gets the size of the shortest subtag in the given localeID. */
				542	static int32_t getShortestSubtagLength(const char *localeID) {
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame]	543	int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	544	int32_t length = localeIDLength;
				545	int32_t tmpLength = 0;
				546	int32_t i;
				547	UBool reset = TRUE;
				548
				549	for (i = 0; i < localeIDLength; i++) {
				550	if (localeID[i] != '_' && localeID[i] != '-') {
				551	if (reset) {
				552	tmpLength = 0;
				553	reset = FALSE;
				554	}
				555	tmpLength++;
				556	} else {
				557	if (tmpLength != 0 && tmpLength < length) {
				558	length = tmpLength;
				559	}
				560	reset = TRUE;
				561	}
				562	}
				563
				564	return length;
				565	}
				566
				567	/* ### Keywords **************************************************/
/* True when c is an ASCII decimal digit. Evaluates c twice. */
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
/* True when c is accepted by uprv_isASCIILetter or is an ASCII digit. Evaluates c more than once. */
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
/* Punctuation/symbols allowed in legacy key values */
#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	572
				573	#define ULOC_KEYWORD_BUFFER_LEN 25
				574	#define ULOC_MAX_NO_KEYWORDS 25
				575
				576	U_CAPI const char * U_EXPORT2
				577	locale_getKeywordsStart(const char *localeID) {
				578	const char *result = NULL;
				579	if((result = uprv_strchr(localeID, '@')) != NULL) {
				580	return result;
				581	}
				582	#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
				583	else {
				584	/* We do this because the @ sign is variant, and the @ sign used on one
				585	EBCDIC machine won't be compiled the same way on other EBCDIC based
				586	machines. */
				587	static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
				588	const uint8_t *charToFind = ebcdicSigns;
				589	while(*charToFind) {
				590	if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
				591	return result;
				592	}
				593	charToFind++;
				594	}
				595	}
				596	#endif
				597	return NULL;
				598	}
				599
				600	/**
				601	* @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
				602	* @param keywordName incoming name to be canonicalized
				603	* @param status return status (keyword too long)
				604	* @return length of the keyword name
				605	*/
				606	static int32_t locale_canonKeywordName(char buf, const char keywordName, UErrorCode *status)
				607	{
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	608	int32_t keywordNameLen = 0;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	609
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	610	for (; *keywordName != 0; keywordName++) {
				611	if (!UPRV_ISALPHANUM(*keywordName)) {
				612	status = U_ILLEGAL_ARGUMENT_ERROR; / malformed keyword name */
				613	return 0;
				614	}
				615	if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
				616	buf[keywordNameLen++] = uprv_tolower(*keywordName);
				617	} else {
				618	/* keyword name too long for internal buffer */
				619	*status = U_INTERNAL_PROGRAM_ERROR;
				620	return 0;
				621	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	622	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	623	if (keywordNameLen == 0) {
				624	status = U_ILLEGAL_ARGUMENT_ERROR; / empty keyword name */
				625	return 0;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	626	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	627	buf[keywordNameLen] = 0; /* terminate */
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	628
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	629	return keywordNameLen;
				630	}
				631
				632	typedef struct {
				633	char keyword[ULOC_KEYWORD_BUFFER_LEN];
				634	int32_t keywordLen;
				635	const char *valueStart;
				636	int32_t valueLen;
				637	} KeywordStruct;
				638
				639	static int32_t U_CALLCONV
				640	compareKeywordStructs(const void * /context/, const void left, const void right) {
				641	const char* leftString = ((const KeywordStruct *)left)->keyword;
				642	const char* rightString = ((const KeywordStruct *)right)->keyword;
				643	return uprv_strcmp(leftString, rightString);
				644	}
				645
				646	/**
				647	* Both addKeyword and addValue must already be in canonical form.
				648	* Either both addKeyword and addValue are NULL, or neither is NULL.
				649	* If they are not NULL they must be zero terminated.
				650	* If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword.
				651	*/
				652	static int32_t
				653	_getKeywords(const char *localeID,
				654	char prev,
				655	char *keywords, int32_t keywordCapacity,
				656	char values, int32_t valuesCapacity, int32_t valLen,
				657	UBool valuesToo,
				658	const char* addKeyword,
				659	const char* addValue,
				660	UErrorCode *status)
				661	{
				662	KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	663
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	664	int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
				665	int32_t numKeywords = 0;
				666	const char* pos = localeID;
				667	const char* equalSign = NULL;
				668	const char* semicolon = NULL;
				669	int32_t i = 0, j, n;
				670	int32_t keywordsLen = 0;
				671	int32_t valuesLen = 0;
				672
				673	if(prev == '@') { /* start of keyword definition */
				674	/* we will grab pairs, trim spaces, lowercase keywords, sort and return */
				675	do {
				676	UBool duplicate = FALSE;
				677	/* skip leading spaces */
				678	while(*pos == ' ') {
				679	pos++;
				680	}
				681	if (!pos) { / handle trailing "; " */
				682	break;
				683	}
				684	if(numKeywords == maxKeywords) {
				685	*status = U_INTERNAL_PROGRAM_ERROR;
				686	return 0;
				687	}
				688	equalSign = uprv_strchr(pos, '=');
				689	semicolon = uprv_strchr(pos, ';');
				690	/* lack of '=' [foo@currency] is illegal */
				691	/* ';' before '=' [foo@currency;collation=pinyin] is illegal */
				692	if(!equalSign \|\| (semicolon && semicolon<equalSign)) {
				693	*status = U_INVALID_FORMAT_ERROR;
				694	return 0;
				695	}
				696	/* need to normalize both keyword and keyword name */
				697	if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
				698	/* keyword name too long for internal buffer */
				699	*status = U_INTERNAL_PROGRAM_ERROR;
				700	return 0;
				701	}
				702	for(i = 0, n = 0; i < equalSign - pos; ++i) {
				703	if (pos[i] != ' ') {
				704	keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
				705	}
				706	}
				707
				708	/* zero-length keyword is an error. */
				709	if (n == 0) {
				710	*status = U_INVALID_FORMAT_ERROR;
				711	return 0;
				712	}
				713
				714	keywordList[numKeywords].keyword[n] = 0;
				715	keywordList[numKeywords].keywordLen = n;
				716	/* now grab the value part. First we skip the '=' */
				717	equalSign++;
				718	/* then we leading spaces */
				719	while(*equalSign == ' ') {
				720	equalSign++;
				721	}
				722
				723	/* Premature end or zero-length value */
Jungshik Shin (jungshik at google)	46be516	2015-03-26 11:46:43 -0700	[diff] [blame]	724	if (!*equalSign \|\| equalSign == semicolon) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	725	*status = U_INVALID_FORMAT_ERROR;
				726	return 0;
				727	}
				728
				729	keywordList[numKeywords].valueStart = equalSign;
				730
				731	pos = semicolon;
				732	i = 0;
				733	if(pos) {
				734	while(*(pos - i - 1) == ' ') {
				735	i++;
				736	}
				737	keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
				738	pos++;
				739	} else {
				740	i = (int32_t)uprv_strlen(equalSign);
				741	while(i && equalSign[i-1] == ' ') {
				742	i--;
				743	}
				744	keywordList[numKeywords].valueLen = i;
				745	}
				746	/* If this is a duplicate keyword, then ignore it */
				747	for (j=0; j<numKeywords; ++j) {
				748	if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
				749	duplicate = TRUE;
				750	break;
				751	}
				752	}
				753	if (!duplicate) {
				754	++numKeywords;
				755	}
				756	} while(pos);
				757
				758	/* Handle addKeyword/addValue. */
				759	if (addKeyword != NULL) {
				760	UBool duplicate = FALSE;
				761	U_ASSERT(addValue != NULL);
				762	/* Search for duplicate; if found, do nothing. Explicit keyword
				763	overrides addKeyword. */
				764	for (j=0; j<numKeywords; ++j) {
				765	if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
				766	duplicate = TRUE;
				767	break;
				768	}
				769	}
				770	if (!duplicate) {
				771	if (numKeywords == maxKeywords) {
				772	*status = U_INTERNAL_PROGRAM_ERROR;
				773	return 0;
				774	}
				775	uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
				776	keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
				777	keywordList[numKeywords].valueStart = addValue;
				778	keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
				779	++numKeywords;
				780	}
				781	} else {
				782	U_ASSERT(addValue == NULL);
				783	}
				784
				785	/* now we have a list of keywords */
				786	/* we need to sort it */
				787	uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	788
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	789	/* Now construct the keyword part */
				790	for(i = 0; i < numKeywords; i++) {
				791	if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
				792	uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
				793	if(valuesToo) {
				794	keywords[keywordsLen + keywordList[i].keywordLen] = '=';
				795	} else {
				796	keywords[keywordsLen + keywordList[i].keywordLen] = 0;
				797	}
				798	}
				799	keywordsLen += keywordList[i].keywordLen + 1;
				800	if(valuesToo) {
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame^]	801	if(keywordsLen + keywordList[i].valueLen <= keywordCapacity) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	802	uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
				803	}
				804	keywordsLen += keywordList[i].valueLen;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	805
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	806	if(i < numKeywords - 1) {
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	807	if(keywordsLen < keywordCapacity) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	808	keywords[keywordsLen] = ';';
				809	}
				810	keywordsLen++;
				811	}
				812	}
				813	if(values) {
				814	if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
				815	uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
				816	values[valuesLen + keywordList[i].valueLen] = 0;
				817	}
				818	valuesLen += keywordList[i].valueLen + 1;
				819	}
				820	}
				821	if(values) {
				822	values[valuesLen] = 0;
				823	if(valLen) {
				824	*valLen = valuesLen;
				825	}
				826	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	827	return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	828	} else {
				829	return 0;
				830	}
				831	}
				832
				833	U_CFUNC int32_t
				834	locale_getKeywords(const char *localeID,
				835	char prev,
				836	char *keywords, int32_t keywordCapacity,
				837	char values, int32_t valuesCapacity, int32_t valLen,
				838	UBool valuesToo,
				839	UErrorCode *status) {
				840	return _getKeywords(localeID, prev, keywords, keywordCapacity,
				841	values, valuesCapacity, valLen, valuesToo,
				842	NULL, NULL, status);
				843	}
				844
				845	U_CAPI int32_t U_EXPORT2
				846	uloc_getKeywordValue(const char* localeID,
				847	const char* keywordName,
				848	char* buffer, int32_t bufferCapacity,
				849	UErrorCode* status)
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	850	{
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	851	const char* startSearchHere = NULL;
				852	const char* nextSeparator = NULL;
				853	char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
				854	char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	855	int32_t result = 0;
				856
				857	if(status && U_SUCCESS(*status) && localeID) {
				858	char tempBuffer[ULOC_FULLNAME_CAPACITY];
				859	const char* tmpLocaleID;
				860
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	861	if (keywordName == NULL \|\| keywordName[0] == 0) {
				862	*status = U_ILLEGAL_ARGUMENT_ERROR;
				863	return 0;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	864	}
				865
				866	locale_canonKeywordName(keywordNameBuffer, keywordName, status);
				867	if(U_FAILURE(*status)) {
				868	return 0;
				869	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	870
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	871	if (_hasBCP47Extension(localeID)) {
				872	_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
				873	} else {
				874	tmpLocaleID=localeID;
				875	}
				876
				877	startSearchHere = locale_getKeywordsStart(tmpLocaleID);
				878	if(startSearchHere == NULL) {
				879	/* no keywords, return at once */
				880	return 0;
				881	}
				882
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	883	/* find the first keyword */
				884	while(startSearchHere) {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	885	const char* keyValueTail;
				886	int32_t keyValueLen;
				887
				888	startSearchHere++; /* skip @ or ; */
				889	nextSeparator = uprv_strchr(startSearchHere, '=');
				890	if(!nextSeparator) {
				891	status = U_ILLEGAL_ARGUMENT_ERROR; / key must have =value */
				892	return 0;
				893	}
				894	/* strip leading & trailing spaces (TC decided to tolerate these) */
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	895	while(*startSearchHere == ' ') {
				896	startSearchHere++;
				897	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	898	keyValueTail = nextSeparator;
				899	while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
				900	keyValueTail--;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	901	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	902	/* now keyValueTail points to first char after the keyName */
				903	/* copy & normalize keyName from locale */
				904	if (startSearchHere == keyValueTail) {
				905	status = U_ILLEGAL_ARGUMENT_ERROR; / empty keyword name in passed-in locale */
				906	return 0;
				907	}
				908	keyValueLen = 0;
				909	while (startSearchHere < keyValueTail) {
				910	if (!UPRV_ISALPHANUM(*startSearchHere)) {
				911	status = U_ILLEGAL_ARGUMENT_ERROR; / malformed keyword name */
				912	return 0;
				913	}
				914	if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
				915	localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++);
				916	} else {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	917	/* keyword name too long for internal buffer */
				918	*status = U_INTERNAL_PROGRAM_ERROR;
				919	return 0;
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	920	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	921	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	922	localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	923
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	924	startSearchHere = uprv_strchr(nextSeparator, ';');
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	925
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	926	if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	927	/* current entry matches the keyword. */
				928	nextSeparator++; /* skip '=' */
				929	/* First strip leading & trailing spaces (TC decided to tolerate these) */
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	930	while(*nextSeparator == ' ') {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	931	nextSeparator++;
				932	}
				933	keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
				934	while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
				935	keyValueTail--;
				936	}
				937	/* Now copy the value, but check well-formedness */
				938	if (nextSeparator == keyValueTail) {
				939	status = U_ILLEGAL_ARGUMENT_ERROR; / empty key value name in passed-in locale */
				940	return 0;
				941	}
				942	keyValueLen = 0;
				943	while (nextSeparator < keyValueTail) {
				944	if (!UPRV_ISALPHANUM(nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(nextSeparator)) {
				945	status = U_ILLEGAL_ARGUMENT_ERROR; / malformed key value */
				946	return 0;
				947	}
				948	if (keyValueLen < bufferCapacity) {
				949	/* Should we lowercase value to return here? Tests expect as-is. */
				950	buffer[keyValueLen++] = *nextSeparator++;
				951	} else { /* keep advancing so we return correct length in case of overflow */
				952	keyValueLen++;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	953	nextSeparator++;
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	954	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	955	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	956	result = u_terminateChars(buffer, bufferCapacity, keyValueLen, status);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	957	return result;
				958	}
				959	}
				960	}
				961	return 0;
				962	}
				963
				964	U_CAPI int32_t U_EXPORT2
				965	uloc_setKeywordValue(const char* keywordName,
				966	const char* keywordValue,
				967	char* buffer, int32_t bufferCapacity,
				968	UErrorCode* status)
				969	{
				970	/* TODO: sorting. removal. */
				971	int32_t keywordNameLen;
				972	int32_t keywordValueLen;
				973	int32_t bufLen;
				974	int32_t needLen = 0;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	975	char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	976	char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	977	char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	978	int32_t rc;
				979	char* nextSeparator = NULL;
				980	char* nextEqualsign = NULL;
				981	char* startSearchHere = NULL;
				982	char* keywordStart = NULL;
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	983	CharString updatedKeysAndValues;
				984	int32_t updatedKeysAndValuesLen;
				985	UBool handledInputKeyAndValue = FALSE;
				986	char keyValuePrefix = '@';
				987
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	988	if(U_FAILURE(*status)) {
				989	return -1;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	990	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	991	if (keywordName == NULL \|\| keywordName[0] == 0 \|\| bufferCapacity <= 1) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	992	*status = U_ILLEGAL_ARGUMENT_ERROR;
				993	return 0;
				994	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	995	bufLen = (int32_t)uprv_strlen(buffer);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	996	if(bufferCapacity<bufLen) {
				997	/* The capacity is less than the length?! Is this NULL terminated? */
				998	*status = U_ILLEGAL_ARGUMENT_ERROR;
				999	return 0;
				1000	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1001	keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
				1002	if(U_FAILURE(*status)) {
				1003	return 0;
				1004	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1005
				1006	keywordValueLen = 0;
				1007	if(keywordValue) {
				1008	while (*keywordValue != 0) {
				1009	if (!UPRV_ISALPHANUM(keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(keywordValue)) {
				1010	status = U_ILLEGAL_ARGUMENT_ERROR; / malformed key value */
				1011	return 0;
				1012	}
				1013	if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {
				1014	/* Should we force lowercase in value to set? */
				1015	keywordValueBuffer[keywordValueLen++] = *keywordValue++;
				1016	} else {
				1017	/* keywordValue too long for internal buffer */
				1018	*status = U_INTERNAL_PROGRAM_ERROR;
				1019	return 0;
				1020	}
				1021	}
				1022	}
				1023	keywordValueBuffer[keywordValueLen] = 0; /* terminate */
				1024
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1025	startSearchHere = (char*)locale_getKeywordsStart(buffer);
				1026	if(startSearchHere == NULL \|\| (startSearchHere[1]==0)) {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1027	if(keywordValueLen == 0) { /* no keywords = nothing to remove */
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1028	return bufLen;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1029	}
				1030
				1031	needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1032	if(startSearchHere) { /* had a single @ */
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1033	needLen--; /* already had the @ */
				1034	/* startSearchHere points at the @ */
				1035	} else {
				1036	startSearchHere=buffer+bufLen;
				1037	}
				1038	if(needLen >= bufferCapacity) {
				1039	*status = U_BUFFER_OVERFLOW_ERROR;
				1040	return needLen; /* no change */
				1041	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1042	*startSearchHere++ = '@';
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1043	uprv_strcpy(startSearchHere, keywordNameBuffer);
				1044	startSearchHere += keywordNameLen;
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1045	*startSearchHere++ = '=';
				1046	uprv_strcpy(startSearchHere, keywordValueBuffer);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1047	return needLen;
				1048	} /* end shortcut - no @ */
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1049
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1050	keywordStart = startSearchHere;
				1051	/* search for keyword */
				1052	while(keywordStart) {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1053	const char* keyValueTail;
				1054	int32_t keyValueLen;
				1055
				1056	keywordStart++; /* skip @ or ; */
				1057	nextEqualsign = uprv_strchr(keywordStart, '=');
				1058	if (!nextEqualsign) {
				1059	status = U_ILLEGAL_ARGUMENT_ERROR; / key must have =value */
				1060	return 0;
				1061	}
				1062	/* strip leading & trailing spaces (TC decided to tolerate these) */
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1063	while(*keywordStart == ' ') {
				1064	keywordStart++;
				1065	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1066	keyValueTail = nextEqualsign;
				1067	while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') {
				1068	keyValueTail--;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1069	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1070	/* now keyValueTail points to first char after the keyName */
				1071	/* copy & normalize keyName from locale */
				1072	if (keywordStart == keyValueTail) {
				1073	status = U_ILLEGAL_ARGUMENT_ERROR; / empty keyword name in passed-in locale */
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1074	return 0;
				1075	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1076	keyValueLen = 0;
				1077	while (keywordStart < keyValueTail) {
				1078	if (!UPRV_ISALPHANUM(*keywordStart)) {
				1079	status = U_ILLEGAL_ARGUMENT_ERROR; / malformed keyword name */
				1080	return 0;
				1081	}
				1082	if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
				1083	localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);
				1084	} else {
				1085	/* keyword name too long for internal buffer */
				1086	*status = U_INTERNAL_PROGRAM_ERROR;
				1087	return 0;
				1088	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1089	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1090	localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1091
				1092	nextSeparator = uprv_strchr(nextEqualsign, ';');
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1093
				1094	/* start processing the value part */
				1095	nextEqualsign++; /* skip '=' */
				1096	/* First strip leading & trailing spaces (TC decided to tolerate these) */
				1097	while(*nextEqualsign == ' ') {
				1098	nextEqualsign++;
				1099	}
				1100	keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);
				1101	while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') {
				1102	keyValueTail--;
				1103	}
				1104	if (nextEqualsign == keyValueTail) {
				1105	status = U_ILLEGAL_ARGUMENT_ERROR; / empty key value in passed-in locale */
				1106	return 0;
				1107	}
				1108
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1109	rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
				1110	if(rc == 0) {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1111	/* Current entry matches the input keyword. Update the entry */
				1112	if(keywordValueLen > 0) { /* updating a value */
				1113	updatedKeysAndValues.append(keyValuePrefix, *status);
				1114	keyValuePrefix = ';'; /* for any subsequent key-value pair */
				1115	updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
				1116	updatedKeysAndValues.append('=', *status);
				1117	updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
				1118	} /* else removing this entry, don't emit anything */
				1119	handledInputKeyAndValue = TRUE;
				1120	} else {
				1121	/* input keyword sorts earlier than current entry, add before current entry */
				1122	if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
				1123	/* insert new entry at this location */
				1124	updatedKeysAndValues.append(keyValuePrefix, *status);
				1125	keyValuePrefix = ';'; /* for any subsequent key-value pair */
				1126	updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
				1127	updatedKeysAndValues.append('=', *status);
				1128	updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
				1129	handledInputKeyAndValue = TRUE;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1130	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1131	/* copy the current entry */
				1132	updatedKeysAndValues.append(keyValuePrefix, *status);
				1133	keyValuePrefix = ';'; /* for any subsequent key-value pair */
				1134	updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
				1135	updatedKeysAndValues.append('=', *status);
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame^]	1136	updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), *status);
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1137	}
				1138	if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
				1139	/* append new entry at the end, it sorts later than existing entries */
				1140	updatedKeysAndValues.append(keyValuePrefix, *status);
				1141	/* skip keyValuePrefix update, no subsequent key-value pair */
				1142	updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
				1143	updatedKeysAndValues.append('=', *status);
				1144	updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
				1145	handledInputKeyAndValue = TRUE;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1146	}
				1147	keywordStart = nextSeparator;
				1148	} /* end loop searching */
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1149
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1150	/* Any error from updatedKeysAndValues.append above would be internal and not due to
				1151	* problems with the passed-in locale. So if we did encounter problems with the
				1152	* passed-in locale above, those errors took precedence and overrode any error
				1153	* status from updatedKeysAndValues.append, and also caused a return of 0. If there
				1154	* are errors here they are from updatedKeysAndValues.append; they do cause an
				1155	* error return but the passed-in locale is unmodified and the original bufLen is
				1156	* returned.
				1157	*/
				1158	if (!handledInputKeyAndValue \|\| U_FAILURE(*status)) {
				1159	/* if input key/value specified removal of a keyword not present in locale, or
				1160	* there was an error in CharString.append, leave original locale alone. */
				1161	return bufLen;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1162	}
				1163
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1164	updatedKeysAndValuesLen = updatedKeysAndValues.length();
				1165	/* needLen = length of the part before '@' + length of updated key-value part including '@' */
				1166	needLen = (int32_t)(startSearchHere - buffer) + updatedKeysAndValuesLen;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1167	if(needLen >= bufferCapacity) {
				1168	*status = U_BUFFER_OVERFLOW_ERROR;
				1169	return needLen; /* no change */
				1170	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1171	if (updatedKeysAndValuesLen > 0) {
				1172	uprv_strncpy(startSearchHere, updatedKeysAndValues.data(), updatedKeysAndValuesLen);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1173	}
				1174	buffer[needLen]=0;
				1175	return needLen;
				1176	}
				1177
				1178	/* ### ID parsing implementation **************************************************/
				1179
				1180	#define _isPrefixLetter(a) ((a=='x')\|\|(a=='X')\|\|(a=='i')\|\|(a=='I'))
				1181
				1182	/*returns TRUE if one of the special prefixes is here (s=string)
				1183	'x-' or 'i-' */
				1184	#define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1]))
				1185
				1186	/* Dot terminates it because of POSIX form where dot precedes the codepage
				1187	* except for variant
				1188	*/
				1189	#define _isTerminator(a) ((a==0)\|\|(a=='.')\|\|(a=='@'))
				1190
				1191	static char* _strnchr(const char* str, int32_t len, char c) {
				1192	U_ASSERT(str != 0 && len >= 0);
				1193	while (len-- != 0) {
				1194	char d = *str;
				1195	if (d == c) {
				1196	return (char*) str;
				1197	} else if (d == 0) {
				1198	break;
				1199	}
				1200	++str;
				1201	}
				1202	return NULL;
				1203	}
				1204
				1205	/**
				1206	* Lookup 'key' in the array 'list'. The array 'list' should contain
				1207	* a NULL entry, followed by more entries, and a second NULL entry.
				1208	*
				1209	* The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
				1210	* COUNTRIES_3.
				1211	*/
				1212	static int16_t _findIndex(const char* const* list, const char* key)
				1213	{
				1214	const char* const* anchor = list;
				1215	int32_t pass = 0;
				1216
				1217	/* Make two passes through two NULL-terminated arrays at 'list' */
				1218	while (pass++ < 2) {
				1219	while (*list) {
				1220	if (uprv_strcmp(key, *list) == 0) {
				1221	return (int16_t)(list - anchor);
				1222	}
				1223	list++;
				1224	}
				1225	++list; /* skip final NULL CWB/
				1226	}
				1227	return -1;
				1228	}
				1229
				1230	/* count the length of src while copying it to dest; return strlen(src) */
				1231	static inline int32_t
				1232	_copyCount(char dest, int32_t destCapacity, const char src) {
				1233	const char *anchor;
				1234	char c;
				1235
				1236	anchor=src;
				1237	for(;;) {
				1238	if((c=*src)==0) {
				1239	return (int32_t)(src-anchor);
				1240	}
				1241	if(destCapacity<=0) {
				1242	return (int32_t)((src-anchor)+uprv_strlen(src));
				1243	}
				1244	++src;
				1245	*dest++=c;
				1246	--destCapacity;
				1247	}
				1248	}
				1249
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1250	U_CFUNC const char*
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1251	uloc_getCurrentCountryID(const char* oldID){
				1252	int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
				1253	if (offset >= 0) {
				1254	return REPLACEMENT_COUNTRIES[offset];
				1255	}
				1256	return oldID;
				1257	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1258	U_CFUNC const char*
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1259	uloc_getCurrentLanguageID(const char* oldID){
				1260	int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
				1261	if (offset >= 0) {
				1262	return REPLACEMENT_LANGUAGES[offset];
				1263	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1264	return oldID;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1265	}
				1266	/*
				1267	* the internal functions _getLanguage(), _getCountry(), _getVariant()
				1268	* avoid duplicating code to handle the earlier locale ID pieces
				1269	* in the functions for the later ones by
				1270	* setting the *pEnd pointer to where they stopped parsing
				1271	*
				1272	* TODO try to use this in Locale
				1273	*/
				1274	U_CFUNC int32_t
				1275	ulocimp_getLanguage(const char *localeID,
				1276	char *language, int32_t languageCapacity,
				1277	const char **pEnd) {
				1278	int32_t i=0;
				1279	int32_t offset;
				1280	char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
				1281
				1282	/* if it starts with i- or x- then copy that prefix */
				1283	if(_isIDPrefix(localeID)) {
				1284	if(i<languageCapacity) {
				1285	language[i]=(char)uprv_tolower(*localeID);
				1286	}
				1287	if(i<languageCapacity) {
				1288	language[i+1]='-';
				1289	}
				1290	i+=2;
				1291	localeID+=2;
				1292	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1293
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1294	/* copy the language as far as possible and count its length */
				1295	while(!_isTerminator(localeID) && !_isIDSeparator(localeID)) {
				1296	if(i<languageCapacity) {
				1297	language[i]=(char)uprv_tolower(*localeID);
				1298	}
				1299	if(i<3) {
				1300	U_ASSERT(i>=0);
				1301	lang[i]=(char)uprv_tolower(*localeID);
				1302	}
				1303	i++;
				1304	localeID++;
				1305	}
				1306
				1307	if(i==3) {
				1308	/* convert 3 character code to 2 character code if possible CWB/
				1309	offset=_findIndex(LANGUAGES_3, lang);
				1310	if(offset>=0) {
				1311	i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
				1312	}
				1313	}
				1314
				1315	if(pEnd!=NULL) {
				1316	*pEnd=localeID;
				1317	}
				1318	return i;
				1319	}
				1320
				1321	U_CFUNC int32_t
				1322	ulocimp_getScript(const char *localeID,
				1323	char *script, int32_t scriptCapacity,
				1324	const char **pEnd)
				1325	{
				1326	int32_t idLen = 0;
				1327
				1328	if (pEnd != NULL) {
				1329	*pEnd = localeID;
				1330	}
				1331
				1332	/* copy the second item as far as possible and count its length */
				1333	while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
				1334	&& uprv_isASCIILetter(localeID[idLen])) {
				1335	idLen++;
				1336	}
				1337
				1338	/* If it's exactly 4 characters long, then it's a script and not a country. */
				1339	if (idLen == 4) {
				1340	int32_t i;
				1341	if (pEnd != NULL) {
				1342	*pEnd = localeID+idLen;
				1343	}
				1344	if(idLen > scriptCapacity) {
				1345	idLen = scriptCapacity;
				1346	}
				1347	if (idLen >= 1) {
				1348	script[0]=(char)uprv_toupper(*(localeID++));
				1349	}
				1350	for (i = 1; i < idLen; i++) {
				1351	script[i]=(char)uprv_tolower(*(localeID++));
				1352	}
				1353	}
				1354	else {
				1355	idLen = 0;
				1356	}
				1357	return idLen;
				1358	}
				1359
				1360	U_CFUNC int32_t
				1361	ulocimp_getCountry(const char *localeID,
				1362	char *country, int32_t countryCapacity,
				1363	const char **pEnd)
				1364	{
				1365	int32_t idLen=0;
				1366	char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
				1367	int32_t offset;
				1368
				1369	/* copy the country as far as possible and count its length */
				1370	while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
				1371	if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /CWB/
				1372	cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
				1373	}
				1374	idLen++;
				1375	}
				1376
				1377	/* the country should be either length 2 or 3 */
				1378	if (idLen == 2 \|\| idLen == 3) {
				1379	UBool gotCountry = FALSE;
				1380	/* convert 3 character code to 2 character code if possible CWB/
				1381	if(idLen==3) {
				1382	offset=_findIndex(COUNTRIES_3, cnty);
				1383	if(offset>=0) {
				1384	idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
				1385	gotCountry = TRUE;
				1386	}
				1387	}
				1388	if (!gotCountry) {
				1389	int32_t i = 0;
				1390	for (i = 0; i < idLen; i++) {
				1391	if (i < countryCapacity) {
				1392	country[i]=(char)uprv_toupper(localeID[i]);
				1393	}
				1394	}
				1395	}
				1396	localeID+=idLen;
				1397	} else {
				1398	idLen = 0;
				1399	}
				1400
				1401	if(pEnd!=NULL) {
				1402	*pEnd=localeID;
				1403	}
				1404
				1405	return idLen;
				1406	}
				1407
				1408	/**
				1409	* @param needSeparator if true, then add leading '_' if any variants
				1410	* are added to 'variant'
				1411	*/
				1412	static int32_t
				1413	_getVariantEx(const char *localeID,
				1414	char prev,
				1415	char *variant, int32_t variantCapacity,
				1416	UBool needSeparator) {
				1417	int32_t i=0;
				1418
				1419	/* get one or more variant tags and separate them with '_' */
				1420	if(_isIDSeparator(prev)) {
				1421	/* get a variant string after a '-' or '_' */
				1422	while(!_isTerminator(*localeID)) {
				1423	if (needSeparator) {
				1424	if (i<variantCapacity) {
				1425	variant[i] = '_';
				1426	}
				1427	++i;
				1428	needSeparator = FALSE;
				1429	}
				1430	if(i<variantCapacity) {
				1431	variant[i]=(char)uprv_toupper(*localeID);
				1432	if(variant[i]=='-') {
				1433	variant[i]='_';
				1434	}
				1435	}
				1436	i++;
				1437	localeID++;
				1438	}
				1439	}
				1440
				1441	/* if there is no variant tag after a '-' or '_' then look for '@' */
				1442	if(i==0) {
				1443	if(prev=='@') {
				1444	/* keep localeID */
				1445	} else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
				1446	++localeID; /* point after the '@' */
				1447	} else {
				1448	return 0;
				1449	}
				1450	while(!_isTerminator(*localeID)) {
				1451	if (needSeparator) {
				1452	if (i<variantCapacity) {
				1453	variant[i] = '_';
				1454	}
				1455	++i;
				1456	needSeparator = FALSE;
				1457	}
				1458	if(i<variantCapacity) {
				1459	variant[i]=(char)uprv_toupper(*localeID);
				1460	if(variant[i]=='-' \|\| variant[i]==',') {
				1461	variant[i]='_';
				1462	}
				1463	}
				1464	i++;
				1465	localeID++;
				1466	}
				1467	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1468
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1469	return i;
				1470	}
				1471
				1472	static int32_t
				1473	_getVariant(const char *localeID,
				1474	char prev,
				1475	char *variant, int32_t variantCapacity) {
				1476	return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
				1477	}
				1478
				1479	/**
				1480	* Delete ALL instances of a variant from the given list of one or
				1481	* more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
				1482	* @param variants the source string of one or more variants,
				1483	* separated by '_'. This will be MODIFIED IN PLACE. Not zero
				1484	* terminated; if it is, trailing zero will NOT be maintained.
				1485	* @param variantsLen length of variants
				1486	* @param toDelete variant to delete, without separators, e.g. "EURO"
				1487	* or "PREEURO"; not zero terminated
				1488	* @param toDeleteLen length of toDelete
				1489	* @return number of characters deleted from variants
				1490	*/
				1491	static int32_t
				1492	_deleteVariant(char* variants, int32_t variantsLen,
				1493	const char* toDelete, int32_t toDeleteLen)
				1494	{
				1495	int32_t delta = 0; /* number of chars deleted */
				1496	for (;;) {
				1497	UBool flag = FALSE;
				1498	if (variantsLen < toDeleteLen) {
				1499	return delta;
				1500	}
				1501	if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
				1502	(variantsLen == toDeleteLen \|\|
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame^]	1503	(flag=(variants[toDeleteLen] == '_')) != 0))
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1504	{
				1505	int32_t d = toDeleteLen + (flag?1:0);
				1506	variantsLen -= d;
				1507	delta += d;
				1508	if (variantsLen > 0) {
				1509	uprv_memmove(variants, variants+d, variantsLen);
				1510	}
				1511	} else {
				1512	char* p = _strnchr(variants, variantsLen, '_');
				1513	if (p == NULL) {
				1514	return delta;
				1515	}
				1516	++p;
				1517	variantsLen -= (int32_t)(p - variants);
				1518	variants = p;
				1519	}
				1520	}
				1521	}
				1522
				1523	/* Keyword enumeration */
				1524
				1525	typedef struct UKeywordsContext {
				1526	char* keywords;
				1527	char* current;
				1528	} UKeywordsContext;
				1529
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1530	U_CDECL_BEGIN
				1531
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1532	static void U_CALLCONV
				1533	uloc_kw_closeKeywords(UEnumeration *enumerator) {
				1534	uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
				1535	uprv_free(enumerator->context);
				1536	uprv_free(enumerator);
				1537	}
				1538
				1539	static int32_t U_CALLCONV
				1540	uloc_kw_countKeywords(UEnumeration en, UErrorCode /status/) {
				1541	char kw = ((UKeywordsContext )en->context)->keywords;
				1542	int32_t result = 0;
				1543	while(*kw) {
				1544	result++;
				1545	kw += uprv_strlen(kw)+1;
				1546	}
				1547	return result;
				1548	}
				1549
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1550	static const char * U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1551	uloc_kw_nextKeyword(UEnumeration* en,
				1552	int32_t* resultLength,
				1553	UErrorCode* /status/) {
				1554	const char* result = ((UKeywordsContext *)en->context)->current;
				1555	int32_t len = 0;
				1556	if(*result) {
				1557	len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
				1558	((UKeywordsContext *)en->context)->current += len+1;
				1559	} else {
				1560	result = NULL;
				1561	}
				1562	if (resultLength) {
				1563	*resultLength = len;
				1564	}
				1565	return result;
				1566	}
				1567
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1568	static void U_CALLCONV
				1569	uloc_kw_resetKeywords(UEnumeration* en,
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1570	UErrorCode* /status/) {
				1571	((UKeywordsContext )en->context)->current = ((UKeywordsContext )en->context)->keywords;
				1572	}
				1573
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1574	U_CDECL_END
				1575
				1576
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1577	static const UEnumeration gKeywordsEnum = {
				1578	NULL,
				1579	NULL,
				1580	uloc_kw_closeKeywords,
				1581	uloc_kw_countKeywords,
				1582	uenum_unextDefault,
				1583	uloc_kw_nextKeyword,
				1584	uloc_kw_resetKeywords
				1585	};
				1586
				1587	U_CAPI UEnumeration* U_EXPORT2
				1588	uloc_openKeywordList(const char keywordList, int32_t keywordListSize, UErrorCode status)
				1589	{
				1590	UKeywordsContext *myContext = NULL;
				1591	UEnumeration *result = NULL;
				1592
				1593	if(U_FAILURE(*status)) {
				1594	return NULL;
				1595	}
				1596	result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
				1597	/* Null pointer test */
				1598	if (result == NULL) {
				1599	*status = U_MEMORY_ALLOCATION_ERROR;
				1600	return NULL;
				1601	}
				1602	uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
				1603	myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
				1604	if (myContext == NULL) {
				1605	*status = U_MEMORY_ALLOCATION_ERROR;
				1606	uprv_free(result);
				1607	return NULL;
				1608	}
				1609	myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
				1610	uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
				1611	myContext->keywords[keywordListSize] = 0;
				1612	myContext->current = myContext->keywords;
				1613	result->context = myContext;
				1614	return result;
				1615	}
				1616
				1617	U_CAPI UEnumeration* U_EXPORT2
				1618	uloc_openKeywords(const char* localeID,
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1619	UErrorCode* status)
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1620	{
				1621	int32_t i=0;
				1622	char keywords[256];
				1623	int32_t keywordsCapacity = 256;
				1624	char tempBuffer[ULOC_FULLNAME_CAPACITY];
				1625	const char* tmpLocaleID;
				1626
				1627	if(status==NULL \|\| U_FAILURE(*status)) {
				1628	return 0;
				1629	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1630
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1631	if (_hasBCP47Extension(localeID)) {
				1632	_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
				1633	} else {
				1634	if (localeID==NULL) {
				1635	localeID=uloc_getDefault();
				1636	}
				1637	tmpLocaleID=localeID;
				1638	}
				1639
				1640	/* Skip the language */
				1641	ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
				1642	if(_isIDSeparator(*tmpLocaleID)) {
				1643	const char *scriptID;
				1644	/* Skip the script if available */
				1645	ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
				1646	if(scriptID != tmpLocaleID+1) {
				1647	/* Found optional script */
				1648	tmpLocaleID = scriptID;
				1649	}
				1650	/* Skip the Country */
				1651	if (_isIDSeparator(*tmpLocaleID)) {
				1652	ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
				1653	if(_isIDSeparator(*tmpLocaleID)) {
				1654	_getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
				1655	}
				1656	}
				1657	}
				1658
				1659	/* keywords are located after '@' */
				1660	if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
				1661	i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
				1662	}
				1663
				1664	if(i) {
				1665	return uloc_openKeywordList(keywords, i, status);
				1666	} else {
				1667	return NULL;
				1668	}
				1669	}
				1670
				1671
				1672	/* bit-flags for 'options' parameter of _canonicalize */
				1673	#define _ULOC_STRIP_KEYWORDS 0x2
				1674	#define _ULOC_CANONICALIZE 0x1
				1675
				1676	#define OPTION_SET(options, mask) ((options & mask) != 0)
				1677
				1678	static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1679	#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1680
				1681	/**
				1682	* Canonicalize the given localeID, to level 1 or to level 2,
				1683	* depending on the options. To specify level 1, pass in options=0.
				1684	* To specify level 2, pass in options=_ULOC_CANONICALIZE.
				1685	*
				1686	* This is the code underlying uloc_getName and uloc_canonicalize.
				1687	*/
				1688	static int32_t
				1689	_canonicalize(const char* localeID,
				1690	char* result,
				1691	int32_t resultCapacity,
				1692	uint32_t options,
				1693	UErrorCode* err) {
				1694	int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
				1695	char localeBuffer[ULOC_FULLNAME_CAPACITY];
				1696	char tempBuffer[ULOC_FULLNAME_CAPACITY];
				1697	const char* origLocaleID;
				1698	const char* tmpLocaleID;
				1699	const char* keywordAssign = NULL;
				1700	const char* separatorIndicator = NULL;
				1701	const char* addKeyword = NULL;
				1702	const char* addValue = NULL;
				1703	char* name;
				1704	char* variant = NULL; /* pointer into name, or NULL */
				1705
				1706	if (U_FAILURE(*err)) {
				1707	return 0;
				1708	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1709
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1710	if (_hasBCP47Extension(localeID)) {
				1711	_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
				1712	} else {
				1713	if (localeID==NULL) {
				1714	localeID=uloc_getDefault();
				1715	}
				1716	tmpLocaleID=localeID;
				1717	}
				1718
				1719	origLocaleID=tmpLocaleID;
				1720
				1721	/* if we are doing a full canonicalization, then put results in
				1722	localeBuffer, if necessary; otherwise send them to result. */
				1723	if (/OPTION_SET(options, _ULOC_CANONICALIZE) &&/
				1724	(result == NULL \|\| resultCapacity < (int32_t)sizeof(localeBuffer))) {
				1725	name = localeBuffer;
				1726	nameCapacity = (int32_t)sizeof(localeBuffer);
				1727	} else {
				1728	name = result;
				1729	nameCapacity = resultCapacity;
				1730	}
				1731
				1732	/* get all pieces, one after another, and separate with '_' */
				1733	len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
				1734
				1735	if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
				1736	const char *d = uloc_getDefault();
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1737
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1738	len = (int32_t)uprv_strlen(d);
				1739
				1740	if (name != NULL) {
				1741	uprv_strncpy(name, d, len);
				1742	}
				1743	} else if(_isIDSeparator(*tmpLocaleID)) {
				1744	const char *scriptID;
				1745
				1746	++fieldCount;
				1747	if(len<nameCapacity) {
				1748	name[len]='_';
				1749	}
				1750	++len;
				1751
				1752	scriptSize=ulocimp_getScript(tmpLocaleID+1,
				1753	(len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
				1754	if(scriptSize > 0) {
				1755	/* Found optional script */
				1756	tmpLocaleID = scriptID;
				1757	++fieldCount;
				1758	len+=scriptSize;
				1759	if (_isIDSeparator(*tmpLocaleID)) {
				1760	/* If there is something else, then we add the _ */
				1761	if(len<nameCapacity) {
				1762	name[len]='_';
				1763	}
				1764	++len;
				1765	}
				1766	}
				1767
				1768	if (_isIDSeparator(*tmpLocaleID)) {
				1769	const char *cntryID;
				1770	int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
				1771	(len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
				1772	if (cntrySize > 0) {
				1773	/* Found optional country */
				1774	tmpLocaleID = cntryID;
				1775	len+=cntrySize;
				1776	}
				1777	if(_isIDSeparator(*tmpLocaleID)) {
				1778	/* If there is something else, then we add the _ if we found country before. */
				1779	if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
				1780	++fieldCount;
				1781	if(len<nameCapacity) {
				1782	name[len]='_';
				1783	}
				1784	++len;
				1785	}
				1786
				1787	variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
				1788	(len<nameCapacity ? name+len : NULL), nameCapacity-len);
				1789	if (variantSize > 0) {
				1790	variant = len<nameCapacity ? name+len : NULL;
				1791	len += variantSize;
				1792	tmpLocaleID += variantSize + 1; /* skip '_' and variant */
				1793	}
				1794	}
				1795	}
				1796	}
				1797
				1798	/* Copy POSIX-style charset specifier, if any [mr.utf8] */
				1799	if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
				1800	UBool done = FALSE;
				1801	do {
				1802	char c = *tmpLocaleID;
				1803	switch (c) {
				1804	case 0:
				1805	case '@':
				1806	done = TRUE;
				1807	break;
				1808	default:
				1809	if (len<nameCapacity) {
				1810	name[len] = c;
				1811	}
				1812	++len;
				1813	++tmpLocaleID;
				1814	break;
				1815	}
				1816	} while (!done);
				1817	}
				1818
				1819	/* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
				1820	After this, tmpLocaleID either points to '@' or is NULL */
				1821	if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
				1822	keywordAssign = uprv_strchr(tmpLocaleID, '=');
				1823	separatorIndicator = uprv_strchr(tmpLocaleID, ';');
				1824	}
				1825
				1826	/* Copy POSIX-style variant, if any [mr@FOO] */
				1827	if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
				1828	tmpLocaleID != NULL && keywordAssign == NULL) {
				1829	for (;;) {
				1830	char c = *tmpLocaleID;
				1831	if (c == 0) {
				1832	break;
				1833	}
				1834	if (len<nameCapacity) {
				1835	name[len] = c;
				1836	}
				1837	++len;
				1838	++tmpLocaleID;
				1839	}
				1840	}
				1841
				1842	if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
				1843	/* Handle @FOO variant if @ is present and not followed by = */
				1844	if (tmpLocaleID!=NULL && keywordAssign==NULL) {
				1845	int32_t posixVariantSize;
				1846	/* Add missing '_' if needed */
				1847	if (fieldCount < 2 \|\| (fieldCount < 3 && scriptSize > 0)) {
				1848	do {
				1849	if(len<nameCapacity) {
				1850	name[len]='_';
				1851	}
				1852	++len;
				1853	++fieldCount;
				1854	} while(fieldCount<2);
				1855	}
				1856	posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
				1857	(UBool)(variantSize > 0));
				1858	if (posixVariantSize > 0) {
				1859	if (variant == NULL) {
				1860	variant = name+len;
				1861	}
				1862	len += posixVariantSize;
				1863	variantSize += posixVariantSize;
				1864	}
				1865	}
				1866
				1867	/* Handle generic variants first */
				1868	if (variant) {
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1869	for (j=0; j<UPRV_LENGTHOF(VARIANT_MAP); j++) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1870	const char* variantToCompare = VARIANT_MAP[j].variant;
				1871	int32_t n = (int32_t)uprv_strlen(variantToCompare);
				1872	int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
				1873	len -= variantLen;
				1874	if (variantLen > 0) {
				1875	if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
				1876	--len;
				1877	}
				1878	addKeyword = VARIANT_MAP[j].keyword;
				1879	addValue = VARIANT_MAP[j].value;
				1880	break;
				1881	}
				1882	}
				1883	if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
				1884	--len;
				1885	}
				1886	}
				1887
				1888	/* Look up the ID in the canonicalization map */
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1889	for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1890	const char* id = CANONICALIZE_MAP[j].id;
				1891	int32_t n = (int32_t)uprv_strlen(id);
				1892	if (len == n && uprv_strncmp(name, id, n) == 0) {
				1893	if (n == 0 && tmpLocaleID != NULL) {
				1894	break; /* Don't remap "" if keywords present */
				1895	}
				1896	len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
				1897	if (CANONICALIZE_MAP[j].keyword) {
				1898	addKeyword = CANONICALIZE_MAP[j].keyword;
				1899	addValue = CANONICALIZE_MAP[j].value;
				1900	}
				1901	break;
				1902	}
				1903	}
				1904	}
				1905
				1906	if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
				1907	if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
				1908	(!separatorIndicator \|\| separatorIndicator > keywordAssign)) {
				1909	if(len<nameCapacity) {
				1910	name[len]='@';
				1911	}
				1912	++len;
				1913	++fieldCount;
				1914	len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
				1915	NULL, 0, NULL, TRUE, addKeyword, addValue, err);
				1916	} else if (addKeyword != NULL) {
				1917	U_ASSERT(addValue != NULL && len < nameCapacity);
				1918	/* inelegant but works -- later make _getKeywords do this? */
				1919	len += _copyCount(name+len, nameCapacity-len, "@");
				1920	len += _copyCount(name+len, nameCapacity-len, addKeyword);
				1921	len += _copyCount(name+len, nameCapacity-len, "=");
				1922	len += _copyCount(name+len, nameCapacity-len, addValue);
				1923	}
				1924	}
				1925
				1926	if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
				1927	uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
				1928	}
				1929
				1930	return u_terminateChars(result, resultCapacity, len, err);
				1931	}
				1932
				1933	/* ### ID parsing API **************************************************/
				1934
				1935	U_CAPI int32_t U_EXPORT2
				1936	uloc_getParent(const char* localeID,
				1937	char* parent,
				1938	int32_t parentCapacity,
				1939	UErrorCode* err)
				1940	{
				1941	const char *lastUnderscore;
				1942	int32_t i;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1943
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1944	if (U_FAILURE(*err))
				1945	return 0;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1946
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1947	if (localeID == NULL)
				1948	localeID = uloc_getDefault();
				1949
				1950	lastUnderscore=uprv_strrchr(localeID, '_');
				1951	if(lastUnderscore!=NULL) {
				1952	i=(int32_t)(lastUnderscore-localeID);
				1953	} else {
				1954	i=0;
				1955	}
				1956
				1957	if(i>0 && parent != localeID) {
				1958	uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
				1959	}
				1960	return u_terminateChars(parent, parentCapacity, i, err);
				1961	}
				1962
				1963	U_CAPI int32_t U_EXPORT2
				1964	uloc_getLanguage(const char* localeID,
				1965	char* language,
				1966	int32_t languageCapacity,
				1967	UErrorCode* err)
				1968	{
				1969	/* uloc_getLanguage will return a 2 character iso-639 code if one exists. CWB/
				1970	int32_t i=0;
				1971
				1972	if (err==NULL \|\| U_FAILURE(*err)) {
				1973	return 0;
				1974	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1975
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1976	if(localeID==NULL) {
				1977	localeID=uloc_getDefault();
				1978	}
				1979
				1980	i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
				1981	return u_terminateChars(language, languageCapacity, i, err);
				1982	}
				1983
				1984	U_CAPI int32_t U_EXPORT2
				1985	uloc_getScript(const char* localeID,
				1986	char* script,
				1987	int32_t scriptCapacity,
				1988	UErrorCode* err)
				1989	{
				1990	int32_t i=0;
				1991
				1992	if(err==NULL \|\| U_FAILURE(*err)) {
				1993	return 0;
				1994	}
				1995
				1996	if(localeID==NULL) {
				1997	localeID=uloc_getDefault();
				1998	}
				1999
				2000	/* skip the language */
				2001	ulocimp_getLanguage(localeID, NULL, 0, &localeID);
				2002	if(_isIDSeparator(*localeID)) {
				2003	i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
				2004	}
				2005	return u_terminateChars(script, scriptCapacity, i, err);
				2006	}
				2007
				2008	U_CAPI int32_t U_EXPORT2
				2009	uloc_getCountry(const char* localeID,
				2010	char* country,
				2011	int32_t countryCapacity,
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2012	UErrorCode* err)
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2013	{
				2014	int32_t i=0;
				2015
				2016	if(err==NULL \|\| U_FAILURE(*err)) {
				2017	return 0;
				2018	}
				2019
				2020	if(localeID==NULL) {
				2021	localeID=uloc_getDefault();
				2022	}
				2023
				2024	/* Skip the language */
				2025	ulocimp_getLanguage(localeID, NULL, 0, &localeID);
				2026	if(_isIDSeparator(*localeID)) {
				2027	const char *scriptID;
				2028	/* Skip the script if available */
				2029	ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
				2030	if(scriptID != localeID+1) {
				2031	/* Found optional script */
				2032	localeID = scriptID;
				2033	}
				2034	if(_isIDSeparator(*localeID)) {
				2035	i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
				2036	}
				2037	}
				2038	return u_terminateChars(country, countryCapacity, i, err);
				2039	}
				2040
				2041	U_CAPI int32_t U_EXPORT2
				2042	uloc_getVariant(const char* localeID,
				2043	char* variant,
				2044	int32_t variantCapacity,
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2045	UErrorCode* err)
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2046	{
				2047	char tempBuffer[ULOC_FULLNAME_CAPACITY];
				2048	const char* tmpLocaleID;
				2049	int32_t i=0;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2050
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2051	if(err==NULL \|\| U_FAILURE(*err)) {
				2052	return 0;
				2053	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2054
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2055	if (_hasBCP47Extension(localeID)) {
				2056	_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
				2057	} else {
				2058	if (localeID==NULL) {
				2059	localeID=uloc_getDefault();
				2060	}
				2061	tmpLocaleID=localeID;
				2062	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2063
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2064	/* Skip the language */
				2065	ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
				2066	if(_isIDSeparator(*tmpLocaleID)) {
				2067	const char *scriptID;
				2068	/* Skip the script if available */
				2069	ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
				2070	if(scriptID != tmpLocaleID+1) {
				2071	/* Found optional script */
				2072	tmpLocaleID = scriptID;
				2073	}
				2074	/* Skip the Country */
				2075	if (_isIDSeparator(*tmpLocaleID)) {
				2076	const char *cntryID;
				2077	ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
				2078	if (cntryID != tmpLocaleID+1) {
				2079	/* Found optional country */
				2080	tmpLocaleID = cntryID;
				2081	}
				2082	if(_isIDSeparator(*tmpLocaleID)) {
				2083	/* If there was no country ID, skip a possible extra IDSeparator */
				2084	if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
				2085	tmpLocaleID++;
				2086	}
				2087	i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
				2088	}
				2089	}
				2090	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2091
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2092	/* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
				2093	/* if we do not have a variant tag yet then try a POSIX variant after '@' */
				2094	/*
				2095	if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
				2096	i=_getVariant(localeID+1, '@', variant, variantCapacity);
				2097	}
				2098	*/
				2099	return u_terminateChars(variant, variantCapacity, i, err);
				2100	}
				2101
				2102	U_CAPI int32_t U_EXPORT2
				2103	uloc_getName(const char* localeID,
				2104	char* name,
				2105	int32_t nameCapacity,
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2106	UErrorCode* err)
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2107	{
				2108	return _canonicalize(localeID, name, nameCapacity, 0, err);
				2109	}
				2110
				2111	U_CAPI int32_t U_EXPORT2
				2112	uloc_getBaseName(const char* localeID,
				2113	char* name,
				2114	int32_t nameCapacity,
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2115	UErrorCode* err)
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2116	{
				2117	return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
				2118	}
				2119
				2120	U_CAPI int32_t U_EXPORT2
				2121	uloc_canonicalize(const char* localeID,
				2122	char* name,
				2123	int32_t nameCapacity,
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2124	UErrorCode* err)
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2125	{
				2126	return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
				2127	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2128
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2129	U_CAPI const char* U_EXPORT2
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2130	uloc_getISO3Language(const char* localeID)
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2131	{
				2132	int16_t offset;
				2133	char lang[ULOC_LANG_CAPACITY];
				2134	UErrorCode err = U_ZERO_ERROR;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2135
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2136	if (localeID == NULL)
				2137	{
				2138	localeID = uloc_getDefault();
				2139	}
				2140	uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
				2141	if (U_FAILURE(err))
				2142	return "";
				2143	offset = _findIndex(LANGUAGES, lang);
				2144	if (offset < 0)
				2145	return "";
				2146	return LANGUAGES_3[offset];
				2147	}
				2148
				2149	U_CAPI const char* U_EXPORT2
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2150	uloc_getISO3Country(const char* localeID)
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2151	{
				2152	int16_t offset;
				2153	char cntry[ULOC_LANG_CAPACITY];
				2154	UErrorCode err = U_ZERO_ERROR;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2155
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2156	if (localeID == NULL)
				2157	{
				2158	localeID = uloc_getDefault();
				2159	}
				2160	uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
				2161	if (U_FAILURE(err))
				2162	return "";
				2163	offset = _findIndex(COUNTRIES, cntry);
				2164	if (offset < 0)
				2165	return "";
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2166
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2167	return COUNTRIES_3[offset];
				2168	}
				2169
				2170	U_CAPI uint32_t U_EXPORT2
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2171	uloc_getLCID(const char* localeID)
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2172	{
				2173	UErrorCode status = U_ZERO_ERROR;
				2174	char langID[ULOC_FULLNAME_CAPACITY];
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	2175	uint32_t lcid = 0;
				2176
				2177	/* Check for incomplete id. */
				2178	if (!localeID \|\| uprv_strlen(localeID) < 2) {
				2179	return 0;
				2180	}
				2181
				2182	// Attempt platform lookup if available
				2183	lcid = uprv_convertToLCIDPlatform(localeID);
				2184	if (lcid > 0)
				2185	{
				2186	// Windows found an LCID, return that
				2187	return lcid;
				2188	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2189
				2190	uloc_getLanguage(localeID, langID, sizeof(langID), &status);
				2191	if (U_FAILURE(status)) {
				2192	return 0;
				2193	}
				2194
				2195	if (uprv_strchr(localeID, '@')) {
				2196	// uprv_convertToLCID does not support keywords other than collation.
				2197	// Remove all keywords except collation.
				2198	int32_t len;
				2199	char collVal[ULOC_KEYWORDS_CAPACITY];
				2200	char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
				2201
				2202	len = uloc_getKeywordValue(localeID, "collation", collVal,
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2203	UPRV_LENGTHOF(collVal) - 1, &status);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2204
				2205	if (U_SUCCESS(status) && len > 0) {
				2206	collVal[len] = 0;
				2207
				2208	len = uloc_getBaseName(localeID, tmpLocaleID,
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2209	UPRV_LENGTHOF(tmpLocaleID) - 1, &status);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2210
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2211	if (U_SUCCESS(status) && len > 0) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2212	tmpLocaleID[len] = 0;
				2213
				2214	len = uloc_setKeywordValue("collation", collVal, tmpLocaleID,
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2215	UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2216
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2217	if (U_SUCCESS(status) && len > 0) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2218	tmpLocaleID[len] = 0;
				2219	return uprv_convertToLCID(langID, tmpLocaleID, &status);
				2220	}
				2221	}
				2222	}
				2223
				2224	// fall through - all keywords are simply ignored
				2225	status = U_ZERO_ERROR;
				2226	}
				2227
				2228	return uprv_convertToLCID(langID, localeID, &status);
				2229	}
				2230
				2231	U_CAPI int32_t U_EXPORT2
				2232	uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
				2233	UErrorCode *status)
				2234	{
				2235	return uprv_convertToPosix(hostid, locale, localeCapacity, status);
				2236	}
				2237
				2238	/* ### Default locale **************************************************/
				2239
				2240	U_CAPI const char* U_EXPORT2
				2241	uloc_getDefault()
				2242	{
				2243	return locale_get_default();
				2244	}
				2245
				2246	U_CAPI void U_EXPORT2
				2247	uloc_setDefault(const char* newDefaultLocale,
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2248	UErrorCode* err)
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2249	{
				2250	if (U_FAILURE(*err))
				2251	return;
				2252	/* the error code isn't currently used for anything by this function*/
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2253
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2254	/* propagate change to C++ */
				2255	locale_set_default(newDefaultLocale);
				2256	}
				2257
				2258	/**
				2259	* Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
				2260	* to an array of pointers to arrays of char. All of these pointers are owned
				2261	* by ICU-- do not delete them, and do not write through them. The array is
				2262	* terminated with a null pointer.
				2263	*/
				2264	U_CAPI const char* const* U_EXPORT2
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2265	uloc_getISOLanguages()
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2266	{
				2267	return LANGUAGES;
				2268	}
				2269
				2270	/**
				2271	* Returns a list of all 2-letter country codes defined in ISO 639. This is a
				2272	* pointer to an array of pointers to arrays of char. All of these pointers are
				2273	* owned by ICU-- do not delete them, and do not write through them. The array is
				2274	* terminated with a null pointer.
				2275	*/
				2276	U_CAPI const char* const* U_EXPORT2
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2277	uloc_getISOCountries()
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2278	{
				2279	return COUNTRIES;
				2280	}
				2281
				2282
				2283	/* this function to be moved into cstring.c later */
				2284	static char gDecimal = 0;
				2285
				2286	static /* U_CAPI */
				2287	double
				2288	/* U_EXPORT2 */
				2289	_uloc_strtod(const char start, char *end) {
				2290	char *decimal;
				2291	char *myEnd;
				2292	char buf[30];
				2293	double rv;
				2294	if (!gDecimal) {
				2295	char rep[5];
				2296	/* For machines that decide to change the decimal on you,
				2297	and try to be too smart with localization.
				2298	This normally should be just a '.'. */
				2299	sprintf(rep, "%+1.1f", 1.0);
				2300	gDecimal = rep[2];
				2301	}
				2302
				2303	if(gDecimal == '.') {
				2304	return uprv_strtod(start, end); /* fall through to OS */
				2305	} else {
				2306	uprv_strncpy(buf, start, 29);
				2307	buf[29]=0;
				2308	decimal = uprv_strchr(buf, '.');
				2309	if(decimal) {
				2310	*decimal = gDecimal;
				2311	} else {
				2312	return uprv_strtod(start, end); /* no decimal point */
				2313	}
				2314	rv = uprv_strtod(buf, &myEnd);
				2315	if(end) {
				2316	end = (char)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
				2317	}
				2318	return rv;
				2319	}
				2320	}
				2321
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2322	typedef struct {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2323	float q;
				2324	int32_t dummy; /* to avoid uninitialized memory copy from qsort */
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2325	char locale[ULOC_FULLNAME_CAPACITY+1];
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2326	} _acceptLangItem;
				2327
				2328	static int32_t U_CALLCONV
				2329	uloc_acceptLanguageCompare(const void * /context/, const void a, const void b)
				2330	{
				2331	const _acceptLangItem aa = (const _acceptLangItem)a;
				2332	const _acceptLangItem bb = (const _acceptLangItem)b;
				2333
				2334	int32_t rc = 0;
				2335	if(bb->q < aa->q) {
				2336	rc = -1; /* A > B */
				2337	} else if(bb->q > aa->q) {
				2338	rc = 1; /* A < B */
				2339	} else {
				2340	rc = 0; /* A = B */
				2341	}
				2342
				2343	if(rc==0) {
				2344	rc = uprv_stricmp(aa->locale, bb->locale);
				2345	}
				2346
				2347	#if defined(ULOC_DEBUG)
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2348	/* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
				2349	aa->locale, aa->q,
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2350	bb->locale, bb->q,
				2351	rc);*/
				2352	#endif
				2353
				2354	return rc;
				2355	}
				2356
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2357	/*
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2358	mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
				2359	*/
				2360
				2361	U_CAPI int32_t U_EXPORT2
				2362	uloc_acceptLanguageFromHTTP(char result, int32_t resultAvailable, UAcceptResult outResult,
				2363	const char *httpAcceptLanguage,
				2364	UEnumeration* availableLocales,
				2365	UErrorCode *status)
				2366	{
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2367	MaybeStackArray<_acceptLangItem, 4> items; // Struct for collecting items.
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2368	char tmp[ULOC_FULLNAME_CAPACITY +1];
				2369	int32_t n = 0;
				2370	const char *itemEnd;
				2371	const char *paramEnd;
				2372	const char *s;
				2373	const char *t;
				2374	int32_t res;
				2375	int32_t i;
				2376	int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2377
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2378	if(U_FAILURE(*status)) {
				2379	return -1;
				2380	}
				2381
				2382	for(s=httpAcceptLanguage;s&&*s;) {
				2383	while(isspace(s)) / eat space at the beginning */
				2384	s++;
				2385	itemEnd=uprv_strchr(s,',');
				2386	paramEnd=uprv_strchr(s,';');
				2387	if(!itemEnd) {
				2388	itemEnd = httpAcceptLanguage+l; /* end of string */
				2389	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2390	if(paramEnd && paramEnd<itemEnd) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2391	/* semicolon (;) is closer than end (,) */
				2392	t = paramEnd+1;
				2393	if(*t=='q') {
				2394	t++;
				2395	}
				2396	while(isspace(*t)) {
				2397	t++;
				2398	}
				2399	if(*t=='=') {
				2400	t++;
				2401	}
				2402	while(isspace(*t)) {
				2403	t++;
				2404	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2405	items[n].q = (float)_uloc_strtod(t,NULL);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2406	} else {
				2407	/* no semicolon - it's 1.0 */
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2408	items[n].q = 1.0f;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2409	paramEnd = itemEnd;
				2410	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2411	items[n].dummy=0;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2412	/* eat spaces prior to semi */
				2413	for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
				2414	;
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame^]	2415	int32_t slen = static_cast<int32_t>(((t+1)-s));
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2416	if(slen > ULOC_FULLNAME_CAPACITY) {
				2417	*status = U_BUFFER_OVERFLOW_ERROR;
				2418	return -1; // too big
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2419	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2420	uprv_strncpy(items[n].locale, s, slen);
				2421	items[n].locale[slen]=0; // terminate
				2422	int32_t clen = uloc_canonicalize(items[n].locale, tmp, UPRV_LENGTHOF(tmp)-1, status);
				2423	if(U_FAILURE(*status)) return -1;
				2424	if((clen!=slen) \|\| (uprv_strncmp(items[n].locale, tmp, slen))) {
				2425	// canonicalization had an effect- copy back
				2426	uprv_strncpy(items[n].locale, tmp, clen);
				2427	items[n].locale[clen] = 0; // terminate
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2428	}
				2429	#if defined(ULOC_DEBUG)
				2430	/fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);/
				2431	#endif
				2432	n++;
				2433	s = itemEnd;
				2434	while(s==',') { / eat duplicate commas */
				2435	s++;
				2436	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2437	if(n>=items.getCapacity()) { // If we need more items
				2438	if(NULL == items.resize(items.getCapacity()*2, items.getCapacity())) {
				2439	*status = U_MEMORY_ALLOCATION_ERROR;
				2440	return -1;
				2441	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2442	#if defined(ULOC_DEBUG)
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2443	fprintf(stderr,"malloced at size %d\n", items.getCapacity());
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2444	#endif
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2445	}
				2446	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2447	uprv_sortArray(items.getAlias(), n, sizeof(items[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
				2448	if (U_FAILURE(*status)) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2449	return -1;
				2450	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2451	LocalMemory<const char*> strs(NULL);
				2452	if (strs.allocateInsteadAndReset(n) == NULL) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2453	*status = U_MEMORY_ALLOCATION_ERROR;
				2454	return -1;
				2455	}
				2456	for(i=0;i<n;i++) {
				2457	#if defined(ULOC_DEBUG)
				2458	/fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);/
				2459	#endif
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2460	strs[i]=items[i].locale;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2461	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2462	res = uloc_acceptLanguage(result, resultAvailable, outResult,
				2463	strs.getAlias(), n, availableLocales, status);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2464	return res;
				2465	}
				2466
				2467
				2468	U_CAPI int32_t U_EXPORT2
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2469	uloc_acceptLanguage(char *result, int32_t resultAvailable,
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2470	UAcceptResult outResult, const char *acceptList,
				2471	int32_t acceptListCount,
				2472	UEnumeration* availableLocales,
				2473	UErrorCode *status)
				2474	{
				2475	int32_t i,j;
				2476	int32_t len;
				2477	int32_t maxLen=0;
				2478	char tmp[ULOC_FULLNAME_CAPACITY+1];
				2479	const char *l;
				2480	char **fallbackList;
				2481	if(U_FAILURE(*status)) {
				2482	return -1;
				2483	}
				2484	fallbackList = static_cast<char *>(uprv_malloc((size_t)(sizeof(fallbackList[0])acceptListCount)));
				2485	if(fallbackList==NULL) {
				2486	*status = U_MEMORY_ALLOCATION_ERROR;
				2487	return -1;
				2488	}
				2489	for(i=0;i<acceptListCount;i++) {
				2490	#if defined(ULOC_DEBUG)
				2491	fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
				2492	#endif
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame]	2493	while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2494	#if defined(ULOC_DEBUG)
				2495	fprintf(stderr," %s\n", l);
				2496	#endif
				2497	len = (int32_t)uprv_strlen(l);
				2498	if(!uprv_strcmp(acceptList[i], l)) {
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2499	if(outResult) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2500	*outResult = ULOC_ACCEPT_VALID;
				2501	}
				2502	#if defined(ULOC_DEBUG)
				2503	fprintf(stderr, "MATCH! %s\n", l);
				2504	#endif
				2505	if(len>0) {
				2506	uprv_strncpy(result, l, uprv_min(len, resultAvailable));
				2507	}
				2508	for(j=0;j<i;j++) {
				2509	uprv_free(fallbackList[j]);
				2510	}
				2511	uprv_free(fallbackList);
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2512	return u_terminateChars(result, resultAvailable, len, status);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2513	}
				2514	if(len>maxLen) {
				2515	maxLen = len;
				2516	}
				2517	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2518	uenum_reset(availableLocales, status);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2519	/* save off parent info */
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2520	if(uloc_getParent(acceptList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2521	fallbackList[i] = uprv_strdup(tmp);
				2522	} else {
				2523	fallbackList[i]=0;
				2524	}
				2525	}
				2526
				2527	for(maxLen--;maxLen>0;maxLen--) {
				2528	for(i=0;i<acceptListCount;i++) {
				2529	if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
				2530	#if defined(ULOC_DEBUG)
				2531	fprintf(stderr,"Try: [%s]", fallbackList[i]);
				2532	#endif
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame]	2533	while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2534	#if defined(ULOC_DEBUG)
				2535	fprintf(stderr," %s\n", l);
				2536	#endif
				2537	len = (int32_t)uprv_strlen(l);
				2538	if(!uprv_strcmp(fallbackList[i], l)) {
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2539	if(outResult) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2540	*outResult = ULOC_ACCEPT_FALLBACK;
				2541	}
				2542	#if defined(ULOC_DEBUG)
				2543	fprintf(stderr, "fallback MATCH! %s\n", l);
				2544	#endif
				2545	if(len>0) {
				2546	uprv_strncpy(result, l, uprv_min(len, resultAvailable));
				2547	}
				2548	for(j=0;j<acceptListCount;j++) {
				2549	uprv_free(fallbackList[j]);
				2550	}
				2551	uprv_free(fallbackList);
				2552	return u_terminateChars(result, resultAvailable, len, status);
				2553	}
				2554	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2555	uenum_reset(availableLocales, status);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2556
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2557	if(uloc_getParent(fallbackList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2558	uprv_free(fallbackList[i]);
				2559	fallbackList[i] = uprv_strdup(tmp);
				2560	} else {
				2561	uprv_free(fallbackList[i]);
				2562	fallbackList[i]=0;
				2563	}
				2564	}
				2565	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2566	if(outResult) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	2567	*outResult = ULOC_ACCEPT_FAILED;
				2568	}
				2569	}
				2570	for(i=0;i<acceptListCount;i++) {
				2571	uprv_free(fallbackList[i]);
				2572	}
				2573	uprv_free(fallbackList);
				2574	return -1;
				2575	}
				2576
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	2577	U_CAPI const char* U_EXPORT2
				2578	uloc_toUnicodeLocaleKey(const char* keyword)
				2579	{
				2580	const char* bcpKey = ulocimp_toBcpKey(keyword);
				2581	if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {
				2582	// unknown keyword, but syntax is fine..
				2583	return keyword;
				2584	}
				2585	return bcpKey;
				2586	}
				2587
				2588	U_CAPI const char* U_EXPORT2
				2589	uloc_toUnicodeLocaleType(const char* keyword, const char* value)
				2590	{
				2591	const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
				2592	if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {
				2593	// unknown keyword, but syntax is fine..
				2594	return value;
				2595	}
				2596	return bcpType;
				2597	}
				2598
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	2599	static UBool
				2600	isWellFormedLegacyKey(const char* legacyKey)
				2601	{
				2602	const char* p = legacyKey;
				2603	while (*p) {
				2604	if (!UPRV_ISALPHANUM(*p)) {
				2605	return FALSE;
				2606	}
				2607	p++;
				2608	}
				2609	return TRUE;
				2610	}
				2611
				2612	static UBool
				2613	isWellFormedLegacyType(const char* legacyType)
				2614	{
				2615	const char* p = legacyType;
				2616	int32_t alphaNumLen = 0;
				2617	while (*p) {
				2618	if (p == '_' \|\| p == '/' \|\| *p == '-') {
				2619	if (alphaNumLen == 0) {
				2620	return FALSE;
				2621	}
				2622	alphaNumLen = 0;
				2623	} else if (UPRV_ISALPHANUM(*p)) {
				2624	alphaNumLen++;
				2625	} else {
				2626	return FALSE;
				2627	}
				2628	p++;
				2629	}
				2630	return (alphaNumLen != 0);
				2631	}
				2632
				2633	U_CAPI const char* U_EXPORT2
				2634	uloc_toLegacyKey(const char* keyword)
				2635	{
				2636	const char* legacyKey = ulocimp_toLegacyKey(keyword);
				2637	if (legacyKey == NULL) {
				2638	// Checks if the specified locale key is well-formed with the legacy locale syntax.
				2639	//
				2640	// Note:
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	2641	// LDML/CLDR provides some definition of keyword syntax in
				2642	// * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
				2643	// * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
				2644	// Keys can only consist of [0-9a-zA-Z].
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	2645	if (isWellFormedLegacyKey(keyword)) {
				2646	return keyword;
				2647	}
				2648	}
				2649	return legacyKey;
				2650	}
				2651
				2652	U_CAPI const char* U_EXPORT2
				2653	uloc_toLegacyType(const char* keyword, const char* value)
				2654	{
				2655	const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
				2656	if (legacyType == NULL) {
				2657	// Checks if the specified locale type is well-formed with the legacy locale syntax.
				2658	//
				2659	// Note:
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	2660	// LDML/CLDR provides some definition of keyword syntax in
				2661	// * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
				2662	// * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
				2663	// Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
				2664	// we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	2665	if (isWellFormedLegacyType(value)) {
				2666	return value;
				2667	}
				2668	}
				2669	return legacyType;
				2670	}
				2671
/*eof*/