blob: 1ededbb268ae23a19c6db1577eac490b433bc819 [file] [log] [blame]
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 1997-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*
* File USCRIPT.C
*
* Modification History:
*
*   Date        Name        Description
*   07/06/2001  Ram         Creation.
******************************************************************************
*/
17
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000018#include "unicode/uchar.h"
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -080019#include "unicode/uscript.h"
20#include "unicode/uloc.h"
Frank Tang69c72a62019-04-03 21:41:21 -070021#include "bytesinkutil.h"
22#include "charstr.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000023#include "cmemory.h"
24#include "cstring.h"
Frank Tang69c72a62019-04-03 21:41:21 -070025#include "ulocimp.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000026
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -080027static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
28static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
29static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000030
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -080031static int32_t
32setCodes(const UScriptCode *src, int32_t length,
33 UScriptCode *dest, int32_t capacity, UErrorCode *err) {
34 int32_t i;
35 if(U_FAILURE(*err)) { return 0; }
36 if(length > capacity) {
37 *err = U_BUFFER_OVERFLOW_ERROR;
38 return length;
39 }
40 for(i = 0; i < length; ++i) {
41 dest[i] = src[i];
42 }
43 return length;
44}
45
46static int32_t
47setOneCode(UScriptCode script, UScriptCode *scripts, int32_t capacity, UErrorCode *err) {
48 if(U_FAILURE(*err)) { return 0; }
49 if(1 > capacity) {
50 *err = U_BUFFER_OVERFLOW_ERROR;
51 return 1;
52 }
53 scripts[0] = script;
54 return 1;
55}
56
57static int32_t
58getCodesFromLocale(const char *locale,
59 UScriptCode *scripts, int32_t capacity, UErrorCode *err) {
60 UErrorCode internalErrorCode = U_ZERO_ERROR;
Frank Tangf2223962020-04-27 18:25:29 -070061 char lang[8] = {0};
62 char script[8] = {0};
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -080063 int32_t scriptLength;
64 if(U_FAILURE(*err)) { return 0; }
65 // Multi-script languages, equivalent to the LocaleScript data
66 // that we used to load from locale resource bundles.
67 /*length = */ uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &internalErrorCode);
68 if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) {
69 return 0;
70 }
71 if(0 == uprv_strcmp(lang, "ja")) {
72 return setCodes(JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, capacity, err);
73 }
74 if(0 == uprv_strcmp(lang, "ko")) {
75 return setCodes(KOREAN, UPRV_LENGTHOF(KOREAN), scripts, capacity, err);
76 }
77 scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &internalErrorCode);
78 if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) {
79 return 0;
80 }
81 if(0 == uprv_strcmp(lang, "zh") && 0 == uprv_strcmp(script, "Hant")) {
82 return setCodes(HAN_BOPO, UPRV_LENGTHOF(HAN_BOPO), scripts, capacity, err);
83 }
84 // Explicit script code.
85 if(scriptLength != 0) {
86 UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
87 if(scriptCode != USCRIPT_INVALID_CODE) {
88 if(scriptCode == USCRIPT_SIMPLIFIED_HAN || scriptCode == USCRIPT_TRADITIONAL_HAN) {
89 scriptCode = USCRIPT_HAN;
90 }
91 return setOneCode(scriptCode, scripts, capacity, err);
92 }
93 }
94 return 0;
95}
96
97/* TODO: this is a bad API and should be deprecated, ticket #11141 */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000098U_CAPI int32_t U_EXPORT2
99uscript_getCode(const char* nameOrAbbrOrLocale,
100 UScriptCode* fillIn,
101 int32_t capacity,
102 UErrorCode* err){
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800103 UBool triedCode;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800104 UErrorCode internalErrorCode;
105 int32_t length;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000106
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800107 if(U_FAILURE(*err)) {
108 return 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000109 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800110 if(nameOrAbbrOrLocale==NULL ||
111 (fillIn == NULL ? capacity != 0 : capacity < 0)) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000112 *err = U_ILLEGAL_ARGUMENT_ERROR;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800113 return 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000114 }
115
Frank Tang1f164ee2022-11-08 12:31:27 -0800116 triedCode = false;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000117 if(uprv_strchr(nameOrAbbrOrLocale, '-')==NULL && uprv_strchr(nameOrAbbrOrLocale, '_')==NULL ){
118 /* try long and abbreviated script names first */
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800119 UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
120 if(code!=USCRIPT_INVALID_CODE) {
121 return setOneCode(code, fillIn, capacity, err);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000122 }
Frank Tang1f164ee2022-11-08 12:31:27 -0800123 triedCode = true;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000124 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800125 internalErrorCode = U_ZERO_ERROR;
126 length = getCodesFromLocale(nameOrAbbrOrLocale, fillIn, capacity, err);
127 if(U_FAILURE(*err) || length != 0) {
128 return length;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000129 }
Frank Tang69c72a62019-04-03 21:41:21 -0700130 icu::CharString likely;
131 {
132 icu::CharStringByteSink sink(&likely);
133 ulocimp_addLikelySubtags(nameOrAbbrOrLocale, sink, &internalErrorCode);
134 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800135 if(U_SUCCESS(internalErrorCode) && internalErrorCode != U_STRING_NOT_TERMINATED_WARNING) {
Frank Tang69c72a62019-04-03 21:41:21 -0700136 length = getCodesFromLocale(likely.data(), fillIn, capacity, err);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800137 if(U_FAILURE(*err) || length != 0) {
138 return length;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000139 }
140 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800141 if(!triedCode) {
142 /* still not found .. try long and abbreviated script names again */
143 UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
144 if(code!=USCRIPT_INVALID_CODE) {
145 return setOneCode(code, fillIn, capacity, err);
146 }
147 }
148 return 0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000149}