Jungshik Shin | 87232d8 | 2017-05-13 21:10:13 -0700 | [diff] [blame] | 1 | // © 2016 and later: Unicode, Inc. and others. |
Jungshik Shin | 5feb9ad | 2016-10-21 12:52:48 -0700 | [diff] [blame] | 2 | // License & terms of use: http://www.unicode.org/copyright.html |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 3 | /* |
| 4 | ********************************************************************** |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 5 | * Copyright (C) 1997-2014, International Business Machines |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 6 | * Corporation and others. All Rights Reserved. |
| 7 | ********************************************************************** |
| 8 | * |
| 9 | * File USCRIPT.C |
| 10 | * |
| 11 | * Modification History: |
| 12 | * |
| 13 | * Date Name Description |
| 14 | * 07/06/2001 Ram Creation. |
| 15 | ****************************************************************************** |
| 16 | */ |
| 17 | |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 18 | #include "unicode/uchar.h" |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 19 | #include "unicode/uscript.h" |
| 20 | #include "unicode/uloc.h" |
Frank Tang | 69c72a6 | 2019-04-03 21:41:21 -0700 | [diff] [blame] | 21 | #include "bytesinkutil.h" |
| 22 | #include "charstr.h" |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 23 | #include "cmemory.h" |
| 24 | #include "cstring.h" |
Frank Tang | 69c72a6 | 2019-04-03 21:41:21 -0700 | [diff] [blame] | 25 | #include "ulocimp.h" |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 26 | |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 27 | static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN }; |
| 28 | static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN }; |
| 29 | static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO }; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 30 | |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 31 | static int32_t |
| 32 | setCodes(const UScriptCode *src, int32_t length, |
| 33 | UScriptCode *dest, int32_t capacity, UErrorCode *err) { |
| 34 | int32_t i; |
| 35 | if(U_FAILURE(*err)) { return 0; } |
| 36 | if(length > capacity) { |
| 37 | *err = U_BUFFER_OVERFLOW_ERROR; |
| 38 | return length; |
| 39 | } |
| 40 | for(i = 0; i < length; ++i) { |
| 41 | dest[i] = src[i]; |
| 42 | } |
| 43 | return length; |
| 44 | } |
| 45 | |
| 46 | static int32_t |
| 47 | setOneCode(UScriptCode script, UScriptCode *scripts, int32_t capacity, UErrorCode *err) { |
| 48 | if(U_FAILURE(*err)) { return 0; } |
| 49 | if(1 > capacity) { |
| 50 | *err = U_BUFFER_OVERFLOW_ERROR; |
| 51 | return 1; |
| 52 | } |
| 53 | scripts[0] = script; |
| 54 | return 1; |
| 55 | } |
| 56 | |
| 57 | static int32_t |
| 58 | getCodesFromLocale(const char *locale, |
| 59 | UScriptCode *scripts, int32_t capacity, UErrorCode *err) { |
| 60 | UErrorCode internalErrorCode = U_ZERO_ERROR; |
Frank Tang | f222396 | 2020-04-27 18:25:29 -0700 | [diff] [blame] | 61 | char lang[8] = {0}; |
| 62 | char script[8] = {0}; |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 63 | int32_t scriptLength; |
| 64 | if(U_FAILURE(*err)) { return 0; } |
| 65 | // Multi-script languages, equivalent to the LocaleScript data |
| 66 | // that we used to load from locale resource bundles. |
| 67 | /*length = */ uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &internalErrorCode); |
| 68 | if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) { |
| 69 | return 0; |
| 70 | } |
| 71 | if(0 == uprv_strcmp(lang, "ja")) { |
| 72 | return setCodes(JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, capacity, err); |
| 73 | } |
| 74 | if(0 == uprv_strcmp(lang, "ko")) { |
| 75 | return setCodes(KOREAN, UPRV_LENGTHOF(KOREAN), scripts, capacity, err); |
| 76 | } |
| 77 | scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &internalErrorCode); |
| 78 | if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) { |
| 79 | return 0; |
| 80 | } |
| 81 | if(0 == uprv_strcmp(lang, "zh") && 0 == uprv_strcmp(script, "Hant")) { |
| 82 | return setCodes(HAN_BOPO, UPRV_LENGTHOF(HAN_BOPO), scripts, capacity, err); |
| 83 | } |
| 84 | // Explicit script code. |
| 85 | if(scriptLength != 0) { |
| 86 | UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script); |
| 87 | if(scriptCode != USCRIPT_INVALID_CODE) { |
| 88 | if(scriptCode == USCRIPT_SIMPLIFIED_HAN || scriptCode == USCRIPT_TRADITIONAL_HAN) { |
| 89 | scriptCode = USCRIPT_HAN; |
| 90 | } |
| 91 | return setOneCode(scriptCode, scripts, capacity, err); |
| 92 | } |
| 93 | } |
| 94 | return 0; |
| 95 | } |
| 96 | |
| 97 | /* TODO: this is a bad API and should be deprecated, ticket #11141 */ |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 98 | U_CAPI int32_t U_EXPORT2 |
| 99 | uscript_getCode(const char* nameOrAbbrOrLocale, |
| 100 | UScriptCode* fillIn, |
| 101 | int32_t capacity, |
| 102 | UErrorCode* err){ |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 103 | UBool triedCode; |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 104 | UErrorCode internalErrorCode; |
| 105 | int32_t length; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 106 | |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 107 | if(U_FAILURE(*err)) { |
| 108 | return 0; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 109 | } |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 110 | if(nameOrAbbrOrLocale==NULL || |
| 111 | (fillIn == NULL ? capacity != 0 : capacity < 0)) { |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 112 | *err = U_ILLEGAL_ARGUMENT_ERROR; |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 113 | return 0; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 114 | } |
| 115 | |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 116 | triedCode = false; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 117 | if(uprv_strchr(nameOrAbbrOrLocale, '-')==NULL && uprv_strchr(nameOrAbbrOrLocale, '_')==NULL ){ |
| 118 | /* try long and abbreviated script names first */ |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 119 | UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale); |
| 120 | if(code!=USCRIPT_INVALID_CODE) { |
| 121 | return setOneCode(code, fillIn, capacity, err); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 122 | } |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 123 | triedCode = true; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 124 | } |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 125 | internalErrorCode = U_ZERO_ERROR; |
| 126 | length = getCodesFromLocale(nameOrAbbrOrLocale, fillIn, capacity, err); |
| 127 | if(U_FAILURE(*err) || length != 0) { |
| 128 | return length; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 129 | } |
Frank Tang | 69c72a6 | 2019-04-03 21:41:21 -0700 | [diff] [blame] | 130 | icu::CharString likely; |
| 131 | { |
| 132 | icu::CharStringByteSink sink(&likely); |
| 133 | ulocimp_addLikelySubtags(nameOrAbbrOrLocale, sink, &internalErrorCode); |
| 134 | } |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 135 | if(U_SUCCESS(internalErrorCode) && internalErrorCode != U_STRING_NOT_TERMINATED_WARNING) { |
Frank Tang | 69c72a6 | 2019-04-03 21:41:21 -0700 | [diff] [blame] | 136 | length = getCodesFromLocale(likely.data(), fillIn, capacity, err); |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 137 | if(U_FAILURE(*err) || length != 0) { |
| 138 | return length; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 139 | } |
| 140 | } |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 141 | if(!triedCode) { |
| 142 | /* still not found .. try long and abbreviated script names again */ |
| 143 | UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale); |
| 144 | if(code!=USCRIPT_INVALID_CODE) { |
| 145 | return setOneCode(code, fillIn, capacity, err); |
| 146 | } |
| 147 | } |
| 148 | return 0; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 149 | } |