Jungshik Shin | 87232d8 | 2017-05-13 21:10:13 -0700 | [diff] [blame] | 1 | // © 2016 and later: Unicode, Inc. and others. |
Jungshik Shin | 5feb9ad | 2016-10-21 12:52:48 -0700 | [diff] [blame] | 2 | // License & terms of use: http://www.unicode.org/copyright.html |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 3 | /* |
| 4 | ******************************************************************************* |
Jungshik Shin | 5feb9ad | 2016-10-21 12:52:48 -0700 | [diff] [blame] | 5 | * Copyright (C) 2013-2016, International Business Machines |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 6 | * Corporation and others. All Rights Reserved. |
| 7 | ******************************************************************************* |
| 8 | * file name: uscript_props.cpp |
Jungshik Shin | 87232d8 | 2017-05-13 21:10:13 -0700 | [diff] [blame] | 9 | * encoding: UTF-8 |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 10 | * tab size: 8 (not used) |
| 11 | * indentation:4 |
| 12 | * |
| 13 | * created on: 2013feb16 |
| 14 | * created by: Markus W. Scherer |
| 15 | */ |
| 16 | |
| 17 | #include "unicode/utypes.h" |
| 18 | #include "unicode/unistr.h" |
| 19 | #include "unicode/uscript.h" |
| 20 | #include "unicode/utf16.h" |
| 21 | #include "ustr_imp.h" |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 22 | #include "cmemory.h" |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 23 | |
| 24 | namespace { |
| 25 | |
| 26 | // Script metadata (script properties). |
| 27 | // See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt |
| 28 | |
| 29 | // 0 = NOT_ENCODED, no sample character, default false script properties. |
| 30 | // Bits 20.. 0: sample character |
| 31 | |
| 32 | // Bits 23..21: usage |
| 33 | const int32_t UNKNOWN = 1 << 21; |
| 34 | const int32_t EXCLUSION = 2 << 21; |
| 35 | const int32_t LIMITED_USE = 3 << 21; |
Jungshik Shin | b318966 | 2017-11-07 11:18:34 -0800 | [diff] [blame^] | 36 | // st int32_t ASPIRATIONAL = 4 << 21; -- not used any more since Unicode 10 |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 37 | const int32_t RECOMMENDED = 5 << 21; |
| 38 | |
| 39 | // Bits 31..24: Single-bit flags |
| 40 | const int32_t RTL = 1 << 24; |
| 41 | const int32_t LB_LETTERS = 1 << 25; |
| 42 | const int32_t CASED = 1 << 26; |
| 43 | |
| 44 | const int32_t SCRIPT_PROPS[] = { |
| 45 | // Begin copy-paste output from |
| 46 | // tools/trunk/unicode/py/parsescriptmetadata.py |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 47 | 0x0040 | RECOMMENDED, // Zyyy |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 48 | 0x0308 | RECOMMENDED, // Zinh |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 49 | 0x0628 | RECOMMENDED | RTL, // Arab |
| 50 | 0x0531 | RECOMMENDED | CASED, // Armn |
| 51 | 0x0995 | RECOMMENDED, // Beng |
| 52 | 0x3105 | RECOMMENDED | LB_LETTERS, // Bopo |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 53 | 0x13C4 | LIMITED_USE | CASED, // Cher |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 54 | 0x03E2 | EXCLUSION | CASED, // Copt |
| 55 | 0x042F | RECOMMENDED | CASED, // Cyrl |
| 56 | 0x10414 | EXCLUSION | CASED, // Dsrt |
| 57 | 0x0905 | RECOMMENDED, // Deva |
| 58 | 0x12A0 | RECOMMENDED, // Ethi |
| 59 | 0x10D3 | RECOMMENDED, // Geor |
| 60 | 0x10330 | EXCLUSION, // Goth |
| 61 | 0x03A9 | RECOMMENDED | CASED, // Grek |
| 62 | 0x0A95 | RECOMMENDED, // Gujr |
| 63 | 0x0A15 | RECOMMENDED, // Guru |
| 64 | 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani |
| 65 | 0xAC00 | RECOMMENDED, // Hang |
| 66 | 0x05D0 | RECOMMENDED | RTL, // Hebr |
| 67 | 0x304B | RECOMMENDED | LB_LETTERS, // Hira |
| 68 | 0x0C95 | RECOMMENDED, // Knda |
| 69 | 0x30AB | RECOMMENDED | LB_LETTERS, // Kana |
| 70 | 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr |
| 71 | 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo |
| 72 | 0x004C | RECOMMENDED | CASED, // Latn |
| 73 | 0x0D15 | RECOMMENDED, // Mlym |
Jungshik Shin | b318966 | 2017-11-07 11:18:34 -0800 | [diff] [blame^] | 74 | 0x1826 | LIMITED_USE, // Mong |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 75 | 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr |
| 76 | 0x168F | EXCLUSION, // Ogam |
Jungshik Shin | 5feb9ad | 2016-10-21 12:52:48 -0700 | [diff] [blame] | 77 | 0x10300 | EXCLUSION, // Ital |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 78 | 0x0B15 | RECOMMENDED, // Orya |
| 79 | 0x16A0 | EXCLUSION, // Runr |
| 80 | 0x0D85 | RECOMMENDED, // Sinh |
| 81 | 0x0710 | LIMITED_USE | RTL, // Syrc |
| 82 | 0x0B95 | RECOMMENDED, // Taml |
| 83 | 0x0C15 | RECOMMENDED, // Telu |
| 84 | 0x078C | RECOMMENDED | RTL, // Thaa |
| 85 | 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai |
| 86 | 0x0F40 | RECOMMENDED, // Tibt |
Jungshik Shin | b318966 | 2017-11-07 11:18:34 -0800 | [diff] [blame^] | 87 | 0x14C0 | LIMITED_USE, // Cans |
| 88 | 0xA288 | LIMITED_USE | LB_LETTERS, // Yiii |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 89 | 0x1703 | EXCLUSION, // Tglg |
| 90 | 0x1723 | EXCLUSION, // Hano |
| 91 | 0x1743 | EXCLUSION, // Buhd |
| 92 | 0x1763 | EXCLUSION, // Tagb |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 93 | 0x280E | UNKNOWN, // Brai |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 94 | 0x10800 | EXCLUSION | RTL, // Cprt |
| 95 | 0x1900 | LIMITED_USE, // Limb |
| 96 | 0x10000 | EXCLUSION, // Linb |
| 97 | 0x10480 | EXCLUSION, // Osma |
| 98 | 0x10450 | EXCLUSION, // Shaw |
| 99 | 0x1950 | LIMITED_USE | LB_LETTERS, // Tale |
| 100 | 0x10380 | EXCLUSION, // Ugar |
| 101 | 0, |
| 102 | 0x1A00 | EXCLUSION, // Bugi |
| 103 | 0x2C00 | EXCLUSION | CASED, // Glag |
| 104 | 0x10A00 | EXCLUSION | RTL, // Khar |
| 105 | 0xA800 | LIMITED_USE, // Sylo |
| 106 | 0x1980 | LIMITED_USE | LB_LETTERS, // Talu |
Jungshik Shin | b318966 | 2017-11-07 11:18:34 -0800 | [diff] [blame^] | 107 | 0x2D30 | LIMITED_USE, // Tfng |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 108 | 0x103A0 | EXCLUSION, // Xpeo |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 109 | 0x1B05 | LIMITED_USE, // Bali |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 110 | 0x1BC0 | LIMITED_USE, // Batk |
| 111 | 0, |
| 112 | 0x11005 | EXCLUSION, // Brah |
| 113 | 0xAA00 | LIMITED_USE, // Cham |
| 114 | 0, |
| 115 | 0, |
| 116 | 0, |
| 117 | 0, |
| 118 | 0x13153 | EXCLUSION, // Egyp |
| 119 | 0, |
| 120 | 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans |
| 121 | 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 122 | 0x16B1C | EXCLUSION, // Hmng |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 123 | 0x10CA1 | EXCLUSION | RTL | CASED, // Hung |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 124 | 0, |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 125 | 0xA984 | LIMITED_USE, // Java |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 126 | 0xA90A | LIMITED_USE, // Kali |
| 127 | 0, |
| 128 | 0, |
| 129 | 0x1C00 | LIMITED_USE, // Lepc |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 130 | 0x10647 | EXCLUSION, // Lina |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 131 | 0x0840 | LIMITED_USE | RTL, // Mand |
| 132 | 0, |
| 133 | 0x10980 | EXCLUSION | RTL, // Mero |
Jungshik Shin | 5feb9ad | 2016-10-21 12:52:48 -0700 | [diff] [blame] | 134 | 0x07CA | LIMITED_USE | RTL, // Nkoo |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 135 | 0x10C00 | EXCLUSION | RTL, // Orkh |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 136 | 0x1036B | EXCLUSION, // Perm |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 137 | 0xA840 | EXCLUSION, // Phag |
| 138 | 0x10900 | EXCLUSION | RTL, // Phnx |
Jungshik Shin | b318966 | 2017-11-07 11:18:34 -0800 | [diff] [blame^] | 139 | 0x16F00 | LIMITED_USE, // Plrd |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 140 | 0, |
| 141 | 0, |
| 142 | 0, |
| 143 | 0, |
| 144 | 0, |
| 145 | 0, |
| 146 | 0xA549 | LIMITED_USE, // Vaii |
| 147 | 0, |
| 148 | 0x12000 | EXCLUSION, // Xsux |
| 149 | 0, |
| 150 | 0xFDD0 | UNKNOWN, // Zzzz |
Jungshik Shin | 5feb9ad | 2016-10-21 12:52:48 -0700 | [diff] [blame] | 151 | 0x102A0 | EXCLUSION, // Cari |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 152 | 0x304B | RECOMMENDED | LB_LETTERS, // Jpan |
| 153 | 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana |
| 154 | 0x10280 | EXCLUSION, // Lyci |
| 155 | 0x10920 | EXCLUSION | RTL, // Lydi |
| 156 | 0x1C5A | LIMITED_USE, // Olck |
| 157 | 0xA930 | EXCLUSION, // Rjng |
| 158 | 0xA882 | LIMITED_USE, // Saur |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 159 | 0x1D850 | EXCLUSION, // Sgnw |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 160 | 0x1B83 | LIMITED_USE, // Sund |
| 161 | 0, |
| 162 | 0xABC0 | LIMITED_USE, // Mtei |
| 163 | 0x10840 | EXCLUSION | RTL, // Armi |
| 164 | 0x10B00 | EXCLUSION | RTL, // Avst |
| 165 | 0x11103 | LIMITED_USE, // Cakm |
| 166 | 0xAC00 | RECOMMENDED, // Kore |
| 167 | 0x11083 | EXCLUSION, // Kthi |
Jungshik Shin | 5feb9ad | 2016-10-21 12:52:48 -0700 | [diff] [blame] | 168 | 0x10AD8 | EXCLUSION | RTL, // Mani |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 169 | 0x10B60 | EXCLUSION | RTL, // Phli |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 170 | 0x10B8F | EXCLUSION | RTL, // Phlp |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 171 | 0, |
| 172 | 0x10B40 | EXCLUSION | RTL, // Prti |
| 173 | 0x0800 | EXCLUSION | RTL, // Samr |
| 174 | 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt |
| 175 | 0, |
| 176 | 0, |
| 177 | 0xA6A0 | LIMITED_USE, // Bamu |
Jungshik Shin | 5feb9ad | 2016-10-21 12:52:48 -0700 | [diff] [blame] | 178 | 0xA4D0 | LIMITED_USE, // Lisu |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 179 | 0, |
| 180 | 0x10A60 | EXCLUSION | RTL, // Sarb |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 181 | 0x16AE6 | EXCLUSION, // Bass |
| 182 | 0x1BC20 | EXCLUSION, // Dupl |
| 183 | 0x10500 | EXCLUSION, // Elba |
| 184 | 0x11315 | EXCLUSION, // Gran |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 185 | 0, |
| 186 | 0, |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 187 | 0x1E802 | EXCLUSION | RTL, // Mend |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 188 | 0x109A0 | EXCLUSION | RTL, // Merc |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 189 | 0x10A95 | EXCLUSION | RTL, // Narb |
| 190 | 0x10896 | EXCLUSION | RTL, // Nbat |
| 191 | 0x10873 | EXCLUSION | RTL, // Palm |
| 192 | 0x112BE | EXCLUSION, // Sind |
| 193 | 0x118B4 | EXCLUSION | CASED, // Wara |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 194 | 0, |
| 195 | 0, |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 196 | 0x16A4F | EXCLUSION, // Mroo |
Jungshik Shin | b318966 | 2017-11-07 11:18:34 -0800 | [diff] [blame^] | 197 | 0x1B1C4 | EXCLUSION | LB_LETTERS, // Nshu |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 198 | 0x11183 | EXCLUSION, // Shrd |
| 199 | 0x110D0 | EXCLUSION, // Sora |
| 200 | 0x11680 | EXCLUSION, // Takr |
Jungshik Shin | 5feb9ad | 2016-10-21 12:52:48 -0700 | [diff] [blame] | 201 | 0x18229 | EXCLUSION | LB_LETTERS, // Tang |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 202 | 0, |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 203 | 0x14400 | EXCLUSION, // Hluw |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 204 | 0x11208 | EXCLUSION, // Khoj |
| 205 | 0x11484 | EXCLUSION, // Tirh |
| 206 | 0x10537 | EXCLUSION, // Aghb |
| 207 | 0x11152 | EXCLUSION, // Mahj |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 208 | 0x11717 | EXCLUSION | LB_LETTERS, // Ahom |
| 209 | 0x108F4 | EXCLUSION | RTL, // Hatr |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 210 | 0x1160E | EXCLUSION, // Modi |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 211 | 0x1128F | EXCLUSION, // Mult |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 212 | 0x11AC0 | EXCLUSION, // Pauc |
| 213 | 0x1158E | EXCLUSION, // Sidd |
Jungshik Shin | 5feb9ad | 2016-10-21 12:52:48 -0700 | [diff] [blame] | 214 | 0x1E909 | LIMITED_USE | RTL | CASED, // Adlm |
| 215 | 0x11C0E | EXCLUSION, // Bhks |
| 216 | 0x11C72 | EXCLUSION, // Marc |
| 217 | 0x11412 | LIMITED_USE, // Newa |
| 218 | 0x104B5 | LIMITED_USE | CASED, // Osge |
| 219 | 0x5B57 | RECOMMENDED | LB_LETTERS, // Hanb |
| 220 | 0x1112 | RECOMMENDED, // Jamo |
| 221 | 0, |
Jungshik Shin | b318966 | 2017-11-07 11:18:34 -0800 | [diff] [blame^] | 222 | 0x11D10 | EXCLUSION, // Gonm |
| 223 | 0x11A5C | EXCLUSION, // Soyo |
| 224 | 0x11A0B | EXCLUSION, // Zanb |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 225 | // End copy-paste from parsescriptmetadata.py |
| 226 | }; |
| 227 | |
| 228 | int32_t getScriptProps(UScriptCode script) { |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 229 | if (0 <= script && script < UPRV_LENGTHOF(SCRIPT_PROPS)) { |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 230 | return SCRIPT_PROPS[script]; |
| 231 | } else { |
| 232 | return 0; |
| 233 | } |
| 234 | } |
| 235 | |
| 236 | } // namespace |
| 237 | |
| 238 | U_CAPI int32_t U_EXPORT2 |
| 239 | uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) { |
| 240 | if(U_FAILURE(*pErrorCode)) { return 0; } |
| 241 | if(capacity < 0 || (capacity > 0 && dest == NULL)) { |
| 242 | *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 243 | return 0; |
| 244 | } |
| 245 | int32_t sampleChar = getScriptProps(script) & 0x1fffff; |
| 246 | int32_t length; |
| 247 | if(sampleChar == 0) { |
| 248 | length = 0; |
| 249 | } else { |
| 250 | length = U16_LENGTH(sampleChar); |
| 251 | if(length <= capacity) { |
| 252 | int32_t i = 0; |
| 253 | U16_APPEND_UNSAFE(dest, i, sampleChar); |
| 254 | } |
| 255 | } |
| 256 | return u_terminateUChars(dest, capacity, length, pErrorCode); |
| 257 | } |
| 258 | |
| 259 | U_COMMON_API icu::UnicodeString U_EXPORT2 |
| 260 | uscript_getSampleUnicodeString(UScriptCode script) { |
| 261 | icu::UnicodeString sample; |
| 262 | int32_t sampleChar = getScriptProps(script) & 0x1fffff; |
| 263 | if(sampleChar != 0) { |
| 264 | sample.append(sampleChar); |
| 265 | } |
| 266 | return sample; |
| 267 | } |
| 268 | |
| 269 | U_CAPI UScriptUsage U_EXPORT2 |
| 270 | uscript_getUsage(UScriptCode script) { |
| 271 | return (UScriptUsage)((getScriptProps(script) >> 21) & 7); |
| 272 | } |
| 273 | |
| 274 | U_CAPI UBool U_EXPORT2 |
| 275 | uscript_isRightToLeft(UScriptCode script) { |
| 276 | return (getScriptProps(script) & RTL) != 0; |
| 277 | } |
| 278 | |
| 279 | U_CAPI UBool U_EXPORT2 |
| 280 | uscript_breaksBetweenLetters(UScriptCode script) { |
| 281 | return (getScriptProps(script) & LB_LETTERS) != 0; |
| 282 | } |
| 283 | |
| 284 | U_CAPI UBool U_EXPORT2 |
| 285 | uscript_isCased(UScriptCode script) { |
| 286 | return (getScriptProps(script) & CASED) != 0; |
| 287 | } |