blob: 7998c52c7f02c520b23c185d01c497676f0386b9 [file] [log] [blame]
Jungshik Shin87232d82017-05-13 21:10:13 -07001// © 2016 and later: Unicode, Inc. and others.
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002// License & terms of use: http://www.unicode.org/copyright.html
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00003/*
4*******************************************************************************
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07005* Copyright (C) 2013-2016, International Business Machines
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00006* Corporation and others. All Rights Reserved.
7*******************************************************************************
8* file name: uscript_props.cpp
Jungshik Shin87232d82017-05-13 21:10:13 -07009* encoding: UTF-8
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000010* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 2013feb16
14* created by: Markus W. Scherer
15*/
16
17#include "unicode/utypes.h"
18#include "unicode/unistr.h"
19#include "unicode/uscript.h"
20#include "unicode/utf16.h"
21#include "ustr_imp.h"
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -080022#include "cmemory.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000023
24namespace {
25
26// Script metadata (script properties).
27// See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
28
// Layout of one SCRIPT_PROPS entry:
//   0 = NOT_ENCODED, no sample character, default false script properties.
//   Bits 20.. 0: sample character
//   Bits 23..21: usage
//   Bits 31..24: Single-bit flags

// Usage values, stored starting at bit USAGE_SHIFT.
const int32_t USAGE_SHIFT = 21;
const int32_t UNKNOWN = 1 << USAGE_SHIFT;
const int32_t EXCLUSION = 2 << USAGE_SHIFT;
const int32_t LIMITED_USE = 3 << USAGE_SHIFT;
// ASPIRATIONAL (4 << USAGE_SHIFT) is not used any more since Unicode 10.
const int32_t RECOMMENDED = 5 << USAGE_SHIFT;

// Single-bit flags, stored starting at bit FLAGS_SHIFT.
const int32_t FLAGS_SHIFT = 24;
const int32_t RTL = 1 << FLAGS_SHIFT;
const int32_t LB_LETTERS = 1 << (FLAGS_SHIFT + 1);
const int32_t CASED = 1 << (FLAGS_SHIFT + 2);

// One packed entry per script, looked up by numeric script code.
// A plain 0 entry carries no sample character and no properties.
// The entries are tool output; keep them in the generated order and form.
const int32_t SCRIPT_PROPS[] = {
    // Begin copy-paste output from
    // tools/trunk/unicode/py/parsescriptmetadata.py
    0x0040 | RECOMMENDED,  // Zyyy
    0x0308 | RECOMMENDED,  // Zinh
    0x0628 | RECOMMENDED | RTL,  // Arab
    0x0531 | RECOMMENDED | CASED,  // Armn
    0x0995 | RECOMMENDED,  // Beng
    0x3105 | RECOMMENDED | LB_LETTERS,  // Bopo
    0x13C4 | LIMITED_USE | CASED,  // Cher
    0x03E2 | EXCLUSION | CASED,  // Copt
    0x042F | RECOMMENDED | CASED,  // Cyrl
    0x10414 | EXCLUSION | CASED,  // Dsrt
    0x0905 | RECOMMENDED,  // Deva
    0x12A0 | RECOMMENDED,  // Ethi
    0x10D3 | RECOMMENDED,  // Geor
    0x10330 | EXCLUSION,  // Goth
    0x03A9 | RECOMMENDED | CASED,  // Grek
    0x0A95 | RECOMMENDED,  // Gujr
    0x0A15 | RECOMMENDED,  // Guru
    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hani
    0xAC00 | RECOMMENDED,  // Hang
    0x05D0 | RECOMMENDED | RTL,  // Hebr
    0x304B | RECOMMENDED | LB_LETTERS,  // Hira
    0x0C95 | RECOMMENDED,  // Knda
    0x30AB | RECOMMENDED | LB_LETTERS,  // Kana
    0x1780 | RECOMMENDED | LB_LETTERS,  // Khmr
    0x0EA5 | RECOMMENDED | LB_LETTERS,  // Laoo
    0x004C | RECOMMENDED | CASED,  // Latn
    0x0D15 | RECOMMENDED,  // Mlym
    0x1826 | LIMITED_USE,  // Mong
    0x1000 | RECOMMENDED | LB_LETTERS,  // Mymr
    0x168F | EXCLUSION,  // Ogam
    0x10300 | EXCLUSION,  // Ital
    0x0B15 | RECOMMENDED,  // Orya
    0x16A0 | EXCLUSION,  // Runr
    0x0D85 | RECOMMENDED,  // Sinh
    0x0710 | LIMITED_USE | RTL,  // Syrc
    0x0B95 | RECOMMENDED,  // Taml
    0x0C15 | RECOMMENDED,  // Telu
    0x078C | RECOMMENDED | RTL,  // Thaa
    0x0E17 | RECOMMENDED | LB_LETTERS,  // Thai
    0x0F40 | RECOMMENDED,  // Tibt
    0x14C0 | LIMITED_USE,  // Cans
    0xA288 | LIMITED_USE | LB_LETTERS,  // Yiii
    0x1703 | EXCLUSION,  // Tglg
    0x1723 | EXCLUSION,  // Hano
    0x1743 | EXCLUSION,  // Buhd
    0x1763 | EXCLUSION,  // Tagb
    0x280E | UNKNOWN,  // Brai
    0x10800 | EXCLUSION | RTL,  // Cprt
    0x1900 | LIMITED_USE,  // Limb
    0x10000 | EXCLUSION,  // Linb
    0x10480 | EXCLUSION,  // Osma
    0x10450 | EXCLUSION,  // Shaw
    0x1950 | LIMITED_USE | LB_LETTERS,  // Tale
    0x10380 | EXCLUSION,  // Ugar
    0,
    0x1A00 | EXCLUSION,  // Bugi
    0x2C00 | EXCLUSION | CASED,  // Glag
    0x10A00 | EXCLUSION | RTL,  // Khar
    0xA800 | LIMITED_USE,  // Sylo
    0x1980 | LIMITED_USE | LB_LETTERS,  // Talu
    0x2D30 | LIMITED_USE,  // Tfng
    0x103A0 | EXCLUSION,  // Xpeo
    0x1B05 | LIMITED_USE,  // Bali
    0x1BC0 | LIMITED_USE,  // Batk
    0,
    0x11005 | EXCLUSION,  // Brah
    0xAA00 | LIMITED_USE,  // Cham
    0,
    0,
    0,
    0,
    0x13153 | EXCLUSION,  // Egyp
    0,
    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hans
    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hant
    0x16B1C | EXCLUSION,  // Hmng
    0x10CA1 | EXCLUSION | RTL | CASED,  // Hung
    0,
    0xA984 | LIMITED_USE,  // Java
    0xA90A | LIMITED_USE,  // Kali
    0,
    0,
    0x1C00 | LIMITED_USE,  // Lepc
    0x10647 | EXCLUSION,  // Lina
    0x0840 | LIMITED_USE | RTL,  // Mand
    0,
    0x10980 | EXCLUSION | RTL,  // Mero
    0x07CA | LIMITED_USE | RTL,  // Nkoo
    0x10C00 | EXCLUSION | RTL,  // Orkh
    0x1036B | EXCLUSION,  // Perm
    0xA840 | EXCLUSION,  // Phag
    0x10900 | EXCLUSION | RTL,  // Phnx
    0x16F00 | LIMITED_USE,  // Plrd
    0,
    0,
    0,
    0,
    0,
    0,
    0xA549 | LIMITED_USE,  // Vaii
    0,
    0x12000 | EXCLUSION,  // Xsux
    0,
    0xFDD0 | UNKNOWN,  // Zzzz
    0x102A0 | EXCLUSION,  // Cari
    0x304B | RECOMMENDED | LB_LETTERS,  // Jpan
    0x1A20 | LIMITED_USE | LB_LETTERS,  // Lana
    0x10280 | EXCLUSION,  // Lyci
    0x10920 | EXCLUSION | RTL,  // Lydi
    0x1C5A | LIMITED_USE,  // Olck
    0xA930 | EXCLUSION,  // Rjng
    0xA882 | LIMITED_USE,  // Saur
    0x1D850 | EXCLUSION,  // Sgnw
    0x1B83 | LIMITED_USE,  // Sund
    0,
    0xABC0 | LIMITED_USE,  // Mtei
    0x10840 | EXCLUSION | RTL,  // Armi
    0x10B00 | EXCLUSION | RTL,  // Avst
    0x11103 | LIMITED_USE,  // Cakm
    0xAC00 | RECOMMENDED,  // Kore
    0x11083 | EXCLUSION,  // Kthi
    0x10AD8 | EXCLUSION | RTL,  // Mani
    0x10B60 | EXCLUSION | RTL,  // Phli
    0x10B8F | EXCLUSION | RTL,  // Phlp
    0,
    0x10B40 | EXCLUSION | RTL,  // Prti
    0x0800 | EXCLUSION | RTL,  // Samr
    0xAA80 | LIMITED_USE | LB_LETTERS,  // Tavt
    0,
    0,
    0xA6A0 | LIMITED_USE,  // Bamu
    0xA4D0 | LIMITED_USE,  // Lisu
    0,
    0x10A60 | EXCLUSION | RTL,  // Sarb
    0x16AE6 | EXCLUSION,  // Bass
    0x1BC20 | EXCLUSION,  // Dupl
    0x10500 | EXCLUSION,  // Elba
    0x11315 | EXCLUSION,  // Gran
    0,
    0,
    0x1E802 | EXCLUSION | RTL,  // Mend
    0x109A0 | EXCLUSION | RTL,  // Merc
    0x10A95 | EXCLUSION | RTL,  // Narb
    0x10896 | EXCLUSION | RTL,  // Nbat
    0x10873 | EXCLUSION | RTL,  // Palm
    0x112BE | EXCLUSION,  // Sind
    0x118B4 | EXCLUSION | CASED,  // Wara
    0,
    0,
    0x16A4F | EXCLUSION,  // Mroo
    0x1B1C4 | EXCLUSION | LB_LETTERS,  // Nshu
    0x11183 | EXCLUSION,  // Shrd
    0x110D0 | EXCLUSION,  // Sora
    0x11680 | EXCLUSION,  // Takr
    0x18229 | EXCLUSION | LB_LETTERS,  // Tang
    0,
    0x14400 | EXCLUSION,  // Hluw
    0x11208 | EXCLUSION,  // Khoj
    0x11484 | EXCLUSION,  // Tirh
    0x10537 | EXCLUSION,  // Aghb
    0x11152 | EXCLUSION,  // Mahj
    0x11717 | EXCLUSION | LB_LETTERS,  // Ahom
    0x108F4 | EXCLUSION | RTL,  // Hatr
    0x1160E | EXCLUSION,  // Modi
    0x1128F | EXCLUSION,  // Mult
    0x11AC0 | EXCLUSION,  // Pauc
    0x1158E | EXCLUSION,  // Sidd
    0x1E909 | LIMITED_USE | RTL | CASED,  // Adlm
    0x11C0E | EXCLUSION,  // Bhks
    0x11C72 | EXCLUSION,  // Marc
    0x11412 | LIMITED_USE,  // Newa
    0x104B5 | LIMITED_USE | CASED,  // Osge
    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hanb
    0x1112 | RECOMMENDED,  // Jamo
    0,
    0x11D10 | EXCLUSION,  // Gonm
    0x11A5C | EXCLUSION,  // Soyo
    0x11A0B | EXCLUSION,  // Zanb
    // End copy-paste from parsescriptmetadata.py
};
227
228int32_t getScriptProps(UScriptCode script) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800229 if (0 <= script && script < UPRV_LENGTHOF(SCRIPT_PROPS)) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000230 return SCRIPT_PROPS[script];
231 } else {
232 return 0;
233 }
234}
235
236} // namespace
237
238U_CAPI int32_t U_EXPORT2
239uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) {
240 if(U_FAILURE(*pErrorCode)) { return 0; }
241 if(capacity < 0 || (capacity > 0 && dest == NULL)) {
242 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
243 return 0;
244 }
245 int32_t sampleChar = getScriptProps(script) & 0x1fffff;
246 int32_t length;
247 if(sampleChar == 0) {
248 length = 0;
249 } else {
250 length = U16_LENGTH(sampleChar);
251 if(length <= capacity) {
252 int32_t i = 0;
253 U16_APPEND_UNSAFE(dest, i, sampleChar);
254 }
255 }
256 return u_terminateUChars(dest, capacity, length, pErrorCode);
257}
258
259U_COMMON_API icu::UnicodeString U_EXPORT2
260uscript_getSampleUnicodeString(UScriptCode script) {
261 icu::UnicodeString sample;
262 int32_t sampleChar = getScriptProps(script) & 0x1fffff;
263 if(sampleChar != 0) {
264 sample.append(sampleChar);
265 }
266 return sample;
267}
268
269U_CAPI UScriptUsage U_EXPORT2
270uscript_getUsage(UScriptCode script) {
271 return (UScriptUsage)((getScriptProps(script) >> 21) & 7);
272}
273
274U_CAPI UBool U_EXPORT2
275uscript_isRightToLeft(UScriptCode script) {
276 return (getScriptProps(script) & RTL) != 0;
277}
278
279U_CAPI UBool U_EXPORT2
280uscript_breaksBetweenLetters(UScriptCode script) {
281 return (getScriptProps(script) & LB_LETTERS) != 0;
282}
283
284U_CAPI UBool U_EXPORT2
285uscript_isCased(UScriptCode script) {
286 return (getScriptProps(script) & CASED) != 0;
287}