blob: 236bf9d37f19199ca9324650e54bafaa203f566d [file] [log] [blame]
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (C) 2014, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
* scriptset.cpp
*
* created on: 2013 Jan 7
* created by: Andy Heninger
*/
14
15#include "unicode/utypes.h"
16
17#include "unicode/uchar.h"
18#include "unicode/unistr.h"
19
20#include "scriptset.h"
21#include "uassert.h"
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -080022#include "cmemory.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000023
24U_NAMESPACE_BEGIN
25
//----------------------------------------------------------------------------
//
// ScriptSet implementation
//
//----------------------------------------------------------------------------
31ScriptSet::ScriptSet() {
Frank Tangf2223962020-04-27 18:25:29 -070032 uprv_memset(bits, 0, sizeof(bits));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000033}
34
35ScriptSet::~ScriptSet() {
36}
37
38ScriptSet::ScriptSet(const ScriptSet &other) {
39 *this = other;
40}
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000041
42ScriptSet & ScriptSet::operator =(const ScriptSet &other) {
Frank Tangf2223962020-04-27 18:25:29 -070043 uprv_memcpy(bits, other.bits, sizeof(bits));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000044 return *this;
45}
46
Frank Tang3e05d9d2021-11-08 14:04:04 -080047bool ScriptSet::operator == (const ScriptSet &other) const {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -080048 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000049 if (bits[i] != other.bits[i]) {
Frank Tang3e05d9d2021-11-08 14:04:04 -080050 return false;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000051 }
52 }
Frank Tang3e05d9d2021-11-08 14:04:04 -080053 return true;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000054}
55
56UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const {
57 if (U_FAILURE(status)) {
Frank Tang1f164ee2022-11-08 12:31:27 -080058 return false;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000059 }
Frank Tangf2223962020-04-27 18:25:29 -070060 if (script < 0 || (int32_t)script >= SCRIPT_LIMIT) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000061 status = U_ILLEGAL_ARGUMENT_ERROR;
Frank Tang1f164ee2022-11-08 12:31:27 -080062 return false;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000063 }
64 uint32_t index = script / 32;
65 uint32_t bit = 1 << (script & 31);
66 return ((bits[index] & bit) != 0);
67}
68
69
70ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) {
71 if (U_FAILURE(status)) {
72 return *this;
73 }
Frank Tangf2223962020-04-27 18:25:29 -070074 if (script < 0 || (int32_t)script >= SCRIPT_LIMIT) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000075 status = U_ILLEGAL_ARGUMENT_ERROR;
76 return *this;
77 }
78 uint32_t index = script / 32;
79 uint32_t bit = 1 << (script & 31);
80 bits[index] |= bit;
81 return *this;
82}
83
84ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) {
85 if (U_FAILURE(status)) {
86 return *this;
87 }
Frank Tangf2223962020-04-27 18:25:29 -070088 if (script < 0 || (int32_t)script >= SCRIPT_LIMIT) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000089 status = U_ILLEGAL_ARGUMENT_ERROR;
90 return *this;
91 }
92 uint32_t index = script / 32;
93 uint32_t bit = 1 << (script & 31);
94 bits[index] &= ~bit;
95 return *this;
96}
97
98
99
100ScriptSet &ScriptSet::Union(const ScriptSet &other) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800101 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000102 bits[i] |= other.bits[i];
103 }
104 return *this;
105}
106
107ScriptSet &ScriptSet::intersect(const ScriptSet &other) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800108 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000109 bits[i] &= other.bits[i];
110 }
111 return *this;
112}
113
114ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) {
115 ScriptSet t;
116 t.set(script, status);
117 if (U_SUCCESS(status)) {
118 this->intersect(t);
119 }
120 return *this;
121}
Frank Tangf2223962020-04-27 18:25:29 -0700122
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000123UBool ScriptSet::intersects(const ScriptSet &other) const {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800124 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000125 if ((bits[i] & other.bits[i]) != 0) {
126 return true;
127 }
128 }
129 return false;
130}
131
132UBool ScriptSet::contains(const ScriptSet &other) const {
133 ScriptSet t(*this);
134 t.intersect(other);
135 return (t == other);
136}
137
138
139ScriptSet &ScriptSet::setAll() {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800140 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000141 bits[i] = 0xffffffffu;
142 }
143 return *this;
144}
145
146
147ScriptSet &ScriptSet::resetAll() {
Frank Tangf2223962020-04-27 18:25:29 -0700148 uprv_memset(bits, 0, sizeof(bits));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000149 return *this;
150}
151
152int32_t ScriptSet::countMembers() const {
153 // This bit counter is good for sparse numbers of '1's, which is
154 // very much the case that we will usually have.
155 int32_t count = 0;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800156 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000157 uint32_t x = bits[i];
158 while (x > 0) {
159 count++;
160 x &= (x - 1); // and off the least significant one bit.
161 }
162 }
163 return count;
164}
165
166int32_t ScriptSet::hashCode() const {
167 int32_t hash = 0;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800168 for (int32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000169 hash ^= bits[i];
170 }
171 return hash;
172}
173
174int32_t ScriptSet::nextSetBit(int32_t fromIndex) const {
175 // TODO: Wants a better implementation.
176 if (fromIndex < 0) {
177 return -1;
178 }
179 UErrorCode status = U_ZERO_ERROR;
Frank Tangf2223962020-04-27 18:25:29 -0700180 for (int32_t scriptIndex = fromIndex; scriptIndex < SCRIPT_LIMIT; scriptIndex++) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000181 if (test((UScriptCode)scriptIndex, status)) {
182 return scriptIndex;
183 }
184 }
185 return -1;
186}
187
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700188UBool ScriptSet::isEmpty() const {
189 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
190 if (bits[i] != 0) {
Frank Tang1f164ee2022-11-08 12:31:27 -0800191 return false;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700192 }
193 }
Frank Tang1f164ee2022-11-08 12:31:27 -0800194 return true;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700195}
196
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000197UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const {
Frank Tang1f164ee2022-11-08 12:31:27 -0800198 UBool firstTime = true;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000199 for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
200 if (!firstTime) {
201 dest.append((UChar)0x20);
202 }
Frank Tang1f164ee2022-11-08 12:31:27 -0800203 firstTime = false;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000204 const char *scriptName = uscript_getShortName((UScriptCode(i)));
205 dest.append(UnicodeString(scriptName, -1, US_INV));
206 }
207 return dest;
208}
209
210ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {
211 resetAll();
212 if (U_FAILURE(status)) {
213 return *this;
214 }
215 UnicodeString oneScriptName;
216 for (int32_t i=0; i<scriptString.length();) {
217 UChar32 c = scriptString.char32At(i);
218 i = scriptString.moveIndex32(i, 1);
219 if (!u_isUWhiteSpace(c)) {
220 oneScriptName.append(c);
221 if (i < scriptString.length()) {
222 continue;
223 }
224 }
225 if (oneScriptName.length() > 0) {
226 char buf[40];
227 oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV);
228 buf[sizeof(buf)-1] = 0;
229 int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);
230 if (sc == UCHAR_INVALID_CODE) {
231 status = U_ILLEGAL_ARGUMENT_ERROR;
232 } else {
233 this->set((UScriptCode)sc, status);
234 }
235 if (U_FAILURE(status)) {
236 return *this;
237 }
238 oneScriptName.remove();
239 }
240 }
241 return *this;
242}
243
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700244void ScriptSet::setScriptExtensions(UChar32 codePoint, UErrorCode& status) {
245 if (U_FAILURE(status)) { return; }
Frank Tangf2223962020-04-27 18:25:29 -0700246 static const int32_t FIRST_GUESS_SCRIPT_CAPACITY = 20;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700247 MaybeStackArray<UScriptCode,FIRST_GUESS_SCRIPT_CAPACITY> scripts;
248 UErrorCode internalStatus = U_ZERO_ERROR;
249 int32_t script_count = -1;
250
Frank Tang1f164ee2022-11-08 12:31:27 -0800251 while (true) {
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700252 script_count = uscript_getScriptExtensions(
Jungshik Shine0d9b902016-10-28 12:56:54 -0700253 codePoint, scripts.getAlias(), scripts.getCapacity(), &internalStatus);
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700254 if (internalStatus == U_BUFFER_OVERFLOW_ERROR) {
255 // Need to allocate more space
256 if (scripts.resize(script_count) == NULL) {
257 status = U_MEMORY_ALLOCATION_ERROR;
258 return;
259 }
260 internalStatus = U_ZERO_ERROR;
261 } else {
262 break;
263 }
264 }
265
266 // Check if we failed for some reason other than buffer overflow
267 if (U_FAILURE(internalStatus)) {
268 status = internalStatus;
269 return;
270 }
271
272 // Load the scripts into the ScriptSet and return
273 for (int32_t i = 0; i < script_count; i++) {
274 this->set(scripts[i], status);
275 if (U_FAILURE(status)) { return; }
276 }
277}
278
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000279U_NAMESPACE_END
280
281U_CAPI UBool U_EXPORT2
282uhash_equalsScriptSet(const UElement key1, const UElement key2) {
283 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
284 icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer);
285 return (*s1 == *s2);
286}
287
288U_CAPI int8_t U_EXPORT2
289uhash_compareScriptSet(UElement key0, UElement key1) {
290 icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer);
291 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
292 int32_t diff = s0->countMembers() - s1->countMembers();
Jungshik Shin42d50272018-10-24 01:22:09 -0700293 if (diff != 0) return static_cast<UBool>(diff);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000294 int32_t i0 = s0->nextSetBit(0);
295 int32_t i1 = s1->nextSetBit(0);
296 while ((diff = i0-i1) == 0 && i0 > 0) {
297 i0 = s0->nextSetBit(i0+1);
298 i1 = s1->nextSetBit(i1+1);
299 }
300 return (int8_t)diff;
301}
302
303U_CAPI int32_t U_EXPORT2
304uhash_hashScriptSet(const UElement key) {
305 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer);
306 return s->hashCode();
307}
308
309U_CAPI void U_EXPORT2
310uhash_deleteScriptSet(void *obj) {
311 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj);
312 delete s;
313}