Jungshik Shin | 87232d8 | 2017-05-13 21:10:13 -0700 | [diff] [blame] | 1 | // © 2016 and later: Unicode, Inc. and others. |
Jungshik Shin | 5feb9ad | 2016-10-21 12:52:48 -0700 | [diff] [blame] | 2 | // License & terms of use: http://www.unicode.org/copyright.html |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 3 | /* |
| 4 | ******************************************************************************* |
| 5 | * |
Jungshik Shin | c291cde | 2016-04-05 03:19:52 -0700 | [diff] [blame] | 6 | * Copyright (C) 2002-2016, International Business Machines |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 7 | * Corporation and others. All Rights Reserved. |
| 8 | * |
| 9 | ******************************************************************************* |
| 10 | * file name: uprops.cpp |
Jungshik Shin | 87232d8 | 2017-05-13 21:10:13 -0700 | [diff] [blame] | 11 | * encoding: UTF-8 |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 12 | * tab size: 8 (not used) |
| 13 | * indentation:4 |
| 14 | * |
| 15 | * created on: 2002feb24 |
| 16 | * created by: Markus W. Scherer |
| 17 | * |
| 18 | * Implementations for mostly non-core Unicode character properties |
| 19 | * stored in uprops.icu. |
| 20 | * |
| 21 | * With the APIs implemented here, almost all properties files and |
| 22 | * their associated implementation files are used from this file, |
| 23 | * including those for normalization and case mappings. |
| 24 | */ |
| 25 | |
| 26 | #include "unicode/utypes.h" |
| 27 | #include "unicode/uchar.h" |
Jungshik Shin | 42d5027 | 2018-10-24 01:22:09 -0700 | [diff] [blame] | 28 | #include "unicode/ucptrie.h" |
Frank Tang | 69c72a6 | 2019-04-03 21:41:21 -0700 | [diff] [blame] | 29 | #include "unicode/udata.h" |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 30 | #include "unicode/unorm2.h" |
| 31 | #include "unicode/uscript.h" |
| 32 | #include "unicode/ustring.h" |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 33 | #include "unicode/utf16.h" |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 34 | #include "cstring.h" |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 35 | #include "emojiprops.h" |
Frank Tang | 69c72a6 | 2019-04-03 21:41:21 -0700 | [diff] [blame] | 36 | #include "mutex.h" |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 37 | #include "normalizer2impl.h" |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 38 | #include "umutex.h" |
| 39 | #include "ubidi_props.h" |
| 40 | #include "uprops.h" |
| 41 | #include "ucase.h" |
Frank Tang | 69c72a6 | 2019-04-03 21:41:21 -0700 | [diff] [blame] | 42 | #include "ucln_cmn.h" |
| 43 | #include "ulayout_props.h" |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 44 | #include "ustr_imp.h" |
| 45 | |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 46 | U_NAMESPACE_USE |
| 47 | |
Frank Tang | 69c72a6 | 2019-04-03 21:41:21 -0700 | [diff] [blame] | 48 | // Unicode text layout properties data ----------------------------------------- |
| 49 | |
| 50 | namespace { |
| 51 | |
Frank Tang | 1c67b4e | 2022-05-18 10:13:51 -0700 | [diff] [blame] | 52 | icu::UInitOnce gLayoutInitOnce {}; |
Frank Tang | 69c72a6 | 2019-04-03 21:41:21 -0700 | [diff] [blame] | 53 | UDataMemory *gLayoutMemory = nullptr; |
| 54 | |
| 55 | UCPTrie *gInpcTrie = nullptr; // Indic_Positional_Category |
| 56 | UCPTrie *gInscTrie = nullptr; // Indic_Syllabic_Category |
| 57 | UCPTrie *gVoTrie = nullptr; // Vertical_Orientation |
| 58 | |
| 59 | int32_t gMaxInpcValue = 0; |
| 60 | int32_t gMaxInscValue = 0; |
| 61 | int32_t gMaxVoValue = 0; |
| 62 | |
| 63 | UBool U_CALLCONV uprops_cleanup() { |
| 64 | udata_close(gLayoutMemory); |
| 65 | gLayoutMemory = nullptr; |
| 66 | |
| 67 | ucptrie_close(gInpcTrie); |
| 68 | gInpcTrie = nullptr; |
| 69 | ucptrie_close(gInscTrie); |
| 70 | gInscTrie = nullptr; |
| 71 | ucptrie_close(gVoTrie); |
| 72 | gVoTrie = nullptr; |
| 73 | |
| 74 | gMaxInpcValue = 0; |
| 75 | gMaxInscValue = 0; |
| 76 | gMaxVoValue = 0; |
| 77 | |
| 78 | gLayoutInitOnce.reset(); |
| 79 | return TRUE; |
| 80 | } |
| 81 | |
| 82 | UBool U_CALLCONV |
| 83 | ulayout_isAcceptable(void * /*context*/, |
| 84 | const char * /* type */, const char * /*name*/, |
| 85 | const UDataInfo *pInfo) { |
| 86 | return pInfo->size >= 20 && |
| 87 | pInfo->isBigEndian == U_IS_BIG_ENDIAN && |
| 88 | pInfo->charsetFamily == U_CHARSET_FAMILY && |
| 89 | pInfo->dataFormat[0] == ULAYOUT_FMT_0 && |
| 90 | pInfo->dataFormat[1] == ULAYOUT_FMT_1 && |
| 91 | pInfo->dataFormat[2] == ULAYOUT_FMT_2 && |
| 92 | pInfo->dataFormat[3] == ULAYOUT_FMT_3 && |
| 93 | pInfo->formatVersion[0] == 1; |
| 94 | } |
| 95 | |
| 96 | // UInitOnce singleton initialization function |
| 97 | void U_CALLCONV ulayout_load(UErrorCode &errorCode) { |
| 98 | gLayoutMemory = udata_openChoice( |
| 99 | nullptr, ULAYOUT_DATA_TYPE, ULAYOUT_DATA_NAME, |
| 100 | ulayout_isAcceptable, nullptr, &errorCode); |
| 101 | if (U_FAILURE(errorCode)) { return; } |
| 102 | |
| 103 | const uint8_t *inBytes = (const uint8_t *)udata_getMemory(gLayoutMemory); |
| 104 | const int32_t *inIndexes = (const int32_t *)inBytes; |
| 105 | int32_t indexesLength = inIndexes[ULAYOUT_IX_INDEXES_LENGTH]; |
| 106 | if (indexesLength < 12) { |
| 107 | errorCode = U_INVALID_FORMAT_ERROR; // Not enough indexes. |
| 108 | return; |
| 109 | } |
| 110 | int32_t offset = indexesLength * 4; |
| 111 | int32_t top = inIndexes[ULAYOUT_IX_INPC_TRIE_TOP]; |
| 112 | int32_t trieSize = top - offset; |
| 113 | if (trieSize >= 16) { |
| 114 | gInpcTrie = ucptrie_openFromBinary( |
| 115 | UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY, |
| 116 | inBytes + offset, trieSize, nullptr, &errorCode); |
| 117 | } |
| 118 | offset = top; |
| 119 | top = inIndexes[ULAYOUT_IX_INSC_TRIE_TOP]; |
| 120 | trieSize = top - offset; |
| 121 | if (trieSize >= 16) { |
| 122 | gInscTrie = ucptrie_openFromBinary( |
| 123 | UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY, |
| 124 | inBytes + offset, trieSize, nullptr, &errorCode); |
| 125 | } |
| 126 | offset = top; |
| 127 | top = inIndexes[ULAYOUT_IX_VO_TRIE_TOP]; |
| 128 | trieSize = top - offset; |
| 129 | if (trieSize >= 16) { |
| 130 | gVoTrie = ucptrie_openFromBinary( |
| 131 | UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY, |
| 132 | inBytes + offset, trieSize, nullptr, &errorCode); |
| 133 | } |
| 134 | |
| 135 | uint32_t maxValues = inIndexes[ULAYOUT_IX_MAX_VALUES]; |
| 136 | gMaxInpcValue = maxValues >> ULAYOUT_MAX_INPC_SHIFT; |
| 137 | gMaxInscValue = (maxValues >> ULAYOUT_MAX_INSC_SHIFT) & 0xff; |
| 138 | gMaxVoValue = (maxValues >> ULAYOUT_MAX_VO_SHIFT) & 0xff; |
| 139 | |
| 140 | ucln_common_registerCleanup(UCLN_COMMON_UPROPS, uprops_cleanup); |
| 141 | } |
| 142 | |
| 143 | UBool ulayout_ensureData(UErrorCode &errorCode) { |
| 144 | if (U_FAILURE(errorCode)) { return FALSE; } |
| 145 | umtx_initOnce(gLayoutInitOnce, &ulayout_load, errorCode); |
| 146 | return U_SUCCESS(errorCode); |
| 147 | } |
| 148 | |
| 149 | UBool ulayout_ensureData() { |
| 150 | UErrorCode errorCode = U_ZERO_ERROR; |
| 151 | return ulayout_ensureData(errorCode); |
| 152 | } |
| 153 | |
| 154 | } // namespace |
| 155 | |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 156 | /* general properties API functions ----------------------------------------- */ |
| 157 | |
| 158 | struct BinaryProperty; |
| 159 | |
| 160 | typedef UBool BinaryPropertyContains(const BinaryProperty &prop, UChar32 c, UProperty which); |
| 161 | |
| 162 | struct BinaryProperty { |
| 163 | int32_t column; // SRC_PROPSVEC column, or "source" if mask==0 |
| 164 | uint32_t mask; |
| 165 | BinaryPropertyContains *contains; |
| 166 | }; |
| 167 | |
| 168 | static UBool defaultContains(const BinaryProperty &prop, UChar32 c, UProperty /*which*/) { |
| 169 | /* systematic, directly stored properties */ |
| 170 | return (u_getUnicodeProperties(c, prop.column)&prop.mask)!=0; |
| 171 | } |
| 172 | |
| 173 | static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) { |
Jungshik Shin | 42d5027 | 2018-10-24 01:22:09 -0700 | [diff] [blame] | 174 | return static_cast<UBool>(ucase_hasBinaryProperty(c, which)); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 175 | } |
| 176 | |
| 177 | static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 178 | return ubidi_isBidiControl(c); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 179 | } |
| 180 | |
| 181 | static UBool isMirrored(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 182 | return ubidi_isMirrored(c); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 183 | } |
| 184 | |
| 185 | static UBool isJoinControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 186 | return ubidi_isJoinControl(c); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 187 | } |
| 188 | |
| 189 | #if UCONFIG_NO_NORMALIZATION |
| 190 | static UBool hasFullCompositionExclusion(const BinaryProperty &, UChar32, UProperty) { |
| 191 | return FALSE; |
| 192 | } |
| 193 | #else |
| 194 | static UBool hasFullCompositionExclusion(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
| 195 | // By definition, Full_Composition_Exclusion is the same as NFC_QC=No. |
| 196 | UErrorCode errorCode=U_ZERO_ERROR; |
| 197 | const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); |
| 198 | return U_SUCCESS(errorCode) && impl->isCompNo(impl->getNorm16(c)); |
| 199 | } |
| 200 | #endif |
| 201 | |
| 202 | // UCHAR_NF*_INERT properties |
| 203 | #if UCONFIG_NO_NORMALIZATION |
| 204 | static UBool isNormInert(const BinaryProperty &, UChar32, UProperty) { |
| 205 | return FALSE; |
| 206 | } |
| 207 | #else |
| 208 | static UBool isNormInert(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) { |
| 209 | UErrorCode errorCode=U_ZERO_ERROR; |
| 210 | const Normalizer2 *norm2=Normalizer2Factory::getInstance( |
| 211 | (UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode); |
| 212 | return U_SUCCESS(errorCode) && norm2->isInert(c); |
| 213 | } |
| 214 | #endif |
| 215 | |
| 216 | #if UCONFIG_NO_NORMALIZATION |
| 217 | static UBool changesWhenCasefolded(const BinaryProperty &, UChar32, UProperty) { |
| 218 | return FALSE; |
| 219 | } |
| 220 | #else |
| 221 | static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
| 222 | UnicodeString nfd; |
| 223 | UErrorCode errorCode=U_ZERO_ERROR; |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 224 | const Normalizer2 *nfcNorm2=Normalizer2::getNFCInstance(errorCode); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 225 | if(U_FAILURE(errorCode)) { |
| 226 | return FALSE; |
| 227 | } |
| 228 | if(nfcNorm2->getDecomposition(c, nfd)) { |
| 229 | /* c has a decomposition */ |
| 230 | if(nfd.length()==1) { |
| 231 | c=nfd[0]; /* single BMP code point */ |
| 232 | } else if(nfd.length()<=U16_MAX_LENGTH && |
| 233 | nfd.length()==U16_LENGTH(c=nfd.char32At(0)) |
| 234 | ) { |
| 235 | /* single supplementary code point */ |
| 236 | } else { |
| 237 | c=U_SENTINEL; |
| 238 | } |
| 239 | } else if(c<0) { |
| 240 | return FALSE; /* protect against bad input */ |
| 241 | } |
| 242 | if(c>=0) { |
| 243 | /* single code point */ |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 244 | const UChar *resultString; |
Jungshik Shin | 87232d8 | 2017-05-13 21:10:13 -0700 | [diff] [blame] | 245 | return (UBool)(ucase_toFullFolding(c, &resultString, U_FOLD_CASE_DEFAULT)>=0); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 246 | } else { |
| 247 | /* guess some large but stack-friendly capacity */ |
| 248 | UChar dest[2*UCASE_MAX_STRING_LENGTH]; |
| 249 | int32_t destLength; |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 250 | destLength=u_strFoldCase(dest, UPRV_LENGTHOF(dest), |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 251 | nfd.getBuffer(), nfd.length(), |
| 252 | U_FOLD_CASE_DEFAULT, &errorCode); |
| 253 | return (UBool)(U_SUCCESS(errorCode) && |
| 254 | 0!=u_strCompare(nfd.getBuffer(), nfd.length(), |
| 255 | dest, destLength, FALSE)); |
| 256 | } |
| 257 | } |
| 258 | #endif |
| 259 | |
| 260 | #if UCONFIG_NO_NORMALIZATION |
| 261 | static UBool changesWhenNFKC_Casefolded(const BinaryProperty &, UChar32, UProperty) { |
| 262 | return FALSE; |
| 263 | } |
| 264 | #else |
| 265 | static UBool changesWhenNFKC_Casefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
| 266 | UErrorCode errorCode=U_ZERO_ERROR; |
| 267 | const Normalizer2Impl *kcf=Normalizer2Factory::getNFKC_CFImpl(errorCode); |
| 268 | if(U_FAILURE(errorCode)) { |
| 269 | return FALSE; |
| 270 | } |
| 271 | UnicodeString src(c); |
| 272 | UnicodeString dest; |
| 273 | { |
| 274 | // The ReorderingBuffer must be in a block because its destructor |
| 275 | // needs to release dest's buffer before we look at its contents. |
| 276 | ReorderingBuffer buffer(*kcf, dest); |
| 277 | // Small destCapacity for NFKC_CF(c). |
| 278 | if(buffer.init(5, errorCode)) { |
| 279 | const UChar *srcArray=src.getBuffer(); |
| 280 | kcf->compose(srcArray, srcArray+src.length(), FALSE, |
| 281 | TRUE, buffer, errorCode); |
| 282 | } |
| 283 | } |
| 284 | return U_SUCCESS(errorCode) && dest!=src; |
| 285 | } |
| 286 | #endif |
| 287 | |
| 288 | #if UCONFIG_NO_NORMALIZATION |
| 289 | static UBool isCanonSegmentStarter(const BinaryProperty &, UChar32, UProperty) { |
| 290 | return FALSE; |
| 291 | } |
| 292 | #else |
| 293 | static UBool isCanonSegmentStarter(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
| 294 | UErrorCode errorCode=U_ZERO_ERROR; |
| 295 | const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); |
| 296 | return |
| 297 | U_SUCCESS(errorCode) && impl->ensureCanonIterData(errorCode) && |
| 298 | impl->isCanonSegmentStarter(c); |
| 299 | } |
| 300 | #endif |
| 301 | |
| 302 | static UBool isPOSIX_alnum(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
| 303 | return u_isalnumPOSIX(c); |
| 304 | } |
| 305 | |
| 306 | static UBool isPOSIX_blank(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
| 307 | return u_isblank(c); |
| 308 | } |
| 309 | |
| 310 | static UBool isPOSIX_graph(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
| 311 | return u_isgraphPOSIX(c); |
| 312 | } |
| 313 | |
| 314 | static UBool isPOSIX_print(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
| 315 | return u_isprintPOSIX(c); |
| 316 | } |
| 317 | |
| 318 | static UBool isPOSIX_xdigit(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
| 319 | return u_isxdigit(c); |
| 320 | } |
| 321 | |
Jungshik Shin | b318966 | 2017-11-07 11:18:34 -0800 | [diff] [blame] | 322 | static UBool isRegionalIndicator(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
| 323 | // Property starts are a subset of lb=RI etc. |
| 324 | return 0x1F1E6<=c && c<=0x1F1FF; |
| 325 | } |
| 326 | |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 327 | static UBool hasEmojiProperty(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) { |
| 328 | return EmojiProps::hasBinaryProperty(c, which); |
| 329 | } |
| 330 | |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 331 | static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={ |
| 332 | /* |
| 333 | * column and mask values for binary properties from u_getUnicodeProperties(). |
| 334 | * Must be in order of corresponding UProperty, |
| 335 | * and there must be exactly one entry per binary UProperty. |
| 336 | * |
| 337 | * Properties with mask==0 are handled in code. |
| 338 | * For them, column is the UPropertySource value. |
| 339 | */ |
| 340 | { 1, U_MASK(UPROPS_ALPHABETIC), defaultContains }, |
| 341 | { 1, U_MASK(UPROPS_ASCII_HEX_DIGIT), defaultContains }, |
| 342 | { UPROPS_SRC_BIDI, 0, isBidiControl }, |
| 343 | { UPROPS_SRC_BIDI, 0, isMirrored }, |
| 344 | { 1, U_MASK(UPROPS_DASH), defaultContains }, |
| 345 | { 1, U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT), defaultContains }, |
| 346 | { 1, U_MASK(UPROPS_DEPRECATED), defaultContains }, |
| 347 | { 1, U_MASK(UPROPS_DIACRITIC), defaultContains }, |
| 348 | { 1, U_MASK(UPROPS_EXTENDER), defaultContains }, |
| 349 | { UPROPS_SRC_NFC, 0, hasFullCompositionExclusion }, |
| 350 | { 1, U_MASK(UPROPS_GRAPHEME_BASE), defaultContains }, |
| 351 | { 1, U_MASK(UPROPS_GRAPHEME_EXTEND), defaultContains }, |
| 352 | { 1, U_MASK(UPROPS_GRAPHEME_LINK), defaultContains }, |
| 353 | { 1, U_MASK(UPROPS_HEX_DIGIT), defaultContains }, |
| 354 | { 1, U_MASK(UPROPS_HYPHEN), defaultContains }, |
| 355 | { 1, U_MASK(UPROPS_ID_CONTINUE), defaultContains }, |
| 356 | { 1, U_MASK(UPROPS_ID_START), defaultContains }, |
| 357 | { 1, U_MASK(UPROPS_IDEOGRAPHIC), defaultContains }, |
| 358 | { 1, U_MASK(UPROPS_IDS_BINARY_OPERATOR), defaultContains }, |
| 359 | { 1, U_MASK(UPROPS_IDS_TRINARY_OPERATOR), defaultContains }, |
| 360 | { UPROPS_SRC_BIDI, 0, isJoinControl }, |
| 361 | { 1, U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION), defaultContains }, |
| 362 | { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_LOWERCASE |
| 363 | { 1, U_MASK(UPROPS_MATH), defaultContains }, |
| 364 | { 1, U_MASK(UPROPS_NONCHARACTER_CODE_POINT), defaultContains }, |
| 365 | { 1, U_MASK(UPROPS_QUOTATION_MARK), defaultContains }, |
| 366 | { 1, U_MASK(UPROPS_RADICAL), defaultContains }, |
| 367 | { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_SOFT_DOTTED |
| 368 | { 1, U_MASK(UPROPS_TERMINAL_PUNCTUATION), defaultContains }, |
| 369 | { 1, U_MASK(UPROPS_UNIFIED_IDEOGRAPH), defaultContains }, |
| 370 | { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_UPPERCASE |
| 371 | { 1, U_MASK(UPROPS_WHITE_SPACE), defaultContains }, |
| 372 | { 1, U_MASK(UPROPS_XID_CONTINUE), defaultContains }, |
| 373 | { 1, U_MASK(UPROPS_XID_START), defaultContains }, |
| 374 | { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_SENSITIVE |
| 375 | { 1, U_MASK(UPROPS_S_TERM), defaultContains }, |
| 376 | { 1, U_MASK(UPROPS_VARIATION_SELECTOR), defaultContains }, |
| 377 | { UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFD_INERT |
| 378 | { UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKD_INERT |
| 379 | { UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFC_INERT |
| 380 | { UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKC_INERT |
| 381 | { UPROPS_SRC_NFC_CANON_ITER, 0, isCanonSegmentStarter }, |
| 382 | { 1, U_MASK(UPROPS_PATTERN_SYNTAX), defaultContains }, |
| 383 | { 1, U_MASK(UPROPS_PATTERN_WHITE_SPACE), defaultContains }, |
| 384 | { UPROPS_SRC_CHAR_AND_PROPSVEC, 0, isPOSIX_alnum }, |
| 385 | { UPROPS_SRC_CHAR, 0, isPOSIX_blank }, |
| 386 | { UPROPS_SRC_CHAR, 0, isPOSIX_graph }, |
| 387 | { UPROPS_SRC_CHAR, 0, isPOSIX_print }, |
| 388 | { UPROPS_SRC_CHAR, 0, isPOSIX_xdigit }, |
| 389 | { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASED |
| 390 | { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_IGNORABLE |
| 391 | { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_LOWERCASED |
| 392 | { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_UPPERCASED |
| 393 | { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_TITLECASED |
| 394 | { UPROPS_SRC_CASE_AND_NORM, 0, changesWhenCasefolded }, |
| 395 | { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_CASEMAPPED |
Jungshik Shin | c291cde | 2016-04-05 03:19:52 -0700 | [diff] [blame] | 396 | { UPROPS_SRC_NFKC_CF, 0, changesWhenNFKC_Casefolded }, |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 397 | { UPROPS_SRC_EMOJI, 0, hasEmojiProperty }, // UCHAR_EMOJI |
| 398 | { UPROPS_SRC_EMOJI, 0, hasEmojiProperty }, // UCHAR_EMOJI_PRESENTATION |
| 399 | { UPROPS_SRC_EMOJI, 0, hasEmojiProperty }, // UCHAR_EMOJI_MODIFIER |
| 400 | { UPROPS_SRC_EMOJI, 0, hasEmojiProperty }, // UCHAR_EMOJI_MODIFIER_BASE |
| 401 | { UPROPS_SRC_EMOJI, 0, hasEmojiProperty }, // UCHAR_EMOJI_COMPONENT |
Jungshik Shin | b318966 | 2017-11-07 11:18:34 -0800 | [diff] [blame] | 402 | { 2, 0, isRegionalIndicator }, |
| 403 | { 1, U_MASK(UPROPS_PREPENDED_CONCATENATION_MARK), defaultContains }, |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 404 | { UPROPS_SRC_EMOJI, 0, hasEmojiProperty }, // UCHAR_EXTENDED_PICTOGRAPHIC |
| 405 | { UPROPS_SRC_EMOJI, 0, hasEmojiProperty }, // UCHAR_BASIC_EMOJI |
| 406 | { UPROPS_SRC_EMOJI, 0, hasEmojiProperty }, // UCHAR_EMOJI_KEYCAP_SEQUENCE |
| 407 | { UPROPS_SRC_EMOJI, 0, hasEmojiProperty }, // UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE |
| 408 | { UPROPS_SRC_EMOJI, 0, hasEmojiProperty }, // UCHAR_RGI_EMOJI_FLAG_SEQUENCE |
| 409 | { UPROPS_SRC_EMOJI, 0, hasEmojiProperty }, // UCHAR_RGI_EMOJI_TAG_SEQUENCE |
| 410 | { UPROPS_SRC_EMOJI, 0, hasEmojiProperty }, // UCHAR_RGI_EMOJI_ZWJ_SEQUENCE |
| 411 | { UPROPS_SRC_EMOJI, 0, hasEmojiProperty }, // UCHAR_RGI_EMOJI |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 412 | }; |
| 413 | |
| 414 | U_CAPI UBool U_EXPORT2 |
| 415 | u_hasBinaryProperty(UChar32 c, UProperty which) { |
| 416 | /* c is range-checked in the functions that are called from here */ |
| 417 | if(which<UCHAR_BINARY_START || UCHAR_BINARY_LIMIT<=which) { |
| 418 | /* not a known binary property */ |
| 419 | return FALSE; |
| 420 | } else { |
| 421 | const BinaryProperty &prop=binProps[which]; |
| 422 | return prop.contains(prop, c, which); |
| 423 | } |
| 424 | } |
| 425 | |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 426 | U_CAPI UBool U_EXPORT2 |
| 427 | u_stringHasBinaryProperty(const UChar *s, int32_t length, UProperty which) { |
| 428 | if (s == nullptr && length != 0) { return false; } |
| 429 | if (length == 1) { |
| 430 | return u_hasBinaryProperty(s[0], which); // single code point |
| 431 | } else if (length == 2 || (length < 0 && *s != 0)) { // not empty string |
| 432 | // first code point |
| 433 | int32_t i = 0; |
| 434 | UChar32 c; |
| 435 | U16_NEXT(s, i, length, c); |
| 436 | if (length > 0 ? i == length : s[i] == 0) { |
| 437 | return u_hasBinaryProperty(c, which); // single code point |
| 438 | } |
| 439 | } |
| 440 | // Only call into EmojiProps for a relevant property, |
| 441 | // so that we not unnecessarily try to load its data file. |
| 442 | return UCHAR_BASIC_EMOJI <= which && which <= UCHAR_RGI_EMOJI && |
| 443 | EmojiProps::hasBinaryProperty(s, length, which); |
| 444 | } |
| 445 | |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 446 | struct IntProperty; |
| 447 | |
| 448 | typedef int32_t IntPropertyGetValue(const IntProperty &prop, UChar32 c, UProperty which); |
| 449 | typedef int32_t IntPropertyGetMaxValue(const IntProperty &prop, UProperty which); |
| 450 | |
| 451 | struct IntProperty { |
| 452 | int32_t column; // SRC_PROPSVEC column, or "source" if mask==0 |
| 453 | uint32_t mask; |
| 454 | int32_t shift; // =maxValue if getMaxValueFromShift() is used |
| 455 | IntPropertyGetValue *getValue; |
| 456 | IntPropertyGetMaxValue *getMaxValue; |
| 457 | }; |
| 458 | |
| 459 | static int32_t defaultGetValue(const IntProperty &prop, UChar32 c, UProperty /*which*/) { |
| 460 | /* systematic, directly stored properties */ |
| 461 | return (int32_t)(u_getUnicodeProperties(c, prop.column)&prop.mask)>>prop.shift; |
| 462 | } |
| 463 | |
| 464 | static int32_t defaultGetMaxValue(const IntProperty &prop, UProperty /*which*/) { |
| 465 | return (uprv_getMaxValues(prop.column)&prop.mask)>>prop.shift; |
| 466 | } |
| 467 | |
| 468 | static int32_t getMaxValueFromShift(const IntProperty &prop, UProperty /*which*/) { |
| 469 | return prop.shift; |
| 470 | } |
| 471 | |
| 472 | static int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
| 473 | return (int32_t)u_charDirection(c); |
| 474 | } |
| 475 | |
| 476 | static int32_t getBiDiPairedBracketType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 477 | return (int32_t)ubidi_getPairedBracketType(c); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 478 | } |
| 479 | |
| 480 | static int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty which) { |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 481 | return ubidi_getMaxValue(which); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 482 | } |
| 483 | |
| 484 | #if UCONFIG_NO_NORMALIZATION |
| 485 | static int32_t getCombiningClass(const IntProperty &, UChar32, UProperty) { |
| 486 | return 0; |
| 487 | } |
| 488 | #else |
| 489 | static int32_t getCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
| 490 | return u_getCombiningClass(c); |
| 491 | } |
| 492 | #endif |
| 493 | |
| 494 | static int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
| 495 | return (int32_t)u_charType(c); |
| 496 | } |
| 497 | |
| 498 | static int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 499 | return ubidi_getJoiningGroup(c); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 500 | } |
| 501 | |
| 502 | static int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 503 | return ubidi_getJoiningType(c); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 504 | } |
| 505 | |
| 506 | static int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
| 507 | int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getMainProperties(c)); |
| 508 | return UPROPS_NTV_GET_TYPE(ntv); |
| 509 | } |
| 510 | |
| 511 | static int32_t getScript(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
| 512 | UErrorCode errorCode=U_ZERO_ERROR; |
| 513 | return (int32_t)uscript_getScript(c, &errorCode); |
| 514 | } |
| 515 | |
Frank Tang | f222396 | 2020-04-27 18:25:29 -0700 | [diff] [blame] | 516 | static int32_t scriptGetMaxValue(const IntProperty &/*prop*/, UProperty /*which*/) { |
| 517 | uint32_t scriptX=uprv_getMaxValues(0)&UPROPS_SCRIPT_X_MASK; |
| 518 | return uprops_mergeScriptCodeOrIndex(scriptX); |
| 519 | } |
| 520 | |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 521 | /* |
| 522 | * Map some of the Grapheme Cluster Break values to Hangul Syllable Types. |
| 523 | * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break. |
| 524 | */ |
| 525 | static const UHangulSyllableType gcbToHst[]={ |
| 526 | U_HST_NOT_APPLICABLE, /* U_GCB_OTHER */ |
| 527 | U_HST_NOT_APPLICABLE, /* U_GCB_CONTROL */ |
| 528 | U_HST_NOT_APPLICABLE, /* U_GCB_CR */ |
| 529 | U_HST_NOT_APPLICABLE, /* U_GCB_EXTEND */ |
| 530 | U_HST_LEADING_JAMO, /* U_GCB_L */ |
| 531 | U_HST_NOT_APPLICABLE, /* U_GCB_LF */ |
| 532 | U_HST_LV_SYLLABLE, /* U_GCB_LV */ |
| 533 | U_HST_LVT_SYLLABLE, /* U_GCB_LVT */ |
| 534 | U_HST_TRAILING_JAMO, /* U_GCB_T */ |
| 535 | U_HST_VOWEL_JAMO /* U_GCB_V */ |
| 536 | /* |
| 537 | * Omit GCB values beyond what we need for hst. |
| 538 | * The code below checks for the array length. |
| 539 | */ |
| 540 | }; |
| 541 | |
| 542 | static int32_t getHangulSyllableType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
| 543 | /* see comments on gcbToHst[] above */ |
| 544 | int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT; |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 545 | if(gcb<UPRV_LENGTHOF(gcbToHst)) { |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 546 | return gcbToHst[gcb]; |
| 547 | } else { |
| 548 | return U_HST_NOT_APPLICABLE; |
| 549 | } |
| 550 | } |
| 551 | |
| 552 | #if UCONFIG_NO_NORMALIZATION |
| 553 | static int32_t getNormQuickCheck(const IntProperty &, UChar32, UProperty) { |
| 554 | return 0; |
| 555 | } |
| 556 | #else |
| 557 | static int32_t getNormQuickCheck(const IntProperty &/*prop*/, UChar32 c, UProperty which) { |
| 558 | return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD)); |
| 559 | } |
| 560 | #endif |
| 561 | |
| 562 | #if UCONFIG_NO_NORMALIZATION |
| 563 | static int32_t getLeadCombiningClass(const IntProperty &, UChar32, UProperty) { |
| 564 | return 0; |
| 565 | } |
| 566 | #else |
| 567 | static int32_t getLeadCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
| 568 | return unorm_getFCD16(c)>>8; |
| 569 | } |
| 570 | #endif |
| 571 | |
| 572 | #if UCONFIG_NO_NORMALIZATION |
| 573 | static int32_t getTrailCombiningClass(const IntProperty &, UChar32, UProperty) { |
| 574 | return 0; |
| 575 | } |
| 576 | #else |
| 577 | static int32_t getTrailCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { |
| 578 | return unorm_getFCD16(c)&0xff; |
| 579 | } |
| 580 | #endif |
| 581 | |
Jungshik Shin | 42d5027 | 2018-10-24 01:22:09 -0700 | [diff] [blame] | 582 | static int32_t getInPC(const IntProperty &, UChar32 c, UProperty) { |
Frank Tang | 69c72a6 | 2019-04-03 21:41:21 -0700 | [diff] [blame] | 583 | return ulayout_ensureData() && gInpcTrie != nullptr ? ucptrie_get(gInpcTrie, c) : 0; |
Jungshik Shin | 42d5027 | 2018-10-24 01:22:09 -0700 | [diff] [blame] | 584 | } |
| 585 | |
| 586 | static int32_t getInSC(const IntProperty &, UChar32 c, UProperty) { |
Frank Tang | 69c72a6 | 2019-04-03 21:41:21 -0700 | [diff] [blame] | 587 | return ulayout_ensureData() && gInscTrie != nullptr ? ucptrie_get(gInscTrie, c) : 0; |
Jungshik Shin | 42d5027 | 2018-10-24 01:22:09 -0700 | [diff] [blame] | 588 | } |
| 589 | |
| 590 | static int32_t getVo(const IntProperty &, UChar32 c, UProperty) { |
Frank Tang | 69c72a6 | 2019-04-03 21:41:21 -0700 | [diff] [blame] | 591 | return ulayout_ensureData() && gVoTrie != nullptr ? ucptrie_get(gVoTrie, c) : 0; |
| 592 | } |
| 593 | |
| 594 | static int32_t layoutGetMaxValue(const IntProperty &/*prop*/, UProperty which) { |
| 595 | if (!ulayout_ensureData()) { return 0; } |
| 596 | switch (which) { |
| 597 | case UCHAR_INDIC_POSITIONAL_CATEGORY: |
| 598 | return gMaxInpcValue; |
| 599 | case UCHAR_INDIC_SYLLABIC_CATEGORY: |
| 600 | return gMaxInscValue; |
| 601 | case UCHAR_VERTICAL_ORIENTATION: |
| 602 | return gMaxVoValue; |
| 603 | default: |
| 604 | return 0; |
| 605 | } |
Jungshik Shin | 42d5027 | 2018-10-24 01:22:09 -0700 | [diff] [blame] | 606 | } |
| 607 | |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 608 | static const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={ |
| 609 | /* |
| 610 | * column, mask and shift values for int-value properties from u_getUnicodeProperties(). |
| 611 | * Must be in order of corresponding UProperty, |
| 612 | * and there must be exactly one entry per int UProperty. |
| 613 | * |
| 614 | * Properties with mask==0 are handled in code. |
| 615 | * For them, column is the UPropertySource value. |
| 616 | */ |
| 617 | { UPROPS_SRC_BIDI, 0, 0, getBiDiClass, biDiGetMaxValue }, |
| 618 | { 0, UPROPS_BLOCK_MASK, UPROPS_BLOCK_SHIFT, defaultGetValue, defaultGetMaxValue }, |
| 619 | { UPROPS_SRC_NFC, 0, 0xff, getCombiningClass, getMaxValueFromShift }, |
| 620 | { 2, UPROPS_DT_MASK, 0, defaultGetValue, defaultGetMaxValue }, |
| 621 | { 0, UPROPS_EA_MASK, UPROPS_EA_SHIFT, defaultGetValue, defaultGetMaxValue }, |
| 622 | { UPROPS_SRC_CHAR, 0, (int32_t)U_CHAR_CATEGORY_COUNT-1,getGeneralCategory, getMaxValueFromShift }, |
| 623 | { UPROPS_SRC_BIDI, 0, 0, getJoiningGroup, biDiGetMaxValue }, |
| 624 | { UPROPS_SRC_BIDI, 0, 0, getJoiningType, biDiGetMaxValue }, |
| 625 | { 2, UPROPS_LB_MASK, UPROPS_LB_SHIFT, defaultGetValue, defaultGetMaxValue }, |
| 626 | { UPROPS_SRC_CHAR, 0, (int32_t)U_NT_COUNT-1, getNumericType, getMaxValueFromShift }, |
Frank Tang | f222396 | 2020-04-27 18:25:29 -0700 | [diff] [blame] | 627 | { UPROPS_SRC_PROPSVEC, 0, 0, getScript, scriptGetMaxValue }, |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 628 | { UPROPS_SRC_PROPSVEC, 0, (int32_t)U_HST_COUNT-1, getHangulSyllableType, getMaxValueFromShift }, |
| 629 | // UCHAR_NFD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes" |
| 630 | { UPROPS_SRC_NFC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift }, |
| 631 | // UCHAR_NFKD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes" |
| 632 | { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift }, |
| 633 | // UCHAR_NFC_QUICK_CHECK: max=2=MAYBE |
| 634 | { UPROPS_SRC_NFC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift }, |
| 635 | // UCHAR_NFKC_QUICK_CHECK: max=2=MAYBE |
| 636 | { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift }, |
| 637 | { UPROPS_SRC_NFC, 0, 0xff, getLeadCombiningClass, getMaxValueFromShift }, |
| 638 | { UPROPS_SRC_NFC, 0, 0xff, getTrailCombiningClass, getMaxValueFromShift }, |
| 639 | { 2, UPROPS_GCB_MASK, UPROPS_GCB_SHIFT, defaultGetValue, defaultGetMaxValue }, |
| 640 | { 2, UPROPS_SB_MASK, UPROPS_SB_SHIFT, defaultGetValue, defaultGetMaxValue }, |
| 641 | { 2, UPROPS_WB_MASK, UPROPS_WB_SHIFT, defaultGetValue, defaultGetMaxValue }, |
| 642 | { UPROPS_SRC_BIDI, 0, 0, getBiDiPairedBracketType, biDiGetMaxValue }, |
Frank Tang | 69c72a6 | 2019-04-03 21:41:21 -0700 | [diff] [blame] | 643 | { UPROPS_SRC_INPC, 0, 0, getInPC, layoutGetMaxValue }, |
| 644 | { UPROPS_SRC_INSC, 0, 0, getInSC, layoutGetMaxValue }, |
| 645 | { UPROPS_SRC_VO, 0, 0, getVo, layoutGetMaxValue }, |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 646 | }; |
| 647 | |
| 648 | U_CAPI int32_t U_EXPORT2 |
| 649 | u_getIntPropertyValue(UChar32 c, UProperty which) { |
| 650 | if(which<UCHAR_INT_START) { |
| 651 | if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) { |
| 652 | const BinaryProperty &prop=binProps[which]; |
| 653 | return prop.contains(prop, c, which); |
| 654 | } |
| 655 | } else if(which<UCHAR_INT_LIMIT) { |
| 656 | const IntProperty &prop=intProps[which-UCHAR_INT_START]; |
| 657 | return prop.getValue(prop, c, which); |
| 658 | } else if(which==UCHAR_GENERAL_CATEGORY_MASK) { |
| 659 | return U_MASK(u_charType(c)); |
| 660 | } |
| 661 | return 0; // undefined |
| 662 | } |
| 663 | |
| 664 | U_CAPI int32_t U_EXPORT2 |
| 665 | u_getIntPropertyMinValue(UProperty /*which*/) { |
| 666 | return 0; /* all binary/enum/int properties have a minimum value of 0 */ |
| 667 | } |
| 668 | |
| 669 | U_CAPI int32_t U_EXPORT2 |
| 670 | u_getIntPropertyMaxValue(UProperty which) { |
| 671 | if(which<UCHAR_INT_START) { |
| 672 | if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) { |
| 673 | return 1; // maximum TRUE for all binary properties |
| 674 | } |
| 675 | } else if(which<UCHAR_INT_LIMIT) { |
| 676 | const IntProperty &prop=intProps[which-UCHAR_INT_START]; |
| 677 | return prop.getMaxValue(prop, which); |
| 678 | } |
| 679 | return -1; // undefined |
| 680 | } |
| 681 | |
| 682 | U_CFUNC UPropertySource U_EXPORT2 |
| 683 | uprops_getSource(UProperty which) { |
| 684 | if(which<UCHAR_BINARY_START) { |
| 685 | return UPROPS_SRC_NONE; /* undefined */ |
| 686 | } else if(which<UCHAR_BINARY_LIMIT) { |
| 687 | const BinaryProperty &prop=binProps[which]; |
| 688 | if(prop.mask!=0) { |
| 689 | return UPROPS_SRC_PROPSVEC; |
| 690 | } else { |
| 691 | return (UPropertySource)prop.column; |
| 692 | } |
| 693 | } else if(which<UCHAR_INT_START) { |
| 694 | return UPROPS_SRC_NONE; /* undefined */ |
| 695 | } else if(which<UCHAR_INT_LIMIT) { |
| 696 | const IntProperty &prop=intProps[which-UCHAR_INT_START]; |
| 697 | if(prop.mask!=0) { |
| 698 | return UPROPS_SRC_PROPSVEC; |
| 699 | } else { |
| 700 | return (UPropertySource)prop.column; |
| 701 | } |
| 702 | } else if(which<UCHAR_STRING_START) { |
| 703 | switch(which) { |
| 704 | case UCHAR_GENERAL_CATEGORY_MASK: |
| 705 | case UCHAR_NUMERIC_VALUE: |
| 706 | return UPROPS_SRC_CHAR; |
| 707 | |
| 708 | default: |
| 709 | return UPROPS_SRC_NONE; |
| 710 | } |
| 711 | } else if(which<UCHAR_STRING_LIMIT) { |
| 712 | switch(which) { |
| 713 | case UCHAR_AGE: |
| 714 | return UPROPS_SRC_PROPSVEC; |
| 715 | |
| 716 | case UCHAR_BIDI_MIRRORING_GLYPH: |
| 717 | return UPROPS_SRC_BIDI; |
| 718 | |
| 719 | case UCHAR_CASE_FOLDING: |
| 720 | case UCHAR_LOWERCASE_MAPPING: |
| 721 | case UCHAR_SIMPLE_CASE_FOLDING: |
| 722 | case UCHAR_SIMPLE_LOWERCASE_MAPPING: |
| 723 | case UCHAR_SIMPLE_TITLECASE_MAPPING: |
| 724 | case UCHAR_SIMPLE_UPPERCASE_MAPPING: |
| 725 | case UCHAR_TITLECASE_MAPPING: |
| 726 | case UCHAR_UPPERCASE_MAPPING: |
| 727 | return UPROPS_SRC_CASE; |
| 728 | |
| 729 | case UCHAR_ISO_COMMENT: |
| 730 | case UCHAR_NAME: |
| 731 | case UCHAR_UNICODE_1_NAME: |
| 732 | return UPROPS_SRC_NAMES; |
| 733 | |
| 734 | default: |
| 735 | return UPROPS_SRC_NONE; |
| 736 | } |
| 737 | } else { |
| 738 | switch(which) { |
| 739 | case UCHAR_SCRIPT_EXTENSIONS: |
| 740 | return UPROPS_SRC_PROPSVEC; |
| 741 | default: |
| 742 | return UPROPS_SRC_NONE; /* undefined */ |
| 743 | } |
| 744 | } |
| 745 | } |
| 746 | |
Jungshik Shin | 42d5027 | 2018-10-24 01:22:09 -0700 | [diff] [blame] | 747 | U_CFUNC void U_EXPORT2 |
| 748 | uprops_addPropertyStarts(UPropertySource src, const USetAdder *sa, UErrorCode *pErrorCode) { |
Frank Tang | 69c72a6 | 2019-04-03 21:41:21 -0700 | [diff] [blame] | 749 | if (!ulayout_ensureData(*pErrorCode)) { return; } |
Jungshik Shin | 42d5027 | 2018-10-24 01:22:09 -0700 | [diff] [blame] | 750 | const UCPTrie *trie; |
| 751 | switch (src) { |
| 752 | case UPROPS_SRC_INPC: |
Frank Tang | 69c72a6 | 2019-04-03 21:41:21 -0700 | [diff] [blame] | 753 | trie = gInpcTrie; |
Jungshik Shin | 42d5027 | 2018-10-24 01:22:09 -0700 | [diff] [blame] | 754 | break; |
| 755 | case UPROPS_SRC_INSC: |
Frank Tang | 69c72a6 | 2019-04-03 21:41:21 -0700 | [diff] [blame] | 756 | trie = gInscTrie; |
Jungshik Shin | 42d5027 | 2018-10-24 01:22:09 -0700 | [diff] [blame] | 757 | break; |
| 758 | case UPROPS_SRC_VO: |
Frank Tang | 69c72a6 | 2019-04-03 21:41:21 -0700 | [diff] [blame] | 759 | trie = gVoTrie; |
Jungshik Shin | 42d5027 | 2018-10-24 01:22:09 -0700 | [diff] [blame] | 760 | break; |
| 761 | default: |
| 762 | *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 763 | return; |
| 764 | } |
| 765 | |
Frank Tang | 69c72a6 | 2019-04-03 21:41:21 -0700 | [diff] [blame] | 766 | if (trie == nullptr) { |
| 767 | *pErrorCode = U_MISSING_RESOURCE_ERROR; |
| 768 | return; |
| 769 | } |
| 770 | |
Jungshik Shin | 42d5027 | 2018-10-24 01:22:09 -0700 | [diff] [blame] | 771 | // Add the start code point of each same-value range of the trie. |
| 772 | UChar32 start = 0, end; |
| 773 | while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0, |
| 774 | nullptr, nullptr, nullptr)) >= 0) { |
| 775 | sa->add(sa->set, start); |
| 776 | start = end + 1; |
| 777 | } |
| 778 | } |
| 779 | |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 780 | #if !UCONFIG_NO_NORMALIZATION |
| 781 | |
| 782 | U_CAPI int32_t U_EXPORT2 |
| 783 | u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode) { |
| 784 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
| 785 | return 0; |
| 786 | } |
| 787 | if(destCapacity<0 || (dest==NULL && destCapacity>0)) { |
| 788 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 789 | return 0; |
| 790 | } |
| 791 | // Compute the FC_NFKC_Closure on the fly: |
| 792 | // We have the API for complete coverage of Unicode properties, although |
| 793 | // this value by itself is not useful via API. |
| 794 | // (What could be useful is a custom normalization table that combines |
| 795 | // case folding and NFKC.) |
| 796 | // For the derivation, see Unicode's DerivedNormalizationProps.txt. |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 797 | const Normalizer2 *nfkc=Normalizer2::getNFKCInstance(*pErrorCode); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 798 | if(U_FAILURE(*pErrorCode)) { |
| 799 | return 0; |
| 800 | } |
| 801 | // first: b = NFKC(Fold(a)) |
| 802 | UnicodeString folded1String; |
| 803 | const UChar *folded1; |
Jungshik Shin | 87232d8 | 2017-05-13 21:10:13 -0700 | [diff] [blame] | 804 | int32_t folded1Length=ucase_toFullFolding(c, &folded1, U_FOLD_CASE_DEFAULT); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 805 | if(folded1Length<0) { |
| 806 | const Normalizer2Impl *nfkcImpl=Normalizer2Factory::getImpl(nfkc); |
| 807 | if(nfkcImpl->getCompQuickCheck(nfkcImpl->getNorm16(c))!=UNORM_NO) { |
| 808 | return u_terminateUChars(dest, destCapacity, 0, pErrorCode); // c does not change at all under CaseFolding+NFKC |
| 809 | } |
| 810 | folded1String.setTo(c); |
| 811 | } else { |
| 812 | if(folded1Length>UCASE_MAX_STRING_LENGTH) { |
| 813 | folded1String.setTo(folded1Length); |
| 814 | } else { |
| 815 | folded1String.setTo(FALSE, folded1, folded1Length); |
| 816 | } |
| 817 | } |
| 818 | UnicodeString kc1=nfkc->normalize(folded1String, *pErrorCode); |
| 819 | // second: c = NFKC(Fold(b)) |
| 820 | UnicodeString folded2String(kc1); |
| 821 | UnicodeString kc2=nfkc->normalize(folded2String.foldCase(), *pErrorCode); |
| 822 | // if (c != b) add the mapping from a to c |
| 823 | if(U_FAILURE(*pErrorCode) || kc1==kc2) { |
| 824 | return u_terminateUChars(dest, destCapacity, 0, pErrorCode); |
| 825 | } else { |
| 826 | return kc2.extract(dest, destCapacity, *pErrorCode); |
| 827 | } |
| 828 | } |
| 829 | |
| 830 | #endif |