Jungshik Shin | 87232d8 | 2017-05-13 21:10:13 -0700 | [diff] [blame] | 1 | // © 2016 and later: Unicode, Inc. and others. |
Jungshik Shin | 5feb9ad | 2016-10-21 12:52:48 -0700 | [diff] [blame] | 2 | // License & terms of use: http://www.unicode.org/copyright.html |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 3 | /* |
| 4 | ******************************************************************************* |
| 5 | * |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 6 | * Copyright (C) 2004-2014, International Business Machines |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 7 | * Corporation and others. All Rights Reserved. |
| 8 | * |
| 9 | ******************************************************************************* |
| 10 | * file name: ubidi_props.c |
Jungshik Shin | 87232d8 | 2017-05-13 21:10:13 -0700 | [diff] [blame] | 11 | * encoding: UTF-8 |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 12 | * tab size: 8 (not used) |
| 13 | * indentation:4 |
| 14 | * |
| 15 | * created on: 2004dec30 |
| 16 | * created by: Markus W. Scherer |
| 17 | * |
| 18 | * Low-level Unicode bidi/shaping properties access. |
| 19 | */ |
| 20 | |
| 21 | #include "unicode/utypes.h" |
| 22 | #include "unicode/uset.h" |
| 23 | #include "unicode/udata.h" /* UDataInfo */ |
| 24 | #include "ucmndata.h" /* DataHeader */ |
| 25 | #include "udatamem.h" |
| 26 | #include "uassert.h" |
| 27 | #include "cmemory.h" |
| 28 | #include "utrie2.h" |
| 29 | #include "ubidi_props.h" |
| 30 | #include "ucln_cmn.h" |
| 31 | |
| 32 | struct UBiDiProps { |
| 33 | UDataMemory *mem; |
| 34 | const int32_t *indexes; |
| 35 | const uint32_t *mirrors; |
| 36 | const uint8_t *jgArray; |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 37 | const uint8_t *jgArray2; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 38 | |
| 39 | UTrie2 trie; |
| 40 | uint8_t formatVersion[4]; |
| 41 | }; |
| 42 | |
| 43 | /* ubidi_props_data.h is machine-generated by genbidi --csource */ |
| 44 | #define INCLUDED_FROM_UBIDI_PROPS_C |
| 45 | #include "ubidi_props_data.h" |
| 46 | |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 47 | /* set of property starts for UnicodeSet ------------------------------------ */ |
| 48 | |
| 49 | static UBool U_CALLCONV |
| 50 | _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { |
Jungshik Shin | 87232d8 | 2017-05-13 21:10:13 -0700 | [diff] [blame] | 51 | (void)end; |
| 52 | (void)value; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 53 | /* add the start code point to the USet */ |
| 54 | const USetAdder *sa=(const USetAdder *)context; |
| 55 | sa->add(sa->set, start); |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 56 | return true; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 57 | } |
| 58 | |
| 59 | U_CFUNC void |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 60 | ubidi_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 61 | int32_t i, length; |
| 62 | UChar32 c, start, limit; |
| 63 | |
| 64 | const uint8_t *jgArray; |
| 65 | uint8_t prev, jg; |
| 66 | |
| 67 | if(U_FAILURE(*pErrorCode)) { |
| 68 | return; |
| 69 | } |
| 70 | |
| 71 | /* add the start code point of each same-value range of the trie */ |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 72 | utrie2_enum(&ubidi_props_singleton.trie, NULL, _enumPropertyStartsRange, sa); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 73 | |
| 74 | /* add the code points from the bidi mirroring table */ |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 75 | length=ubidi_props_singleton.indexes[UBIDI_IX_MIRROR_LENGTH]; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 76 | for(i=0; i<length; ++i) { |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 77 | c=UBIDI_GET_MIRROR_CODE_POINT(ubidi_props_singleton.mirrors[i]); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 78 | sa->addRange(sa->set, c, c+1); |
| 79 | } |
| 80 | |
| 81 | /* add the code points from the Joining_Group array where the value changes */ |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 82 | start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START]; |
| 83 | limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT]; |
| 84 | jgArray=ubidi_props_singleton.jgArray; |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 85 | for(;;) { |
| 86 | prev=0; |
| 87 | while(start<limit) { |
| 88 | jg=*jgArray++; |
| 89 | if(jg!=prev) { |
| 90 | sa->add(sa->set, start); |
| 91 | prev=jg; |
| 92 | } |
| 93 | ++start; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 94 | } |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 95 | if(prev!=0) { |
| 96 | /* add the limit code point if the last value was not 0 (it is now start==limit) */ |
| 97 | sa->add(sa->set, limit); |
| 98 | } |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 99 | if(limit==ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT]) { |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 100 | /* switch to the second Joining_Group range */ |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 101 | start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START2]; |
| 102 | limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT2]; |
| 103 | jgArray=ubidi_props_singleton.jgArray2; |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 104 | } else { |
| 105 | break; |
| 106 | } |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 107 | } |
| 108 | |
| 109 | /* add code points with hardcoded properties, plus the ones following them */ |
| 110 | |
| 111 | /* (none right now) */ |
| 112 | } |
| 113 | |
| 114 | /* property access functions ------------------------------------------------ */ |
| 115 | |
| 116 | U_CFUNC int32_t |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 117 | ubidi_getMaxValue(UProperty which) { |
| 118 | int32_t max=ubidi_props_singleton.indexes[UBIDI_MAX_VALUES_INDEX]; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 119 | switch(which) { |
| 120 | case UCHAR_BIDI_CLASS: |
| 121 | return (max&UBIDI_CLASS_MASK); |
| 122 | case UCHAR_JOINING_GROUP: |
| 123 | return (max&UBIDI_MAX_JG_MASK)>>UBIDI_MAX_JG_SHIFT; |
| 124 | case UCHAR_JOINING_TYPE: |
| 125 | return (max&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT; |
| 126 | case UCHAR_BIDI_PAIRED_BRACKET_TYPE: |
| 127 | return (max&UBIDI_BPT_MASK)>>UBIDI_BPT_SHIFT; |
| 128 | default: |
| 129 | return -1; /* undefined */ |
| 130 | } |
| 131 | } |
| 132 | |
| 133 | U_CAPI UCharDirection |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 134 | ubidi_getClass(UChar32 c) { |
| 135 | uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 136 | return (UCharDirection)UBIDI_GET_CLASS(props); |
| 137 | } |
| 138 | |
| 139 | U_CFUNC UBool |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 140 | ubidi_isMirrored(UChar32 c) { |
| 141 | uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 142 | return (UBool)UBIDI_GET_FLAG(props, UBIDI_IS_MIRRORED_SHIFT); |
| 143 | } |
| 144 | |
| 145 | static UChar32 |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 146 | getMirror(UChar32 c, uint16_t props) { |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 147 | int32_t delta=UBIDI_GET_MIRROR_DELTA(props); |
| 148 | if(delta!=UBIDI_ESC_MIRROR_DELTA) { |
| 149 | return c+delta; |
| 150 | } else { |
| 151 | /* look for mirror code point in the mirrors[] table */ |
| 152 | const uint32_t *mirrors; |
| 153 | uint32_t m; |
| 154 | int32_t i, length; |
| 155 | UChar32 c2; |
| 156 | |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 157 | mirrors=ubidi_props_singleton.mirrors; |
| 158 | length=ubidi_props_singleton.indexes[UBIDI_IX_MIRROR_LENGTH]; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 159 | |
| 160 | /* linear search */ |
| 161 | for(i=0; i<length; ++i) { |
| 162 | m=mirrors[i]; |
| 163 | c2=UBIDI_GET_MIRROR_CODE_POINT(m); |
| 164 | if(c==c2) { |
| 165 | /* found c, return its mirror code point using the index in m */ |
| 166 | return UBIDI_GET_MIRROR_CODE_POINT(mirrors[UBIDI_GET_MIRROR_INDEX(m)]); |
| 167 | } else if(c<c2) { |
| 168 | break; |
| 169 | } |
| 170 | } |
| 171 | |
| 172 | /* c not found, return it itself */ |
| 173 | return c; |
| 174 | } |
| 175 | } |
| 176 | |
| 177 | U_CFUNC UChar32 |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 178 | ubidi_getMirror(UChar32 c) { |
| 179 | uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); |
| 180 | return getMirror(c, props); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 181 | } |
| 182 | |
| 183 | U_CFUNC UBool |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 184 | ubidi_isBidiControl(UChar32 c) { |
| 185 | uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 186 | return (UBool)UBIDI_GET_FLAG(props, UBIDI_BIDI_CONTROL_SHIFT); |
| 187 | } |
| 188 | |
| 189 | U_CFUNC UBool |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 190 | ubidi_isJoinControl(UChar32 c) { |
| 191 | uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 192 | return (UBool)UBIDI_GET_FLAG(props, UBIDI_JOIN_CONTROL_SHIFT); |
| 193 | } |
| 194 | |
| 195 | U_CFUNC UJoiningType |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 196 | ubidi_getJoiningType(UChar32 c) { |
| 197 | uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 198 | return (UJoiningType)((props&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT); |
| 199 | } |
| 200 | |
| 201 | U_CFUNC UJoiningGroup |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 202 | ubidi_getJoiningGroup(UChar32 c) { |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 203 | UChar32 start, limit; |
| 204 | |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 205 | start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START]; |
| 206 | limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT]; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 207 | if(start<=c && c<limit) { |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 208 | return (UJoiningGroup)ubidi_props_singleton.jgArray[c-start]; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 209 | } |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 210 | start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START2]; |
| 211 | limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT2]; |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 212 | if(start<=c && c<limit) { |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 213 | return (UJoiningGroup)ubidi_props_singleton.jgArray2[c-start]; |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 214 | } |
| 215 | return U_JG_NO_JOINING_GROUP; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 216 | } |
| 217 | |
| 218 | U_CFUNC UBidiPairedBracketType |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 219 | ubidi_getPairedBracketType(UChar32 c) { |
| 220 | uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 221 | return (UBidiPairedBracketType)((props&UBIDI_BPT_MASK)>>UBIDI_BPT_SHIFT); |
| 222 | } |
| 223 | |
| 224 | U_CFUNC UChar32 |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 225 | ubidi_getPairedBracket(UChar32 c) { |
| 226 | uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 227 | if((props&UBIDI_BPT_MASK)==0) { |
| 228 | return c; |
| 229 | } else { |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 230 | return getMirror(c, props); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 231 | } |
| 232 | } |
| 233 | |
| 234 | /* public API (see uchar.h) ------------------------------------------------- */ |
| 235 | |
| 236 | U_CFUNC UCharDirection |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 237 | u_charDirection(UChar32 c) { |
| 238 | return ubidi_getClass(c); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 239 | } |
| 240 | |
| 241 | U_CFUNC UBool |
| 242 | u_isMirrored(UChar32 c) { |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 243 | return ubidi_isMirrored(c); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 244 | } |
| 245 | |
| 246 | U_CFUNC UChar32 |
| 247 | u_charMirror(UChar32 c) { |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 248 | return ubidi_getMirror(c); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 249 | } |
| 250 | |
Frank Tang | f90543d | 2020-10-30 19:02:04 -0700 | [diff] [blame] | 251 | U_CAPI UChar32 U_EXPORT2 |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 252 | u_getBidiPairedBracket(UChar32 c) { |
Jungshik Shin | f61e46d | 2018-05-04 13:00:45 -0700 | [diff] [blame] | 253 | return ubidi_getPairedBracket(c); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 254 | } |