blob: 8e2fc36e5f1753245b7bd38afce8d107ad72ea0a [file] [log] [blame]
Jungshik Shin87232d82017-05-13 21:10:13 -07001// © 2016 and later: Unicode, Inc. and others.
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002// License & terms of use: http://www.unicode.org/copyright.html
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00003/*
4******************************************************************************
5*
Jungshik Shin (jungshik at google)3a9ccbc2015-02-10 10:56:28 -08006* Copyright (C) 1999-2015, International Business Machines
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00007* Corporation and others. All Rights Reserved.
8*
9******************************************************************************
10* file name: ubidi.c
Jungshik Shin87232d82017-05-13 21:10:13 -070011* encoding: UTF-8
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000012* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 1999jul27
16* created by: Markus W. Scherer, updated by Matitiahu Allouche
17*
18*/
19
20#include "cmemory.h"
21#include "unicode/utypes.h"
22#include "unicode/ustring.h"
23#include "unicode/uchar.h"
24#include "unicode/ubidi.h"
25#include "unicode/utf16.h"
26#include "ubidi_props.h"
27#include "ubidiimp.h"
28#include "uassert.h"
29
30/*
31 * General implementation notes:
32 *
33 * Throughout the implementation, there are comments like (W2) that refer to
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -080034 * rules of the BiDi algorithm, in this example to the second rule of the
35 * resolution of weak types.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000036 *
37 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
38 * character according to UTF-16, the second UChar gets the directional property of
39 * the entire character assigned, while the first one gets a BN, a boundary
40 * neutral, type, which is ignored by most of the algorithm according to
41 * rule (X9) and the implementation suggestions of the BiDi algorithm.
42 *
43 * Later, adjustWSLevels() will set the level for each BN to that of the
44 * following character (UChar), which results in surrogate pairs getting the
45 * same level on each of their surrogates.
46 *
47 * In a UTF-8 implementation, the same thing could be done: the last byte of
48 * a multi-byte sequence would get the "real" property, while all previous
49 * bytes of that sequence would get BN.
50 *
51 * It is not possible to assign all those parts of a character the same real
52 * property because this would fail in the resolution of weak types with rules
53 * that look at immediately surrounding types.
54 *
55 * As a related topic, this implementation does not remove Boundary Neutral
56 * types from the input, but ignores them wherever this is relevant.
57 * For example, the loop for the resolution of the weak types reads
58 * types until it finds a non-BN.
59 * Also, explicit embedding codes are neither changed into BN nor removed.
60 * They are only treated the same way real BNs are.
61 * As stated before, adjustWSLevels() takes care of them at the end.
62 * For the purpose of conformance, the levels of all these codes
63 * do not matter.
64 *
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -080065 * Note that this implementation modifies the dirProps
66 * after the initial setup, when applying X5c (replace FSI by LRI or RLI),
67 * X6, N0 (replace paired brackets by L or R).
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000068 *
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -080069 * In this implementation, the resolution of weak types (W1 to W6),
70 * neutrals (N1 and N2), and the assignment of the resolved level (In)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000071 * are all done in one single loop, in resolveImplicitLevels().
72 * Changes of dirProp values are done on the fly, without writing
73 * them back to the dirProps array.
74 *
75 *
76 * This implementation contains code that allows to bypass steps of the
77 * algorithm that are not needed on the specific paragraph
78 * in order to speed up the most common cases considerably,
79 * like text that is entirely LTR, or RTL text without numbers.
80 *
81 * Most of this is done by setting a bit for each directional property
82 * in a flags variable and later checking for whether there are
83 * any LTR characters or any RTL characters, or both, whether
84 * there are any explicit embedding codes, etc.
85 *
86 * If the (Xn) steps are performed, then the flags are re-evaluated,
87 * because they will then not contain the embedding codes any more
88 * and will be adjusted for override codes, so that subsequently
89 * more bypassing may be possible than what the initial flags suggested.
90 *
91 * If the text is not mixed-directional, then the
92 * algorithm steps for the weak type resolution are not performed,
93 * and all levels are set to the paragraph level.
94 *
95 * If there are no explicit embedding codes, then the (Xn) steps
96 * are not performed.
97 *
98 * If embedding levels are supplied as a parameter, then all
99 * explicit embedding codes are ignored, and the (Xn) steps
100 * are not performed.
101 *
102 * White Space types could get the level of the run they belong to,
103 * and are checked with a test of (flags&MASK_EMBEDDING) to
104 * consider if the paragraph direction should be considered in
105 * the flags variable.
106 *
107 * If there are no White Space types in the paragraph, then
108 * (L1) is not necessary in adjustWSLevels().
109 */
110
111/* to avoid some conditional statements, use tiny constant arrays */
112static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
113static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
114static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
115
116#define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800117#define DIRPROP_FLAG_E(level) flagE[(level)&1]
118#define DIRPROP_FLAG_O(level) flagO[(level)&1]
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000119
120#define DIR_FROM_STRONG(strong) ((strong)==L ? L : R)
121
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800122#define NO_OVERRIDE(level) ((level)&~UBIDI_LEVEL_OVERRIDE)
123
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000124/* UBiDi object management -------------------------------------------------- */
125
126U_CAPI UBiDi * U_EXPORT2
127ubidi_open(void)
128{
129 UErrorCode errorCode=U_ZERO_ERROR;
130 return ubidi_openSized(0, 0, &errorCode);
131}
132
133U_CAPI UBiDi * U_EXPORT2
134ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
135 UBiDi *pBiDi;
136
137 /* check the argument values */
138 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
139 return NULL;
140 } else if(maxLength<0 || maxRunCount<0) {
141 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
142 return NULL; /* invalid arguments */
143 }
144
145 /* allocate memory for the object */
146 pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
147 if(pBiDi==NULL) {
148 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
149 return NULL;
150 }
151
152 /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
153 uprv_memset(pBiDi, 0, sizeof(UBiDi));
154
155 /* get BiDi properties */
156 pBiDi->bdp=ubidi_getSingleton();
157
158 /* allocate memory for arrays as requested */
159 if(maxLength>0) {
160 if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
161 !getInitialLevelsMemory(pBiDi, maxLength)
162 ) {
163 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
164 }
165 } else {
166 pBiDi->mayAllocateText=TRUE;
167 }
168
169 if(maxRunCount>0) {
170 if(maxRunCount==1) {
171 /* use simpleRuns[] */
172 pBiDi->runsSize=sizeof(Run);
173 } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
174 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
175 }
176 } else {
177 pBiDi->mayAllocateRuns=TRUE;
178 }
179
180 if(U_SUCCESS(*pErrorCode)) {
181 return pBiDi;
182 } else {
183 ubidi_close(pBiDi);
184 return NULL;
185 }
186}
187
188/*
189 * We are allowed to allocate memory if memory==NULL or
190 * mayAllocate==TRUE for each array that we need.
191 * We also try to grow memory as needed if we
192 * allocate it.
193 *
194 * Assume sizeNeeded>0.
195 * If *pMemory!=NULL, then assume *pSize>0.
196 *
197 * ### this realloc() may unnecessarily copy the old data,
198 * which we know we don't need any more;
199 * is this the best way to do this??
200 */
201U_CFUNC UBool
202ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
203 void **pMemory = (void **)bidiMem;
204 /* check for existing memory */
205 if(*pMemory==NULL) {
206 /* we need to allocate memory */
207 if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
208 *pSize=sizeNeeded;
209 return TRUE;
210 } else {
211 return FALSE;
212 }
213 } else {
214 if(sizeNeeded<=*pSize) {
215 /* there is already enough memory */
216 return TRUE;
217 }
218 else if(!mayAllocate) {
219 /* not enough memory, and we must not allocate */
220 return FALSE;
221 } else {
222 /* we try to grow */
223 void *memory;
224 /* in most cases, we do not need the copy-old-data part of
225 * realloc, but it is needed when adding runs using getRunsMemory()
226 * in setParaRunsOnly()
227 */
228 if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
229 *pMemory=memory;
230 *pSize=sizeNeeded;
231 return TRUE;
232 } else {
233 /* we failed to grow */
234 return FALSE;
235 }
236 }
237 }
238}
239
240U_CAPI void U_EXPORT2
241ubidi_close(UBiDi *pBiDi) {
242 if(pBiDi!=NULL) {
243 pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */
244 if(pBiDi->dirPropsMemory!=NULL) {
245 uprv_free(pBiDi->dirPropsMemory);
246 }
247 if(pBiDi->levelsMemory!=NULL) {
248 uprv_free(pBiDi->levelsMemory);
249 }
250 if(pBiDi->openingsMemory!=NULL) {
251 uprv_free(pBiDi->openingsMemory);
252 }
253 if(pBiDi->parasMemory!=NULL) {
254 uprv_free(pBiDi->parasMemory);
255 }
256 if(pBiDi->runsMemory!=NULL) {
257 uprv_free(pBiDi->runsMemory);
258 }
259 if(pBiDi->isolatesMemory!=NULL) {
260 uprv_free(pBiDi->isolatesMemory);
261 }
262 if(pBiDi->insertPoints.points!=NULL) {
263 uprv_free(pBiDi->insertPoints.points);
264 }
265
266 uprv_free(pBiDi);
267 }
268}
269
270/* set to approximate "inverse BiDi" ---------------------------------------- */
271
272U_CAPI void U_EXPORT2
273ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
274 if(pBiDi!=NULL) {
275 pBiDi->isInverse=isInverse;
276 pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
277 : UBIDI_REORDER_DEFAULT;
278 }
279}
280
281U_CAPI UBool U_EXPORT2
282ubidi_isInverse(UBiDi *pBiDi) {
283 if(pBiDi!=NULL) {
284 return pBiDi->isInverse;
285 } else {
286 return FALSE;
287 }
288}
289
290/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
291 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
292 * concept of RUNS_ONLY which is a double operation.
293 * It could be advantageous to divide this into 3 concepts:
294 * a) Operation: direct / inverse / RUNS_ONLY
295 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
296 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
297 * This would allow combinations not possible today like RUNS_ONLY with
298 * NUMBERS_SPECIAL.
299 * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
300 * REMOVE_CONTROLS for the inverse step.
301 * Not all combinations would be supported, and probably not all do make sense.
302 * This would need to document which ones are supported and what are the
303 * fallbacks for unsupported combinations.
304 */
305U_CAPI void U_EXPORT2
306ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
307 if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT)
308 && (reorderingMode < UBIDI_REORDER_COUNT)) {
309 pBiDi->reorderingMode = reorderingMode;
310 pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
311 }
312}
313
314U_CAPI UBiDiReorderingMode U_EXPORT2
315ubidi_getReorderingMode(UBiDi *pBiDi) {
316 if (pBiDi!=NULL) {
317 return pBiDi->reorderingMode;
318 } else {
319 return UBIDI_REORDER_DEFAULT;
320 }
321}
322
323U_CAPI void U_EXPORT2
324ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
325 if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
326 reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
327 }
328 if (pBiDi!=NULL) {
329 pBiDi->reorderingOptions=reorderingOptions;
330 }
331}
332
333U_CAPI uint32_t U_EXPORT2
334ubidi_getReorderingOptions(UBiDi *pBiDi) {
335 if (pBiDi!=NULL) {
336 return pBiDi->reorderingOptions;
337 } else {
338 return 0;
339 }
340}
341
342U_CAPI UBiDiDirection U_EXPORT2
343ubidi_getBaseDirection(const UChar *text,
344int32_t length){
345
346 int32_t i;
347 UChar32 uchar;
348 UCharDirection dir;
349
350 if( text==NULL || length<-1 ){
351 return UBIDI_NEUTRAL;
352 }
353
354 if(length==-1) {
355 length=u_strlen(text);
356 }
357
358 for( i = 0 ; i < length; ) {
359 /* i is incremented by U16_NEXT */
360 U16_NEXT(text, i, length, uchar);
361 dir = u_charDirection(uchar);
362 if( dir == U_LEFT_TO_RIGHT )
363 return UBIDI_LTR;
364 if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC )
365 return UBIDI_RTL;
366 }
367 return UBIDI_NEUTRAL;
368}
369
370/* perform (P2)..(P3) ------------------------------------------------------- */
371
372/**
373 * Returns the directionality of the first strong character
374 * after the last B in prologue, if any.
375 * Requires prologue!=null.
376 */
377static DirProp
378firstL_R_AL(UBiDi *pBiDi) {
379 const UChar *text=pBiDi->prologue;
380 int32_t length=pBiDi->proLength;
381 int32_t i;
382 UChar32 uchar;
383 DirProp dirProp, result=ON;
384 for(i=0; i<length; ) {
385 /* i is incremented by U16_NEXT */
386 U16_NEXT(text, i, length, uchar);
387 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
388 if(result==ON) {
389 if(dirProp==L || dirProp==R || dirProp==AL) {
390 result=dirProp;
391 }
392 } else {
393 if(dirProp==B) {
394 result=ON;
395 }
396 }
397 }
398 return result;
399}
400
401/*
402 * Check that there are enough entries in the array pointed to by pBiDi->paras
403 */
404static UBool
405checkParaCount(UBiDi *pBiDi) {
406 int32_t count=pBiDi->paraCount;
407 if(pBiDi->paras==pBiDi->simpleParas) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800408 if(count<=SIMPLE_PARAS_COUNT)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000409 return TRUE;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800410 if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_COUNT * 2))
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000411 return FALSE;
412 pBiDi->paras=pBiDi->parasMemory;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800413 uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_COUNT * sizeof(Para));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000414 return TRUE;
415 }
416 if(!getInitialParasMemory(pBiDi, count * 2))
417 return FALSE;
418 pBiDi->paras=pBiDi->parasMemory;
419 return TRUE;
420}
421
422/*
423 * Get the directional properties for the text, calculate the flags bit-set, and
424 * determine the paragraph level if necessary (in pBiDi->paras[i].level).
425 * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800426 * When encountering an FSI, it is initially replaced with an LRI, which is the
427 * default. Only if a strong R or AL is found within its scope will the LRI be
428 * replaced by an RLI.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000429 */
430static UBool
431getDirProps(UBiDi *pBiDi) {
432 const UChar *text=pBiDi->text;
433 DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */
434
435 int32_t i=0, originalLength=pBiDi->originalLength;
436 Flags flags=0; /* collect all directionalities in the text */
437 UChar32 uchar;
438 DirProp dirProp=0, defaultParaLevel=0; /* initialize to avoid compiler warnings */
439 UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
440 /* for inverse BiDi, the default para level is set to RTL if there is a
441 strong R or AL character at either end of the text */
442 UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
443 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
444 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
445 int32_t lastArabicPos=-1;
446 int32_t controlCount=0;
447 UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
448 UBIDI_OPTION_REMOVE_CONTROLS);
449
Jungshik Shin87232d82017-05-13 21:10:13 -0700450 enum State {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000451 NOT_SEEKING_STRONG, /* 0: not contextual paraLevel, not after FSI */
452 SEEKING_STRONG_FOR_PARA, /* 1: looking for first strong char in para */
453 SEEKING_STRONG_FOR_FSI, /* 2: looking for first strong after FSI */
454 LOOKING_FOR_PDI /* 3: found strong after FSI, looking for PDI */
Jungshik Shin87232d82017-05-13 21:10:13 -0700455 };
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000456 State state;
457 DirProp lastStrong=ON; /* for default level & inverse BiDi */
458 /* The following stacks are used to manage isolate sequences. Those
459 sequences may be nested, but obviously never more deeply than the
460 maximum explicit embedding level.
461 lastStack is the index of the last used entry in the stack. A value of -1
462 means that there is no open isolate sequence.
463 lastStack is reset to -1 on paragraph boundaries. */
464 /* The following stack contains the position of the initiator of
465 each open isolate sequence */
466 int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
467 /* The following stack contains the last known state before
468 encountering the initiator of an isolate sequence */
Jungshik Shin87232d82017-05-13 21:10:13 -0700469 State previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000470 int32_t stackLast=-1;
471
472 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING)
473 pBiDi->length=0;
474 defaultParaLevel=pBiDi->paraLevel&1;
475 if(isDefaultLevel) {
476 pBiDi->paras[0].level=defaultParaLevel;
477 lastStrong=defaultParaLevel;
478 if(pBiDi->proLength>0 && /* there is a prologue */
479 (dirProp=firstL_R_AL(pBiDi))!=ON) { /* with a strong character */
480 if(dirProp==L)
481 pBiDi->paras[0].level=0; /* set the default para level */
482 else
483 pBiDi->paras[0].level=1; /* set the default para level */
484 state=NOT_SEEKING_STRONG;
485 } else {
486 state=SEEKING_STRONG_FOR_PARA;
487 }
488 } else {
489 pBiDi->paras[0].level=pBiDi->paraLevel;
490 state=NOT_SEEKING_STRONG;
491 }
492 /* count paragraphs and determine the paragraph level (P2..P3) */
493 /*
494 * see comment in ubidi.h:
495 * the UBIDI_DEFAULT_XXX values are designed so that
496 * their bit 0 alone yields the intended default
497 */
498 for( /* i=0 above */ ; i<originalLength; ) {
499 /* i is incremented by U16_NEXT */
500 U16_NEXT(text, i, originalLength, uchar);
501 flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
502 dirProps[i-1]=dirProp;
503 if(uchar>0xffff) { /* set the lead surrogate's property to BN */
504 flags|=DIRPROP_FLAG(BN);
505 dirProps[i-2]=BN;
506 }
507 if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar))
508 controlCount++;
509 if(dirProp==L) {
510 if(state==SEEKING_STRONG_FOR_PARA) {
511 pBiDi->paras[pBiDi->paraCount-1].level=0;
512 state=NOT_SEEKING_STRONG;
513 }
514 else if(state==SEEKING_STRONG_FOR_FSI) {
515 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800516 /* no need for next statement, already set by default */
517 /* dirProps[isolateStartStack[stackLast]]=LRI; */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000518 flags|=DIRPROP_FLAG(LRI);
519 }
520 state=LOOKING_FOR_PDI;
521 }
522 lastStrong=L;
523 continue;
524 }
525 if(dirProp==R || dirProp==AL) {
526 if(state==SEEKING_STRONG_FOR_PARA) {
527 pBiDi->paras[pBiDi->paraCount-1].level=1;
528 state=NOT_SEEKING_STRONG;
529 }
530 else if(state==SEEKING_STRONG_FOR_FSI) {
531 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
532 dirProps[isolateStartStack[stackLast]]=RLI;
533 flags|=DIRPROP_FLAG(RLI);
534 }
535 state=LOOKING_FOR_PDI;
536 }
537 lastStrong=R;
538 if(dirProp==AL)
539 lastArabicPos=i-1;
540 continue;
541 }
542 if(dirProp>=FSI && dirProp<=RLI) { /* FSI, LRI or RLI */
543 stackLast++;
544 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
545 isolateStartStack[stackLast]=i-1;
546 previousStateStack[stackLast]=state;
547 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800548 if(dirProp==FSI) {
549 dirProps[i-1]=LRI; /* default if no strong char */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000550 state=SEEKING_STRONG_FOR_FSI;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800551 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000552 else
553 state=LOOKING_FOR_PDI;
554 continue;
555 }
556 if(dirProp==PDI) {
557 if(state==SEEKING_STRONG_FOR_FSI) {
558 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800559 /* no need for next statement, already set by default */
560 /* dirProps[isolateStartStack[stackLast]]=LRI; */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000561 flags|=DIRPROP_FLAG(LRI);
562 }
563 }
564 if(stackLast>=0) {
565 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL)
566 state=previousStateStack[stackLast];
567 stackLast--;
568 }
569 continue;
570 }
571 if(dirProp==B) {
572 if(i<originalLength && uchar==CR && text[i]==LF) /* do nothing on the CR */
573 continue;
574 pBiDi->paras[pBiDi->paraCount-1].limit=i;
575 if(isDefaultLevelInverse && lastStrong==R)
576 pBiDi->paras[pBiDi->paraCount-1].level=1;
577 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
578 /* When streaming, we only process whole paragraphs
579 thus some updates are only done on paragraph boundaries */
580 pBiDi->length=i; /* i is index to next character */
581 pBiDi->controlCount=controlCount;
582 }
583 if(i<originalLength) { /* B not last char in text */
584 pBiDi->paraCount++;
585 if(checkParaCount(pBiDi)==FALSE) /* not enough memory for a new para entry */
586 return FALSE;
587 if(isDefaultLevel) {
588 pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel;
589 state=SEEKING_STRONG_FOR_PARA;
590 lastStrong=defaultParaLevel;
591 } else {
592 pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel;
593 state=NOT_SEEKING_STRONG;
594 }
595 stackLast=-1;
596 }
597 continue;
598 }
599 }
600 /* Ignore still open isolate sequences with overflow */
601 if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) {
602 stackLast=UBIDI_MAX_EXPLICIT_LEVEL;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800603 state=SEEKING_STRONG_FOR_FSI; /* to be on the safe side */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000604 }
605 /* Resolve direction of still unresolved open FSI sequences */
606 while(stackLast>=0) {
607 if(state==SEEKING_STRONG_FOR_FSI) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800608 /* no need for next statement, already set by default */
609 /* dirProps[isolateStartStack[stackLast]]=LRI; */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000610 flags|=DIRPROP_FLAG(LRI);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800611 break;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000612 }
613 state=previousStateStack[stackLast];
614 stackLast--;
615 }
616 /* When streaming, ignore text after the last paragraph separator */
617 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
618 if(pBiDi->length<originalLength)
619 pBiDi->paraCount--;
620 } else {
621 pBiDi->paras[pBiDi->paraCount-1].limit=originalLength;
622 pBiDi->controlCount=controlCount;
623 }
624 /* For inverse bidi, default para direction is RTL if there is
625 a strong R or AL at either end of the paragraph */
626 if(isDefaultLevelInverse && lastStrong==R) {
627 pBiDi->paras[pBiDi->paraCount-1].level=1;
628 }
629 if(isDefaultLevel) {
630 pBiDi->paraLevel=pBiDi->paras[0].level;
631 }
632 /* The following is needed to resolve the text direction for default level
633 paragraphs containing no strong character */
634 for(i=0; i<pBiDi->paraCount; i++)
635 flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level);
636
637 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
638 flags|=DIRPROP_FLAG(L);
639 }
640 pBiDi->flags=flags;
641 pBiDi->lastArabicPos=lastArabicPos;
642 return TRUE;
643}
644
645/* determine the paragraph level at position index */
646U_CFUNC UBiDiLevel
647ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) {
648 int32_t i;
649 for(i=0; i<pBiDi->paraCount; i++)
650 if(pindex<pBiDi->paras[i].limit)
651 break;
652 if(i>=pBiDi->paraCount)
653 i=pBiDi->paraCount-1;
654 return (UBiDiLevel)(pBiDi->paras[i].level);
655}
656
657/* Functions for handling paired brackets ----------------------------------- */
658
659/* In the isoRuns array, the first entry is used for text outside of any
660 isolate sequence. Higher entries are used for each more deeply nested
661 isolate sequence. isoRunLast is the index of the last used entry. The
662 openings array is used to note the data of opening brackets not yet
663 matched by a closing bracket, or matched but still susceptible to change
664 level.
665 Each isoRun entry contains the index of the first and
666 one-after-last openings entries for pending opening brackets it
667 contains. The next openings entry to use is the one-after-last of the
668 most deeply nested isoRun entry.
669 isoRun entries also contain their current embedding level and the last
670 encountered strong character, since these will be needed to resolve
671 the level of paired brackets. */
672
673static void
674bracketInit(UBiDi *pBiDi, BracketData *bd) {
675 bd->pBiDi=pBiDi;
676 bd->isoRunLast=0;
677 bd->isoRuns[0].start=0;
678 bd->isoRuns[0].limit=0;
679 bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0);
Jungshik Shin87232d82017-05-13 21:10:13 -0700680 UBiDiLevel t = GET_PARALEVEL(pBiDi, 0) & 1;
681 bd->isoRuns[0].lastStrong = bd->isoRuns[0].lastBase = t;
682 bd->isoRuns[0].contextDir = (UBiDiDirection)t;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800683 bd->isoRuns[0].contextPos=0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000684 if(pBiDi->openingsMemory) {
685 bd->openings=pBiDi->openingsMemory;
jshin@chromium.orgd2abf6c2014-09-17 18:53:08 +0000686 bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000687 } else {
688 bd->openings=bd->simpleOpenings;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800689 bd->openingsCount=SIMPLE_OPENINGS_COUNT;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000690 }
691 bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL ||
692 bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
693}
694
695/* paragraph boundary */
696static void
697bracketProcessB(BracketData *bd, UBiDiLevel level) {
698 bd->isoRunLast=0;
699 bd->isoRuns[0].limit=0;
700 bd->isoRuns[0].level=level;
Jungshik Shin87232d82017-05-13 21:10:13 -0700701 bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=level&1;
702 bd->isoRuns[0].contextDir=(UBiDiDirection)(level&1);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800703 bd->isoRuns[0].contextPos=0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000704}
705
706/* LRE, LRO, RLE, RLO, PDF */
707static void
708bracketProcessBoundary(BracketData *bd, int32_t lastCcPos,
709 UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) {
710 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
711 DirProp *dirProps=bd->pBiDi->dirProps;
712 if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO) /* after an isolate */
713 return;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800714 if(NO_OVERRIDE(embeddingLevel)>NO_OVERRIDE(contextLevel)) /* not a PDF */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000715 contextLevel=embeddingLevel;
716 pLastIsoRun->limit=pLastIsoRun->start;
717 pLastIsoRun->level=embeddingLevel;
Jungshik Shin87232d82017-05-13 21:10:13 -0700718 pLastIsoRun->lastStrong=pLastIsoRun->lastBase=contextLevel&1;
719 pLastIsoRun->contextDir=(UBiDiDirection)(contextLevel&1);
720 pLastIsoRun->contextPos=(UBiDiDirection)lastCcPos;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000721}
722
723/* LRI or RLI */
724static void
725bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) {
726 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
727 int16_t lastLimit;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800728 pLastIsoRun->lastBase=ON;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000729 lastLimit=pLastIsoRun->limit;
730 bd->isoRunLast++;
731 pLastIsoRun++;
732 pLastIsoRun->start=pLastIsoRun->limit=lastLimit;
733 pLastIsoRun->level=level;
Jungshik Shin87232d82017-05-13 21:10:13 -0700734 pLastIsoRun->lastStrong=pLastIsoRun->lastBase=level&1;
735 pLastIsoRun->contextDir=(UBiDiDirection)(level&1);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800736 pLastIsoRun->contextPos=0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000737}
738
739/* PDI */
740static void
741bracketProcessPDI(BracketData *bd) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800742 IsoRun *pLastIsoRun;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000743 bd->isoRunLast--;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800744 pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
745 pLastIsoRun->lastBase=ON;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000746}
747
748/* newly found opening bracket: create an openings entry */
749static UBool /* return TRUE if success */
750bracketAddOpening(BracketData *bd, UChar match, int32_t position) {
751 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
752 Opening *pOpening;
jshin@chromium.orgd2abf6c2014-09-17 18:53:08 +0000753 if(pLastIsoRun->limit>=bd->openingsCount) { /* no available new entry */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000754 UBiDi *pBiDi=bd->pBiDi;
755 if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2))
756 return FALSE;
757 if(bd->openings==bd->simpleOpenings)
758 uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings,
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800759 SIMPLE_OPENINGS_COUNT * sizeof(Opening));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000760 bd->openings=pBiDi->openingsMemory; /* may have changed */
jshin@chromium.orgd2abf6c2014-09-17 18:53:08 +0000761 bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000762 }
763 pOpening=&bd->openings[pLastIsoRun->limit];
764 pOpening->position=position;
765 pOpening->match=match;
766 pOpening->contextDir=pLastIsoRun->contextDir;
767 pOpening->contextPos=pLastIsoRun->contextPos;
768 pOpening->flags=0;
769 pLastIsoRun->limit++;
770 return TRUE;
771}
772
773/* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */
774static void
775fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) {
776 /* This function calls itself recursively */
777 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
778 Opening *qOpening;
779 DirProp *dirProps=bd->pBiDi->dirProps;
780 int32_t k, openingPosition, closingPosition;
781 for(k=openingIndex+1, qOpening=&bd->openings[k]; k<pLastIsoRun->limit; k++, qOpening++) {
782 if(qOpening->match>=0) /* not an N0c match */
783 continue;
784 if(newPropPosition<qOpening->contextPos)
785 break;
786 if(newPropPosition>=qOpening->position)
787 continue;
788 if(newProp==qOpening->contextDir)
789 break;
790 openingPosition=qOpening->position;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800791 dirProps[openingPosition]=newProp;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000792 closingPosition=-(qOpening->match);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800793 dirProps[closingPosition]=newProp;
794 qOpening->match=0; /* prevent further changes */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000795 fixN0c(bd, k, openingPosition, newProp);
796 fixN0c(bd, k, closingPosition, newProp);
797 }
798}
799
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800800/* process closing bracket */
801static DirProp /* return L or R if N0b or N0c, ON if N0d */
802bracketProcessClosing(BracketData *bd, int32_t openIdx, int32_t position) {
803 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
804 Opening *pOpening, *qOpening;
805 UBiDiDirection direction;
806 UBool stable;
807 DirProp newProp;
808 pOpening=&bd->openings[openIdx];
Jungshik Shin87232d82017-05-13 21:10:13 -0700809 direction=(UBiDiDirection)(pLastIsoRun->level&1);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800810 stable=TRUE; /* assume stable until proved otherwise */
811
812 /* The stable flag is set when brackets are paired and their
813 level is resolved and cannot be changed by what will be
814 found later in the source string.
815 An unstable match can occur only when applying N0c, where
816 the resolved level depends on the preceding context, and
817 this context may be affected by text occurring later.
818 Example: RTL paragraph containing: abc[(latin) HEBREW]
819 When the closing parenthesis is encountered, it appears
820 that N0c1 must be applied since 'abc' sets an opposite
821 direction context and both parentheses receive level 2.
822 However, when the closing square bracket is processed,
823 N0b applies because of 'HEBREW' being included within the
824 brackets, thus the square brackets are treated like R and
825 receive level 1. However, this changes the preceding
826 context of the opening parenthesis, and it now appears
827 that N0c2 must be applied to the parentheses rather than
828 N0c1. */
829
830 if((direction==0 && pOpening->flags&FOUND_L) ||
831 (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */
832 newProp=direction;
833 }
834 else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */
835 /* it is stable if there is no containing pair or in
836 conditions too complicated and not worth checking */
837 stable=(openIdx==pLastIsoRun->start);
838 if(direction!=pOpening->contextDir)
839 newProp=pOpening->contextDir; /* N0c1 */
840 else
841 newProp=direction; /* N0c2 */
842 } else {
843 /* forget this and any brackets nested within this pair */
844 pLastIsoRun->limit=openIdx;
845 return ON; /* N0d */
846 }
847 bd->pBiDi->dirProps[pOpening->position]=newProp;
848 bd->pBiDi->dirProps[position]=newProp;
849 /* Update nested N0c pairs that may be affected */
850 fixN0c(bd, openIdx, pOpening->position, newProp);
851 if(stable) {
852 pLastIsoRun->limit=openIdx; /* forget any brackets nested within this pair */
853 /* remove lower located synonyms if any */
854 while(pLastIsoRun->limit>pLastIsoRun->start &&
855 bd->openings[pLastIsoRun->limit-1].position==pOpening->position)
856 pLastIsoRun->limit--;
857 } else {
858 int32_t k;
859 pOpening->match=-position;
860 /* neutralize lower located synonyms if any */
861 k=openIdx-1;
862 while(k>=pLastIsoRun->start &&
863 bd->openings[k].position==pOpening->position)
864 bd->openings[k--].match=0;
865 /* neutralize any unmatched opening between the current pair;
866 this will also neutralize higher located synonyms if any */
867 for(k=openIdx+1; k<pLastIsoRun->limit; k++) {
868 qOpening=&bd->openings[k];
869 if(qOpening->position>=position)
870 break;
871 if(qOpening->match>0)
872 qOpening->match=0;
873 }
874 }
875 return newProp;
876}
877
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000878/* handle strong characters, digits and candidates for closing brackets */
879static UBool /* return TRUE if success */
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800880bracketProcessChar(BracketData *bd, int32_t position) {
881 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
882 DirProp *dirProps, dirProp, newProp;
883 UBiDiLevel level;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000884 dirProps=bd->pBiDi->dirProps;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800885 dirProp=dirProps[position];
886 if(dirProp==ON) {
887 UChar c, match;
888 int32_t idx;
889 /* First see if it is a matching closing bracket. Hopefully, this is
890 more efficient than checking if it is a closing bracket at all */
891 c=bd->pBiDi->text[position];
892 for(idx=pLastIsoRun->limit-1; idx>=pLastIsoRun->start; idx--) {
893 if(bd->openings[idx].match!=c)
894 continue;
895 /* We have a match */
896 newProp=bracketProcessClosing(bd, idx, position);
897 if(newProp==ON) { /* N0d */
898 c=0; /* prevent handling as an opening */
899 break;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000900 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800901 pLastIsoRun->lastBase=ON;
Jungshik Shin87232d82017-05-13 21:10:13 -0700902 pLastIsoRun->contextDir=(UBiDiDirection)newProp;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000903 pLastIsoRun->contextPos=position;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800904 level=bd->pBiDi->levels[position];
905 if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */
906 uint16_t flag;
907 int32_t i;
908 newProp=level&1;
909 pLastIsoRun->lastStrong=newProp;
910 flag=DIRPROP_FLAG(newProp);
911 for(i=pLastIsoRun->start; i<idx; i++)
912 bd->openings[i].flags|=flag;
913 /* matching brackets are not overridden by LRO/RLO */
914 bd->pBiDi->levels[position]&=~UBIDI_LEVEL_OVERRIDE;
915 }
916 /* matching brackets are not overridden by LRO/RLO */
917 bd->pBiDi->levels[bd->openings[idx].position]&=~UBIDI_LEVEL_OVERRIDE;
918 return TRUE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000919 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800920 /* We get here only if the ON character is not a matching closing
921 bracket or it is a case of N0d */
922 /* Now see if it is an opening bracket */
923 if(c)
924 match=u_getBidiPairedBracket(c); /* get the matching char */
925 else
926 match=0;
927 if(match!=c && /* has a matching char */
928 ubidi_getPairedBracketType(bd->pBiDi->bdp, c)==U_BPT_OPEN) { /* opening bracket */
929 /* special case: process synonyms
930 create an opening entry for each synonym */
931 if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */
932 if(!bracketAddOpening(bd, 0x3009, position))
933 return FALSE;
934 }
935 else if(match==0x3009) { /* RIGHT ANGLE BRACKET */
936 if(!bracketAddOpening(bd, 0x232A, position))
937 return FALSE;
938 }
939 if(!bracketAddOpening(bd, match, position))
940 return FALSE;
941 }
942 }
943 level=bd->pBiDi->levels[position];
944 if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */
945 newProp=level&1;
946 if(dirProp!=S && dirProp!=WS && dirProp!=ON)
947 dirProps[position]=newProp;
948 pLastIsoRun->lastBase=newProp;
949 pLastIsoRun->lastStrong=newProp;
Jungshik Shin87232d82017-05-13 21:10:13 -0700950 pLastIsoRun->contextDir=(UBiDiDirection)newProp;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800951 pLastIsoRun->contextPos=position;
952 }
953 else if(dirProp<=R || dirProp==AL) {
954 newProp=DIR_FROM_STRONG(dirProp);
955 pLastIsoRun->lastBase=dirProp;
956 pLastIsoRun->lastStrong=dirProp;
Jungshik Shin87232d82017-05-13 21:10:13 -0700957 pLastIsoRun->contextDir=(UBiDiDirection)newProp;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800958 pLastIsoRun->contextPos=position;
959 }
960 else if(dirProp==EN) {
961 pLastIsoRun->lastBase=EN;
962 if(pLastIsoRun->lastStrong==L) {
963 newProp=L; /* W7 */
964 if(!bd->isNumbersSpecial)
965 dirProps[position]=ENL;
Jungshik Shin87232d82017-05-13 21:10:13 -0700966 pLastIsoRun->contextDir=(UBiDiDirection)L;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800967 pLastIsoRun->contextPos=position;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000968 }
969 else {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800970 newProp=R; /* N0 */
971 if(pLastIsoRun->lastStrong==AL)
972 dirProps[position]=AN; /* W2 */
973 else
974 dirProps[position]=ENR;
Jungshik Shin87232d82017-05-13 21:10:13 -0700975 pLastIsoRun->contextDir=(UBiDiDirection)R;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800976 pLastIsoRun->contextPos=position;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000977 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000978 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800979 else if(dirProp==AN) {
980 newProp=R; /* N0 */
981 pLastIsoRun->lastBase=AN;
Jungshik Shin87232d82017-05-13 21:10:13 -0700982 pLastIsoRun->contextDir=(UBiDiDirection)R;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800983 pLastIsoRun->contextPos=position;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000984 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800985 else if(dirProp==NSM) {
986 /* if the last real char was ON, change NSM to ON so that it
987 will stay ON even if the last real char is a bracket which
988 may be changed to L or R */
989 newProp=pLastIsoRun->lastBase;
990 if(newProp==ON)
991 dirProps[position]=newProp;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000992 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -0800993 else {
994 newProp=dirProp;
995 pLastIsoRun->lastBase=dirProp;
996 }
997 if(newProp<=R || newProp==AL) {
998 int32_t i;
999 uint16_t flag=DIRPROP_FLAG(DIR_FROM_STRONG(newProp));
1000 for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++)
1001 if(position>bd->openings[i].position)
1002 bd->openings[i].flags|=flag;
1003 }
1004 return TRUE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001005}
1006
1007/* perform (X1)..(X9) ------------------------------------------------------- */
1008
1009/* determine if the text is mixed-directional or single-directional */
1010static UBiDiDirection
1011directionFromFlags(UBiDi *pBiDi) {
1012 Flags flags=pBiDi->flags;
1013 /* if the text contains AN and neutrals, then some neutrals may become RTL */
1014 if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
1015 return UBIDI_LTR;
1016 } else if(!(flags&MASK_LTR)) {
1017 return UBIDI_RTL;
1018 } else {
1019 return UBIDI_MIXED;
1020 }
1021}
1022
1023/*
1024 * Resolve the explicit levels as specified by explicit embedding codes.
1025 * Recalculate the flags to have them reflect the real properties
1026 * after taking the explicit embeddings into account.
1027 *
1028 * The BiDi algorithm is designed to result in the same behavior whether embedding
1029 * levels are externally specified (from "styled text", supposedly the preferred
1030 * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
1031 * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
1032 * However, in a real implementation, the removal of these codes and their index
1033 * positions in the plain text is undesirable since it would result in
1034 * reallocated, reindexed text.
1035 * Instead, this implementation leaves the codes in there and just ignores them
1036 * in the subsequent processing.
1037 * In order to get the same reordering behavior, positions with a BN or a not-isolate
1038 * explicit embedding code just get the same level assigned as the last "real"
1039 * character.
1040 *
1041 * Some implementations, not this one, then overwrite some of these
1042 * directionality properties at "real" same-level-run boundaries by
1043 * L or R codes so that the resolution of weak types can be performed on the
1044 * entire paragraph at once instead of having to parse it once more and
1045 * perform that resolution on same-level-runs.
1046 * This limits the scope of the implicit rules in effectively
1047 * the same way as the run limits.
1048 *
1049 * Instead, this implementation does not modify these codes, except for
1050 * paired brackets whose properties (ON) may be replaced by L or R.
1051 * On one hand, the paragraph has to be scanned for same-level-runs, but
1052 * on the other hand, this saves another loop to reset these codes,
1053 * or saves making and modifying a copy of dirProps[].
1054 *
1055 *
1056 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
1057 *
1058 *
1059 * Handling the stack of explicit levels (Xn):
1060 *
1061 * With the BiDi stack of explicit levels, as pushed with each
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001062 * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001063 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL.
1064 *
1065 * In order to have a correct push-pop semantics even in the case of overflows,
1066 * overflow counters and a valid isolate counter are used as described in UAX#9
1067 * section 3.3.2 "Explicit Levels and Directions".
1068 *
1069 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001070 *
1071 * Returns normally the direction; -1 if there was a memory shortage
1072 *
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001073 */
1074static UBiDiDirection
1075resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
1076 DirProp *dirProps=pBiDi->dirProps;
1077 UBiDiLevel *levels=pBiDi->levels;
1078 const UChar *text=pBiDi->text;
1079
1080 int32_t i=0, length=pBiDi->length;
1081 Flags flags=pBiDi->flags; /* collect all directionalities in the text */
1082 DirProp dirProp;
1083 UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
1084 UBiDiDirection direction;
1085 pBiDi->isolateCount=0;
1086
1087 if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; }
1088
1089 /* determine if the text is mixed-directional or single-directional */
1090 direction=directionFromFlags(pBiDi);
1091
1092 /* we may not need to resolve any explicit levels */
1093 if((direction!=UBIDI_MIXED)) {
1094 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
1095 return direction;
1096 }
1097 if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) {
1098 /* inverse BiDi: mixed, but all characters are at the same embedding level */
1099 /* set all levels to the paragraph level */
1100 int32_t paraIndex, start, limit;
1101 for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1102 if(paraIndex==0)
1103 start=0;
1104 else
1105 start=pBiDi->paras[paraIndex-1].limit;
1106 limit=pBiDi->paras[paraIndex].limit;
1107 level=pBiDi->paras[paraIndex].level;
1108 for(i=start; i<limit; i++)
1109 levels[i]=level;
1110 }
1111 return direction; /* no bracket matching for inverse BiDi */
1112 }
1113 if(!(flags&(MASK_EXPLICIT|MASK_ISO))) {
1114 /* no embeddings, set all levels to the paragraph level */
1115 /* we still have to perform bracket matching */
1116 int32_t paraIndex, start, limit;
1117 BracketData bracketData;
1118 bracketInit(pBiDi, &bracketData);
1119 for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1120 if(paraIndex==0)
1121 start=0;
1122 else
1123 start=pBiDi->paras[paraIndex-1].limit;
1124 limit=pBiDi->paras[paraIndex].limit;
1125 level=pBiDi->paras[paraIndex].level;
1126 for(i=start; i<limit; i++) {
1127 levels[i]=level;
1128 dirProp=dirProps[i];
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001129 if(dirProp==BN)
1130 continue;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001131 if(dirProp==B) {
1132 if((i+1)<length) {
1133 if(text[i]==CR && text[i+1]==LF)
1134 continue; /* skip CR when followed by LF */
1135 bracketProcessB(&bracketData, level);
1136 }
1137 continue;
1138 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001139 if(!bracketProcessChar(&bracketData, i)) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001140 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1141 return UBIDI_LTR;
1142 }
1143 }
1144 }
1145 return direction;
1146 }
1147 {
1148 /* continue to perform (Xn) */
1149
1150 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
1151 /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
1152 UBiDiLevel embeddingLevel=level, newLevel;
1153 UBiDiLevel previousLevel=level; /* previous level for regular (not CC) characters */
1154 int32_t lastCcPos=0; /* index of last effective LRx,RLx, PDx */
1155
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001156 /* The following stack remembers the embedding level and the ISOLATE flag of level runs.
1157 stackLast points to its current entry. */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001158 uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL
1159 but we need one more entry as base */
1160 uint32_t stackLast=0;
1161 int32_t overflowIsolateCount=0;
1162 int32_t overflowEmbeddingCount=0;
1163 int32_t validIsolateCount=0;
1164 BracketData bracketData;
1165 bracketInit(pBiDi, &bracketData);
1166 stack[0]=level; /* initialize base entry to para level, no override, no isolate */
1167
1168 /* recalculate the flags */
1169 flags=0;
1170
1171 for(i=0; i<length; ++i) {
1172 dirProp=dirProps[i];
1173 switch(dirProp) {
1174 case LRE:
1175 case RLE:
1176 case LRO:
1177 case RLO:
1178 /* (X2, X3, X4, X5) */
1179 flags|=DIRPROP_FLAG(BN);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001180 levels[i]=previousLevel;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001181 if (dirProp==LRE || dirProp==LRO)
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001182 /* least greater even level */
1183 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001184 else
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001185 /* least greater odd level */
1186 newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001187 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1188 overflowEmbeddingCount==0) {
1189 lastCcPos=i;
1190 embeddingLevel=newLevel;
1191 if(dirProp==LRO || dirProp==RLO)
1192 embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
1193 stackLast++;
1194 stack[stackLast]=embeddingLevel;
1195 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE
1196 since this has already been done for newLevel which is
1197 the source for embeddingLevel.
1198 */
1199 } else {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001200 if(overflowIsolateCount==0)
1201 overflowEmbeddingCount++;
1202 }
1203 break;
1204 case PDF:
1205 /* (X7) */
1206 flags|=DIRPROP_FLAG(BN);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001207 levels[i]=previousLevel;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001208 /* handle all the overflow cases first */
1209 if(overflowIsolateCount) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001210 break;
1211 }
1212 if(overflowEmbeddingCount) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001213 overflowEmbeddingCount--;
1214 break;
1215 }
1216 if(stackLast>0 && stack[stackLast]<ISOLATE) { /* not an isolate entry */
1217 lastCcPos=i;
1218 stackLast--;
1219 embeddingLevel=(UBiDiLevel)stack[stackLast];
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001220 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001221 break;
1222 case LRI:
1223 case RLI:
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001224 flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
1225 levels[i]=NO_OVERRIDE(embeddingLevel);
1226 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001227 bracketProcessBoundary(&bracketData, lastCcPos,
1228 previousLevel, embeddingLevel);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001229 flags|=DIRPROP_FLAG_MULTI_RUNS;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001230 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001231 previousLevel=embeddingLevel;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001232 /* (X5a, X5b) */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001233 if(dirProp==LRI)
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001234 /* least greater even level */
1235 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001236 else
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001237 /* least greater odd level */
1238 newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001239 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1240 overflowEmbeddingCount==0) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001241 flags|=DIRPROP_FLAG(dirProp);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001242 lastCcPos=i;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001243 validIsolateCount++;
1244 if(validIsolateCount>pBiDi->isolateCount)
1245 pBiDi->isolateCount=validIsolateCount;
1246 embeddingLevel=newLevel;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001247 /* we can increment stackLast without checking because newLevel
1248 will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001249 stackLast++;
1250 stack[stackLast]=embeddingLevel+ISOLATE;
1251 bracketProcessLRI_RLI(&bracketData, embeddingLevel);
1252 } else {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001253 /* make it WS so that it is handled by adjustWSLevels() */
1254 dirProps[i]=WS;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001255 overflowIsolateCount++;
1256 }
1257 break;
1258 case PDI:
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001259 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001260 bracketProcessBoundary(&bracketData, lastCcPos,
1261 previousLevel, embeddingLevel);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001262 flags|=DIRPROP_FLAG_MULTI_RUNS;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001263 }
1264 /* (X6a) */
1265 if(overflowIsolateCount) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001266 overflowIsolateCount--;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001267 /* make it WS so that it is handled by adjustWSLevels() */
1268 dirProps[i]=WS;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001269 }
1270 else if(validIsolateCount) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001271 flags|=DIRPROP_FLAG(PDI);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001272 lastCcPos=i;
1273 overflowEmbeddingCount=0;
1274 while(stack[stackLast]<ISOLATE) /* pop embedding entries */
1275 stackLast--; /* until the last isolate entry */
1276 stackLast--; /* pop also the last isolate entry */
1277 validIsolateCount--;
1278 bracketProcessPDI(&bracketData);
1279 } else
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001280 /* make it WS so that it is handled by adjustWSLevels() */
1281 dirProps[i]=WS;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001282 embeddingLevel=(UBiDiLevel)stack[stackLast]&~ISOLATE;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001283 flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
1284 previousLevel=embeddingLevel;
1285 levels[i]=NO_OVERRIDE(embeddingLevel);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001286 break;
1287 case B:
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001288 flags|=DIRPROP_FLAG(B);
1289 levels[i]=GET_PARALEVEL(pBiDi, i);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001290 if((i+1)<length) {
1291 if(text[i]==CR && text[i+1]==LF)
1292 break; /* skip CR when followed by LF */
1293 overflowEmbeddingCount=overflowIsolateCount=0;
1294 validIsolateCount=0;
1295 stackLast=0;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001296 previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001297 stack[0]=embeddingLevel; /* initialize base entry to para level, no override, no isolate */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001298 bracketProcessB(&bracketData, embeddingLevel);
1299 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001300 break;
1301 case BN:
1302 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
1303 /* they will get their levels set correctly in adjustWSLevels() */
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001304 levels[i]=previousLevel;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001305 flags|=DIRPROP_FLAG(BN);
1306 break;
1307 default:
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001308 /* all other types are normal characters and get the "real" level */
1309 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001310 bracketProcessBoundary(&bracketData, lastCcPos,
1311 previousLevel, embeddingLevel);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001312 flags|=DIRPROP_FLAG_MULTI_RUNS;
1313 if(embeddingLevel&UBIDI_LEVEL_OVERRIDE)
1314 flags|=DIRPROP_FLAG_O(embeddingLevel);
1315 else
1316 flags|=DIRPROP_FLAG_E(embeddingLevel);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001317 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001318 previousLevel=embeddingLevel;
1319 levels[i]=embeddingLevel;
1320 if(!bracketProcessChar(&bracketData, i))
Jungshik Shin87232d82017-05-13 21:10:13 -07001321 return (UBiDiDirection)-1;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001322 /* the dirProp may have been changed in bracketProcessChar() */
1323 flags|=DIRPROP_FLAG(dirProps[i]);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001324 break;
1325 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001326 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001327 if(flags&MASK_EMBEDDING)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001328 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001329 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B)))
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001330 flags|=DIRPROP_FLAG(L);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001331 /* again, determine if the text is mixed-directional or single-directional */
1332 pBiDi->flags=flags;
1333 direction=directionFromFlags(pBiDi);
1334 }
1335 return direction;
1336}
1337
1338/*
1339 * Use a pre-specified embedding levels array:
1340 *
1341 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
1342 * ignore all explicit codes (X9),
1343 * and check all the preset levels.
1344 *
1345 * Recalculate the flags to have them reflect the real properties
1346 * after taking the explicit embeddings into account.
1347 */
1348static UBiDiDirection
1349checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
1350 DirProp *dirProps=pBiDi->dirProps;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001351 UBiDiLevel *levels=pBiDi->levels;
1352 int32_t isolateCount=0;
1353
Jungshik Shin87232d82017-05-13 21:10:13 -07001354 int32_t length=pBiDi->length;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001355 Flags flags=0; /* collect all directionalities in the text */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001356 pBiDi->isolateCount=0;
1357
Jungshik Shin87232d82017-05-13 21:10:13 -07001358 int32_t currentParaIndex = 0;
1359 int32_t currentParaLimit = pBiDi->paras[0].limit;
1360 int32_t currentParaLevel = pBiDi->paraLevel;
1361
1362 for(int32_t i=0; i<length; ++i) {
1363 UBiDiLevel level=levels[i];
1364 DirProp dirProp=dirProps[i];
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001365 if(dirProp==LRI || dirProp==RLI) {
1366 isolateCount++;
1367 if(isolateCount>pBiDi->isolateCount)
1368 pBiDi->isolateCount=isolateCount;
1369 }
1370 else if(dirProp==PDI)
1371 isolateCount--;
1372 else if(dirProp==B)
1373 isolateCount=0;
Jungshik Shin87232d82017-05-13 21:10:13 -07001374
1375 // optimized version of int32_t currentParaLevel = GET_PARALEVEL(pBiDi, i);
1376 if (pBiDi->defaultParaLevel != 0 &&
1377 i == currentParaLimit && (currentParaIndex + 1) < pBiDi->paraCount) {
1378 currentParaLevel = pBiDi->paras[++currentParaIndex].level;
1379 currentParaLimit = pBiDi->paras[currentParaIndex].limit;
1380 }
1381
1382 UBiDiLevel overrideFlag = level & UBIDI_LEVEL_OVERRIDE;
1383 level &= ~UBIDI_LEVEL_OVERRIDE;
1384 if (level < currentParaLevel || UBIDI_MAX_EXPLICIT_LEVEL < level) {
1385 if (level == 0) {
1386 if (dirProp == B) {
1387 // Paragraph separators are ok with explicit level 0.
1388 // Prevents reordering of paragraphs.
1389 } else {
1390 // Treat explicit level 0 as a wildcard for the paragraph level.
1391 // Avoid making the caller guess what the paragraph level would be.
1392 level = (UBiDiLevel)currentParaLevel;
1393 levels[i] = level | overrideFlag;
1394 }
1395 } else {
1396 // 1 <= level < currentParaLevel or UBIDI_MAX_EXPLICIT_LEVEL < level
1397 /* level out of bounds */
1398 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1399 return UBIDI_LTR;
1400 }
1401 }
1402 if (overrideFlag != 0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001403 /* keep the override flag in levels[i] but adjust the flags */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001404 flags|=DIRPROP_FLAG_O(level);
1405 } else {
1406 /* set the flags */
1407 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
1408 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001409 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001410 if(flags&MASK_EMBEDDING)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001411 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001412 /* determine if the text is mixed-directional or single-directional */
1413 pBiDi->flags=flags;
1414 return directionFromFlags(pBiDi);
1415}
1416
1417/******************************************************************
1418 The Properties state machine table
1419*******************************************************************
1420
1421 All table cells are 8 bits:
1422 bits 0..4: next state
1423 bits 5..7: action to perform (if > 0)
1424
1425 Cells may be of format "n" where n represents the next state
1426 (except for the rightmost column).
1427 Cells may also be of format "s(x,y)" where x represents an action
1428 to perform and y represents the next state.
1429
1430*******************************************************************
1431 Definitions and type for properties state table
1432*******************************************************************
1433*/
1434#define IMPTABPROPS_COLUMNS 16
1435#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
1436#define GET_STATEPROPS(cell) ((cell)&0x1f)
1437#define GET_ACTIONPROPS(cell) ((cell)>>5)
1438#define s(action, newState) ((uint8_t)(newState+(action<<5)))
1439
1440static const uint8_t groupProp[] = /* dirProp regrouped */
1441{
1442/* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN FSI LRI RLI PDI ENL ENR */
1443 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10, 4, 4, 4, 4, 13, 14
1444};
1445enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */
1446
1447/******************************************************************
1448
1449 PROPERTIES STATE TABLE
1450
1451 In table impTabProps,
1452 - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
1453 - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
1454 - the Res column is the reduced property assigned to a run
1455
1456 Action 1: process current run1, init new run1
1457 2: init new run2
1458 3: process run1, process run2, init new run1
1459 4: process run1, set run1=run2, init new run2
1460
1461 Notes:
1462 1) This table is used in resolveImplicitLevels().
1463 2) This table triggers actions when there is a change in the Bidi
1464 property of incoming characters (action 1).
1465 3) Most such property sequences are processed immediately (in
1466 fact, passed to processPropertySeq().
1467 4) However, numbers are assembled as one sequence. This means
1468 that undefined situations (like CS following digits, until
1469 it is known if the next char will be a digit) are held until
1470 following chars define them.
1471 Example: digits followed by CS, then comes another CS or ON;
1472 the digits will be processed, then the CS assigned
1473 as the start of an ON sequence (action 3).
1474 5) There are cases where more than one sequence must be
1475 processed, for instance digits followed by CS followed by L:
1476 the digits must be processed as one sequence, and the CS
1477 must be processed as an ON sequence, all this before starting
1478 assembling chars for the opening L sequence.
1479
1480
1481*/
1482static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
1483{
1484/* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , ENL , ENR , Res */
1485/* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , 18 , 21 , DirProp_ON },
1486/* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3),s(1,18),s(1,21), DirProp_L },
1487/* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3),s(1,18),s(1,21), DirProp_R },
1488/* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 ,s(1,18),s(1,21), DirProp_R },
1489/* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), 18 , 21 , DirProp_EN },
1490/* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3),s(1,18),s(1,21), DirProp_AN },
1491/* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), 18 , 21 , DirProp_AN },
1492/* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1493/* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1494/* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), 18 , 21 , DirProp_ON },
1495/*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), 18 , 21 , DirProp_EN },
1496/*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), 18 , 21 , DirProp_EN },
1497/*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN },
1498/*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), 18 , 21 , DirProp_AN },
1499/*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3),s(4,18),s(4,21), DirProp_ON },
1500/*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_S },
1501/*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3),s(1,18),s(1,21), DirProp_S },
1502/*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_B },
1503/*18 ENL */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19), 20 ,s(2,19), 18 , 18 , s(1,3), 18 , 21 , DirProp_L },
1504/*19 ENL+ES/CS */ { s(3,1), s(3,2), 18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 19 , s(4,7), s(3,3), 18 , 21 , DirProp_L },
1505/*20 ENL+ET */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 20 , s(1,7), 20 , 20 , s(1,3), 18 , 21 , DirProp_L },
1506/*21 ENR */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22), 23 ,s(2,22), 21 , 21 , s(1,3), 18 , 21 , DirProp_AN },
1507/*22 ENR+ES/CS */ { s(3,1), s(3,2), 21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 22 , s(4,7), s(3,3), 18 , 21 , DirProp_AN },
1508/*23 ENR+ET */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 23 , s(1,7), 23 , 23 , s(1,3), 18 , 21 , DirProp_AN }
1509};
1510
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001511/* we must undef macro s because the levels tables have a different
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001512 * structure (4 bits for action and 4 bits for next state.
1513 */
1514#undef s
1515
1516/******************************************************************
1517 The levels state machine tables
1518*******************************************************************
1519
1520 All table cells are 8 bits:
1521 bits 0..3: next state
1522 bits 4..7: action to perform (if > 0)
1523
1524 Cells may be of format "n" where n represents the next state
1525 (except for the rightmost column).
1526 Cells may also be of format "s(x,y)" where x represents an action
1527 to perform and y represents the next state.
1528
1529 This format limits each table to 16 states each and to 15 actions.
1530
1531*******************************************************************
1532 Definitions and type for levels state tables
1533*******************************************************************
1534*/
1535#define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
1536#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
1537#define GET_STATE(cell) ((cell)&0x0f)
1538#define GET_ACTION(cell) ((cell)>>4)
1539#define s(action, newState) ((uint8_t)(newState+(action<<4)))
1540
1541typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
1542typedef uint8_t ImpAct[];
1543
1544/* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
1545 * instead of having a pair of ImpTab and a pair of ImpAct.
1546 */
1547typedef struct ImpTabPair {
1548 const void * pImpTab[2];
1549 const void * pImpAct[2];
1550} ImpTabPair;
1551
1552/******************************************************************
1553
1554 LEVELS STATE TABLES
1555
1556 In all levels state tables,
1557 - state 0 is the initial state
1558 - the Res column is the increment to add to the text level
1559 for this property sequence.
1560
1561 The impAct arrays for each table of a pair map the local action
1562 numbers of the table to the total list of actions. For instance,
1563 action 2 in a given table corresponds to the action number which
1564 appears in entry [2] of the impAct array for that table.
1565 The first entry of all impAct arrays must be 0.
1566
1567 Action 1: init conditional sequence
1568 2: prepend conditional sequence to current sequence
1569 3: set ON sequence to new level - 1
1570 4: init EN/AN/ON sequence
1571 5: fix EN/AN/ON sequence followed by R
1572 6: set previous level sequence to level 2
1573
1574 Notes:
1575 1) These tables are used in processPropertySeq(). The input
1576 is property sequences as determined by resolveImplicitLevels.
1577 2) Most such property sequences are processed immediately
1578 (levels are assigned).
1579 3) However, some sequences cannot be assigned a final level till
1580 one or more following sequences are received. For instance,
1581 ON following an R sequence within an even-level paragraph.
1582 If the following sequence is R, the ON sequence will be
1583 assigned basic run level+1, and so will the R sequence.
1584 4) S is generally handled like ON, since its level will be fixed
1585 to paragraph level in adjustWSLevels().
1586
1587*/
1588
1589static const ImpTab impTabL_DEFAULT = /* Even paragraph level */
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001590/* In this table, conditional sequences receive the lower possible level
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001591 until proven otherwise.
1592*/
1593{
1594/* L , R , EN , AN , ON , S , B , Res */
1595/* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 },
1596/* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 },
1597/* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 },
1598/* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 },
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001599/* 4 : R+ON */ { 0 , s(2,1), s(3,3), s(3,3), 4 , 4 , 0 , 0 },
1600/* 5 : AN+ON */ { 0 , s(2,1), 0 , s(3,2), 5 , 5 , 0 , 0 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001601};
1602static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */
1603/* In this table, conditional sequences receive the lower possible level
1604 until proven otherwise.
1605*/
1606{
1607/* L , R , EN , AN , ON , S , B , Res */
1608/* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
1609/* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 },
1610/* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
1611/* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 },
1612/* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 },
1613/* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 }
1614};
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001615static const ImpAct impAct0 = {0,1,2,3,4};
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001616static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
1617 &impTabR_DEFAULT},
1618 {&impAct0, &impAct0}};
1619
1620static const ImpTab impTabL_NUMBERS_SPECIAL = /* Even paragraph level */
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001621/* In this table, conditional sequences receive the lower possible level
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001622 until proven otherwise.
1623*/
1624{
1625/* L , R , EN , AN , ON , S , B , Res */
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001626/* 0 : init */ { 0 , 2 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
1627/* 1 : L+EN/AN */ { 0 , s(4,2), 1 , 1 , 0 , 0 , 0 , 0 },
1628/* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 1 },
1629/* 3 : R+ON */ { 0 , s(2,2), s(3,4), s(3,4), 3 , 3 , 0 , 0 },
1630/* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 }
1631};
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001632static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
1633 &impTabR_DEFAULT},
1634 {&impAct0, &impAct0}};
1635
1636static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
1637/* In this table, EN/AN+ON sequences receive levels as if associated with R
1638 until proven that there is L or sor/eor on both sides. AN is handled like EN.
1639*/
1640{
1641/* L , R , EN , AN , ON , S , B , Res */
1642/* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
1643/* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 2 },
1644/* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 1 },
1645/* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 1 },
1646/* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0), 1 },
1647/* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 2 }
1648};
1649static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
1650/* In this table, EN/AN+ON sequences receive levels as if associated with R
1651 until proven that there is L on both sides. AN is handled like EN.
1652*/
1653{
1654/* L , R , EN , AN , ON , S , B , Res */
1655/* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1656/* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
1657/* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 , 1 },
1658/* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 },
1659/* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 }
1660};
1661static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
1662 {&impTabL_GROUP_NUMBERS_WITH_R,
1663 &impTabR_GROUP_NUMBERS_WITH_R},
1664 {&impAct0, &impAct0}};
1665
1666
1667static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
1668/* This table is identical to the Default LTR table except that EN and AN are
1669 handled like L.
1670*/
1671{
1672/* L , R , EN , AN , ON , S , B , Res */
1673/* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 },
1674/* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 1 },
1675/* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 , 2 },
1676/* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 2 },
1677/* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0), 1 },
1678/* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0), 1 }
1679};
1680static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
1681/* This table is identical to the Default RTL table except that EN and AN are
1682 handled like L.
1683*/
1684{
1685/* L , R , EN , AN , ON , S , B , Res */
1686/* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1687/* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 , 1 },
1688/* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
1689/* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 },
1690/* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 , 0 },
1691/* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 }
1692};
1693static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
1694 {&impTabL_INVERSE_NUMBERS_AS_L,
1695 &impTabR_INVERSE_NUMBERS_AS_L},
1696 {&impAct0, &impAct0}};
1697
1698static const ImpTab impTabR_INVERSE_LIKE_DIRECT = /* Odd paragraph level */
1699/* In this table, conditional sequences receive the lower possible level
1700 until proven otherwise.
1701*/
1702{
1703/* L , R , EN , AN , ON , S , B , Res */
1704/* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
1705/* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 , 1 },
1706/* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
1707/* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 0 },
1708/* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 3 },
1709/* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 },
1710/* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 }
1711};
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001712static const ImpAct impAct1 = {0,1,13,14};
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001713/* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
1714 */
1715static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
1716 {&impTabL_DEFAULT,
1717 &impTabR_INVERSE_LIKE_DIRECT},
1718 {&impAct0, &impAct1}};
1719
1720static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
1721/* The case handled in this table is (visually): R EN L
1722*/
1723{
1724/* L , R , EN , AN , ON , S , B , Res */
1725/* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 , 0 },
1726/* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 , 4 },
1727/* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0), 3 },
1728/* 3 : R */ { 0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0), 0 , 3 },
1729/* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0), 3 },
1730/* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0), 4 },
1731/* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0), 4 }
1732};
1733static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
1734/* The cases handled in this table are (visually): R EN L
1735 R L AN L
1736*/
1737{
1738/* L , R , EN , AN , ON , S , B , Res */
1739/* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1740/* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 1 },
1741/* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 0 },
1742/* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 },
1743/* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 },
1744/* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 },
1745/* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0), 3 }
1746};
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001747static const ImpAct impAct2 = {0,1,2,5,6,7,8};
1748static const ImpAct impAct3 = {0,1,9,10,11,12};
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001749static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
1750 {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
1751 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001752 {&impAct2, &impAct3}};
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001753
1754static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
1755 {&impTabL_NUMBERS_SPECIAL,
1756 &impTabR_INVERSE_LIKE_DIRECT},
1757 {&impAct0, &impAct1}};
1758
1759static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
1760/* The case handled in this table is (visually): R EN L
1761*/
1762{
1763/* L , R , EN , AN , ON , S , B , Res */
1764/* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 },
1765/* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 },
1766/* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 },
1767/* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 },
1768/* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 }
1769};
1770static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
1771 {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
1772 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001773 {&impAct2, &impAct3}};
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001774
1775#undef s
1776
1777typedef struct {
1778 const ImpTab * pImpTab; /* level table pointer */
1779 const ImpAct * pImpAct; /* action map array */
1780 int32_t startON; /* start of ON sequence */
1781 int32_t startL2EN; /* start of level 2 sequence */
1782 int32_t lastStrongRTL; /* index of last found R or AL */
1783 int32_t state; /* current state */
1784 int32_t runStart; /* start position of the run */
1785 UBiDiLevel runLevel; /* run level before implicit solving */
1786} LevState;
1787
1788/*------------------------------------------------------------------------*/
1789
1790static void
1791addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
1792 /* param pos: position where to insert
1793 param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1794 */
1795{
1796#define FIRSTALLOC 10
1797 Point point;
1798 InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
1799
1800 if (pInsertPoints->capacity == 0)
1801 {
Jungshik Shin87232d82017-05-13 21:10:13 -07001802 pInsertPoints->points=static_cast<Point *>(uprv_malloc(sizeof(Point)*FIRSTALLOC));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001803 if (pInsertPoints->points == NULL)
1804 {
1805 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1806 return;
1807 }
1808 pInsertPoints->capacity=FIRSTALLOC;
1809 }
1810 if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
1811 {
Jungshik Shin87232d82017-05-13 21:10:13 -07001812 Point * savePoints=pInsertPoints->points;
1813 pInsertPoints->points=static_cast<Point *>(uprv_realloc(pInsertPoints->points,
1814 pInsertPoints->capacity*2*sizeof(Point)));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001815 if (pInsertPoints->points == NULL)
1816 {
1817 pInsertPoints->points=savePoints;
1818 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1819 return;
1820 }
1821 else pInsertPoints->capacity*=2;
1822 }
1823 point.pos=pos;
1824 point.flag=flag;
1825 pInsertPoints->points[pInsertPoints->size]=point;
1826 pInsertPoints->size++;
1827#undef FIRSTALLOC
1828}
1829
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001830static void
1831setLevelsOutsideIsolates(UBiDi *pBiDi, int32_t start, int32_t limit, UBiDiLevel level)
1832{
1833 DirProp *dirProps=pBiDi->dirProps, dirProp;
1834 UBiDiLevel *levels=pBiDi->levels;
1835 int32_t isolateCount=0, k;
1836 for(k=start; k<limit; k++) {
1837 dirProp=dirProps[k];
1838 if(dirProp==PDI)
1839 isolateCount--;
1840 if(isolateCount==0)
1841 levels[k]=level;
1842 if(dirProp==LRI || dirProp==RLI)
1843 isolateCount++;
1844 }
1845}
1846
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001847/* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
1848
1849/*
1850 * This implementation of the (Wn) rules applies all rules in one pass.
1851 * In order to do so, it needs a look-ahead of typically 1 character
1852 * (except for W5: sequences of ET) and keeps track of changes
1853 * in a rule Wp that affect a later Wq (p<q).
1854 *
1855 * The (Nn) and (In) rules are also performed in that same single loop,
1856 * but effectively one iteration behind for white space.
1857 *
1858 * Since all implicit rules are performed in one step, it is not necessary
1859 * to actually store the intermediate directional properties in dirProps[].
1860 */
1861
1862static void
1863processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
1864 int32_t start, int32_t limit) {
1865 uint8_t cell, oldStateSeq, actionSeq;
1866 const ImpTab * pImpTab=pLevState->pImpTab;
1867 const ImpAct * pImpAct=pLevState->pImpAct;
1868 UBiDiLevel * levels=pBiDi->levels;
1869 UBiDiLevel level, addLevel;
1870 InsertPoints * pInsertPoints;
1871 int32_t start0, k;
1872
1873 start0=start; /* save original start position */
1874 oldStateSeq=(uint8_t)pLevState->state;
1875 cell=(*pImpTab)[oldStateSeq][_prop];
1876 pLevState->state=GET_STATE(cell); /* isolate the new state */
1877 actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
1878 addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];
1879
1880 if(actionSeq) {
1881 switch(actionSeq) {
1882 case 1: /* init ON seq */
1883 pLevState->startON=start0;
1884 break;
1885
1886 case 2: /* prepend ON seq to current seq */
1887 start=pLevState->startON;
1888 break;
1889
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001890 case 3: /* EN/AN after R+ON */
1891 level=pLevState->runLevel+1;
1892 setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
1893 break;
1894
1895 case 4: /* EN/AN before R for NUMBERS_SPECIAL */
1896 level=pLevState->runLevel+2;
1897 setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
1898 break;
1899
1900 case 5: /* L or S after possible relevant EN/AN */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001901 /* check if we had EN after R/AL */
1902 if (pLevState->startL2EN >= 0) {
1903 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1904 }
1905 pLevState->startL2EN=-1; /* not within previous if since could also be -2 */
1906 /* check if we had any relevant EN/AN after R/AL */
1907 pInsertPoints=&(pBiDi->insertPoints);
1908 if ((pInsertPoints->capacity == 0) ||
1909 (pInsertPoints->size <= pInsertPoints->confirmed))
1910 {
1911 /* nothing, just clean up */
1912 pLevState->lastStrongRTL=-1;
1913 /* check if we have a pending conditional segment */
1914 level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
1915 if ((level & 1) && (pLevState->startON > 0)) { /* after ON */
1916 start=pLevState->startON; /* reset to basic run level */
1917 }
1918 if (_prop == DirProp_S) /* add LRM before S */
1919 {
1920 addPoint(pBiDi, start0, LRM_BEFORE);
1921 pInsertPoints->confirmed=pInsertPoints->size;
1922 }
1923 break;
1924 }
1925 /* reset previous RTL cont to level for LTR text */
1926 for (k=pLevState->lastStrongRTL+1; k<start0; k++)
1927 {
1928 /* reset odd level, leave runLevel+2 as is */
1929 levels[k]=(levels[k] - 2) & ~1;
1930 }
1931 /* mark insert points as confirmed */
1932 pInsertPoints->confirmed=pInsertPoints->size;
1933 pLevState->lastStrongRTL=-1;
1934 if (_prop == DirProp_S) /* add LRM before S */
1935 {
1936 addPoint(pBiDi, start0, LRM_BEFORE);
1937 pInsertPoints->confirmed=pInsertPoints->size;
1938 }
1939 break;
1940
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001941 case 6: /* R/AL after possible relevant EN/AN */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001942 /* just clean up */
1943 pInsertPoints=&(pBiDi->insertPoints);
1944 if (pInsertPoints->capacity > 0)
1945 /* remove all non confirmed insert points */
1946 pInsertPoints->size=pInsertPoints->confirmed;
1947 pLevState->startON=-1;
1948 pLevState->startL2EN=-1;
1949 pLevState->lastStrongRTL=limit - 1;
1950 break;
1951
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001952 case 7: /* EN/AN after R/AL + possible cont */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001953 /* check for real AN */
1954 if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) &&
1955 (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
1956 {
1957 /* real AN */
1958 if (pLevState->startL2EN == -1) /* if no relevant EN already found */
1959 {
1960 /* just note the righmost digit as a strong RTL */
1961 pLevState->lastStrongRTL=limit - 1;
1962 break;
1963 }
1964 if (pLevState->startL2EN >= 0) /* after EN, no AN */
1965 {
1966 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1967 pLevState->startL2EN=-2;
1968 }
1969 /* note AN */
1970 addPoint(pBiDi, start0, LRM_BEFORE);
1971 break;
1972 }
1973 /* if first EN/AN after R/AL */
1974 if (pLevState->startL2EN == -1) {
1975 pLevState->startL2EN=start0;
1976 }
1977 break;
1978
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001979 case 8: /* note location of latest R/AL */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001980 pLevState->lastStrongRTL=limit - 1;
1981 pLevState->startON=-1;
1982 break;
1983
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001984 case 9: /* L after R+ON/EN/AN */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001985 /* include possible adjacent number on the left */
1986 for (k=start0-1; k>=0 && !(levels[k]&1); k--);
1987 if(k>=0) {
1988 addPoint(pBiDi, k, RLM_BEFORE); /* add RLM before */
1989 pInsertPoints=&(pBiDi->insertPoints);
1990 pInsertPoints->confirmed=pInsertPoints->size; /* confirm it */
1991 }
1992 pLevState->startON=start0;
1993 break;
1994
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08001995 case 10: /* AN after L */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00001996 /* AN numbers between L text on both sides may be trouble. */
1997 /* tentatively bracket with LRMs; will be confirmed if followed by L */
1998 addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */
1999 addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */
2000 break;
2001
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002002 case 11: /* R after L+ON/EN/AN */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002003 /* false alert, infirm LRMs around previous AN */
2004 pInsertPoints=&(pBiDi->insertPoints);
2005 pInsertPoints->size=pInsertPoints->confirmed;
2006 if (_prop == DirProp_S) /* add RLM before S */
2007 {
2008 addPoint(pBiDi, start0, RLM_BEFORE);
2009 pInsertPoints->confirmed=pInsertPoints->size;
2010 }
2011 break;
2012
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002013 case 12: /* L after L+ON/AN */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002014 level=pLevState->runLevel + addLevel;
2015 for(k=pLevState->startON; k<start0; k++) {
2016 if (levels[k]<level)
2017 levels[k]=level;
2018 }
2019 pInsertPoints=&(pBiDi->insertPoints);
2020 pInsertPoints->confirmed=pInsertPoints->size; /* confirm inserts */
2021 pLevState->startON=start0;
2022 break;
2023
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002024 case 13: /* L after L+ON+EN/AN/ON */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002025 level=pLevState->runLevel;
2026 for(k=start0-1; k>=pLevState->startON; k--) {
2027 if(levels[k]==level+3) {
2028 while(levels[k]==level+3) {
2029 levels[k--]-=2;
2030 }
2031 while(levels[k]==level) {
2032 k--;
2033 }
2034 }
2035 if(levels[k]==level+2) {
2036 levels[k]=level;
2037 continue;
2038 }
2039 levels[k]=level+1;
2040 }
2041 break;
2042
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002043 case 14: /* R after L+ON+EN/AN/ON */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002044 level=pLevState->runLevel+1;
2045 for(k=start0-1; k>=pLevState->startON; k--) {
2046 if(levels[k]>level) {
2047 levels[k]-=2;
2048 }
2049 }
2050 break;
2051
2052 default: /* we should never get here */
2053 U_ASSERT(FALSE);
2054 break;
2055 }
2056 }
2057 if((addLevel) || (start < start0)) {
2058 level=pLevState->runLevel + addLevel;
2059 if(start>=pLevState->runStart) {
2060 for(k=start; k<limit; k++) {
2061 levels[k]=level;
2062 }
2063 } else {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002064 setLevelsOutsideIsolates(pBiDi, start, limit, level);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002065 }
2066 }
2067}
2068
2069/**
2070 * Returns the directionality of the last strong character at the end of the prologue, if any.
2071 * Requires prologue!=null.
2072 */
2073static DirProp
2074lastL_R_AL(UBiDi *pBiDi) {
2075 const UChar *text=pBiDi->prologue;
2076 int32_t length=pBiDi->proLength;
2077 int32_t i;
2078 UChar32 uchar;
2079 DirProp dirProp;
2080 for(i=length; i>0; ) {
2081 /* i is decremented by U16_PREV */
2082 U16_PREV(text, 0, i, uchar);
2083 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
2084 if(dirProp==L) {
2085 return DirProp_L;
2086 }
2087 if(dirProp==R || dirProp==AL) {
2088 return DirProp_R;
2089 }
2090 if(dirProp==B) {
2091 return DirProp_ON;
2092 }
2093 }
2094 return DirProp_ON;
2095}
2096
2097/**
2098 * Returns the directionality of the first strong character, or digit, in the epilogue, if any.
2099 * Requires epilogue!=null.
2100 */
2101static DirProp
2102firstL_R_AL_EN_AN(UBiDi *pBiDi) {
2103 const UChar *text=pBiDi->epilogue;
2104 int32_t length=pBiDi->epiLength;
2105 int32_t i;
2106 UChar32 uchar;
2107 DirProp dirProp;
2108 for(i=0; i<length; ) {
2109 /* i is incremented by U16_NEXT */
2110 U16_NEXT(text, i, length, uchar);
2111 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
2112 if(dirProp==L) {
2113 return DirProp_L;
2114 }
2115 if(dirProp==R || dirProp==AL) {
2116 return DirProp_R;
2117 }
2118 if(dirProp==EN) {
2119 return DirProp_EN;
2120 }
2121 if(dirProp==AN) {
2122 return DirProp_AN;
2123 }
2124 }
2125 return DirProp_ON;
2126}
2127
2128static void
2129resolveImplicitLevels(UBiDi *pBiDi,
2130 int32_t start, int32_t limit,
2131 DirProp sor, DirProp eor) {
2132 const DirProp *dirProps=pBiDi->dirProps;
2133 DirProp dirProp;
2134 LevState levState;
2135 int32_t i, start1, start2;
2136 uint16_t oldStateImp, stateImp, actionImp;
2137 uint8_t gprop, resProp, cell;
2138 UBool inverseRTL;
2139 DirProp nextStrongProp=R;
2140 int32_t nextStrongPos=-1;
2141
2142 /* check for RTL inverse BiDi mode */
2143 /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
2144 * loop on the text characters from end to start.
2145 * This would need a different properties state table (at least different
2146 * actions) and different levels state tables (maybe very similar to the
2147 * LTR corresponding ones.
2148 */
2149 inverseRTL=(UBool)
2150 ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
2151 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
2152 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
2153
2154 /* initialize for property and levels state tables */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002155 levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2156 levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2157 levState.runStart=start;
2158 levState.runLevel=pBiDi->levels[start];
2159 levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
2160 levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
2161 if(start==0 && pBiDi->proLength>0) {
2162 DirProp lastStrong=lastL_R_AL(pBiDi);
2163 if(lastStrong!=DirProp_ON) {
2164 sor=lastStrong;
2165 }
2166 }
2167 /* The isolates[] entries contain enough information to
2168 resume the bidi algorithm in the same state as it was
2169 when it was interrupted by an isolate sequence. */
Jungshik Shin (jungshik at google)7c817402015-03-21 00:26:40 -07002170 if(dirProps[start]==PDI && pBiDi->isolateCount >= 0) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002171 levState.startON=pBiDi->isolates[pBiDi->isolateCount].startON;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002172 start1=pBiDi->isolates[pBiDi->isolateCount].start1;
2173 stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp;
2174 levState.state=pBiDi->isolates[pBiDi->isolateCount].state;
2175 pBiDi->isolateCount--;
2176 } else {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002177 levState.startON=-1;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002178 start1=start;
2179 if(dirProps[start]==NSM)
2180 stateImp = 1 + sor;
2181 else
2182 stateImp=0;
2183 levState.state=0;
2184 processPropertySeq(pBiDi, &levState, sor, start, start);
2185 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002186 start2=start; /* to make Java compiler happy */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002187
2188 for(i=start; i<=limit; i++) {
2189 if(i>=limit) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002190 int32_t k;
2191 for(k=limit-1; k>start&&(DIRPROP_FLAG(dirProps[k])&MASK_BN_EXPLICIT); k--);
2192 dirProp=dirProps[k];
2193 if(dirProp==LRI || dirProp==RLI)
2194 break; /* no forced closing for sequence ending with LRI/RLI */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002195 gprop=eor;
2196 } else {
2197 DirProp prop, prop1;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002198 prop=dirProps[i];
2199 if(prop==B) {
2200 pBiDi->isolateCount=-1; /* current isolates stack entry == none */
2201 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002202 if(inverseRTL) {
2203 if(prop==AL) {
2204 /* AL before EN does not make it AN */
2205 prop=R;
2206 } else if(prop==EN) {
2207 if(nextStrongPos<=i) {
2208 /* look for next strong char (L/R/AL) */
2209 int32_t j;
2210 nextStrongProp=R; /* set default */
2211 nextStrongPos=limit;
2212 for(j=i+1; j<limit; j++) {
2213 prop1=dirProps[j];
2214 if(prop1==L || prop1==R || prop1==AL) {
2215 nextStrongProp=prop1;
2216 nextStrongPos=j;
2217 break;
2218 }
2219 }
2220 }
2221 if(nextStrongProp==AL) {
2222 prop=AN;
2223 }
2224 }
2225 }
2226 gprop=groupProp[prop];
2227 }
2228 oldStateImp=stateImp;
2229 cell=impTabProps[oldStateImp][gprop];
2230 stateImp=GET_STATEPROPS(cell); /* isolate the new state */
2231 actionImp=GET_ACTIONPROPS(cell); /* isolate the action */
2232 if((i==limit) && (actionImp==0)) {
2233 /* there is an unprocessed sequence if its property == eor */
2234 actionImp=1; /* process the last sequence */
2235 }
2236 if(actionImp) {
2237 resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
2238 switch(actionImp) {
2239 case 1: /* process current seq1, init new seq1 */
2240 processPropertySeq(pBiDi, &levState, resProp, start1, i);
2241 start1=i;
2242 break;
2243 case 2: /* init new seq2 */
2244 start2=i;
2245 break;
2246 case 3: /* process seq1, process seq2, init new seq1 */
2247 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2248 processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
2249 start1=i;
2250 break;
2251 case 4: /* process seq1, set seq1=seq2, init new seq2 */
2252 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2253 start1=start2;
2254 start2=i;
2255 break;
2256 default: /* we should never get here */
2257 U_ASSERT(FALSE);
2258 break;
2259 }
2260 }
2261 }
2262
2263 /* flush possible pending sequence, e.g. ON */
2264 if(limit==pBiDi->length && pBiDi->epiLength>0) {
2265 DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi);
2266 if(firstStrong!=DirProp_ON) {
2267 eor=firstStrong;
2268 }
2269 }
2270
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002271 /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */
2272 for(i=limit-1; i>start&&(DIRPROP_FLAG(dirProps[i])&MASK_BN_EXPLICIT); i--);
2273 dirProp=dirProps[i];
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002274 if((dirProp==LRI || dirProp==RLI) && limit<pBiDi->length) {
2275 pBiDi->isolateCount++;
2276 pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp;
2277 pBiDi->isolates[pBiDi->isolateCount].state=levState.state;
2278 pBiDi->isolates[pBiDi->isolateCount].start1=start1;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002279 pBiDi->isolates[pBiDi->isolateCount].startON=levState.startON;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002280 }
2281 else
2282 processPropertySeq(pBiDi, &levState, eor, limit, limit);
2283}
2284
2285/* perform (L1) and (X9) ---------------------------------------------------- */
2286
2287/*
2288 * Reset the embedding levels for some non-graphic characters (L1).
2289 * This function also sets appropriate levels for BN, and
2290 * explicit embedding types that are supposed to have been removed
2291 * from the paragraph in (X9).
2292 */
2293static void
2294adjustWSLevels(UBiDi *pBiDi) {
2295 const DirProp *dirProps=pBiDi->dirProps;
2296 UBiDiLevel *levels=pBiDi->levels;
2297 int32_t i;
2298
2299 if(pBiDi->flags&MASK_WS) {
2300 UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
2301 Flags flag;
2302
2303 i=pBiDi->trailingWSStart;
2304 while(i>0) {
2305 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002306 while(i>0 && (flag=DIRPROP_FLAG(dirProps[--i]))&MASK_WS) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002307 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2308 levels[i]=0;
2309 } else {
2310 levels[i]=GET_PARALEVEL(pBiDi, i);
2311 }
2312 }
2313
2314 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
2315 /* here, i+1 is guaranteed to be <length */
2316 while(i>0) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002317 flag=DIRPROP_FLAG(dirProps[--i]);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002318 if(flag&MASK_BN_EXPLICIT) {
2319 levels[i]=levels[i+1];
2320 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2321 levels[i]=0;
2322 break;
2323 } else if(flag&MASK_B_S) {
2324 levels[i]=GET_PARALEVEL(pBiDi, i);
2325 break;
2326 }
2327 }
2328 }
2329 }
2330}
2331
2332U_CAPI void U_EXPORT2
2333ubidi_setContext(UBiDi *pBiDi,
2334 const UChar *prologue, int32_t proLength,
2335 const UChar *epilogue, int32_t epiLength,
2336 UErrorCode *pErrorCode) {
2337 /* check the argument values */
2338 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2339 if(pBiDi==NULL || proLength<-1 || epiLength<-1 ||
2340 (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) {
2341 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2342 return;
2343 }
2344
2345 if(proLength==-1) {
2346 pBiDi->proLength=u_strlen(prologue);
2347 } else {
2348 pBiDi->proLength=proLength;
2349 }
2350 if(epiLength==-1) {
2351 pBiDi->epiLength=u_strlen(epilogue);
2352 } else {
2353 pBiDi->epiLength=epiLength;
2354 }
2355 pBiDi->prologue=prologue;
2356 pBiDi->epilogue=epilogue;
2357}
2358
2359static void
2360setParaSuccess(UBiDi *pBiDi) {
2361 pBiDi->proLength=0; /* forget the last context */
2362 pBiDi->epiLength=0;
2363 pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */
2364}
2365
2366#define BIDI_MIN(x, y) ((x)<(y) ? (x) : (y))
2367#define BIDI_ABS(x) ((x)>=0 ? (x) : (-(x)))
2368
2369static void
2370setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
2371 UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
Jungshik Shin87232d82017-05-13 21:10:13 -07002372 int32_t *runsOnlyMemory = NULL;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002373 int32_t *visualMap;
2374 UChar *visualText;
2375 int32_t saveLength, saveTrailingWSStart;
2376 const UBiDiLevel *levels;
2377 UBiDiLevel *saveLevels;
2378 UBiDiDirection saveDirection;
2379 UBool saveMayAllocateText;
2380 Run *runs;
2381 int32_t visualLength, i, j, visualStart, logicalStart,
2382 runCount, runLength, addedRuns, insertRemove,
2383 start, limit, step, indexOddBit, logicalPos,
2384 index0, index1;
2385 uint32_t saveOptions;
2386
2387 pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
2388 if(length==0) {
2389 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
2390 goto cleanup3;
2391 }
2392 /* obtain memory for mapping table and visual text */
Jungshik Shin87232d82017-05-13 21:10:13 -07002393 runsOnlyMemory=static_cast<int32_t *>(uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel))));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002394 if(runsOnlyMemory==NULL) {
2395 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2396 goto cleanup3;
2397 }
2398 visualMap=runsOnlyMemory;
2399 visualText=(UChar *)&visualMap[length];
2400 saveLevels=(UBiDiLevel *)&visualText[length];
2401 saveOptions=pBiDi->reorderingOptions;
2402 if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
2403 pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
2404 pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
2405 }
2406 paraLevel&=1; /* accept only 0 or 1 */
2407 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
2408 if(U_FAILURE(*pErrorCode)) {
2409 goto cleanup3;
2410 }
2411 /* we cannot access directly pBiDi->levels since it is not yet set if
2412 * direction is not MIXED
2413 */
2414 levels=ubidi_getLevels(pBiDi, pErrorCode);
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002415 uprv_memcpy(saveLevels, levels, (size_t)pBiDi->length*sizeof(UBiDiLevel));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002416 saveTrailingWSStart=pBiDi->trailingWSStart;
2417 saveLength=pBiDi->length;
2418 saveDirection=pBiDi->direction;
2419
2420 /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
2421 * the visual map and the dirProps array to drive the second call
2422 * to ubidi_setPara (but must make provision for possible removal of
2423 * BiDi controls. Alternatively, only use the dirProps array via
2424 * customized classifier callback.
2425 */
2426 visualLength=ubidi_writeReordered(pBiDi, visualText, length,
2427 UBIDI_DO_MIRRORING, pErrorCode);
2428 ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
2429 if(U_FAILURE(*pErrorCode)) {
2430 goto cleanup2;
2431 }
2432 pBiDi->reorderingOptions=saveOptions;
2433
2434 pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
2435 paraLevel^=1;
2436 /* Because what we did with reorderingOptions, visualText may be shorter
2437 * than the original text. But we don't want the levels memory to be
2438 * reallocated shorter than the original length, since we need to restore
2439 * the levels as after the first call to ubidi_setpara() before returning.
2440 * We will force mayAllocateText to FALSE before the second call to
2441 * ubidi_setpara(), and will restore it afterwards.
2442 */
2443 saveMayAllocateText=pBiDi->mayAllocateText;
2444 pBiDi->mayAllocateText=FALSE;
2445 ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
2446 pBiDi->mayAllocateText=saveMayAllocateText;
2447 ubidi_getRuns(pBiDi, pErrorCode);
2448 if(U_FAILURE(*pErrorCode)) {
2449 goto cleanup1;
2450 }
2451 /* check if some runs must be split, count how many splits */
2452 addedRuns=0;
2453 runCount=pBiDi->runCount;
2454 runs=pBiDi->runs;
2455 visualStart=0;
2456 for(i=0; i<runCount; i++, visualStart+=runLength) {
2457 runLength=runs[i].visualLimit-visualStart;
2458 if(runLength<2) {
2459 continue;
2460 }
2461 logicalStart=GET_INDEX(runs[i].logicalStart);
2462 for(j=logicalStart+1; j<logicalStart+runLength; j++) {
2463 index0=visualMap[j];
2464 index1=visualMap[j-1];
2465 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2466 addedRuns++;
2467 }
2468 }
2469 }
2470 if(addedRuns) {
2471 if(getRunsMemory(pBiDi, runCount+addedRuns)) {
2472 if(runCount==1) {
2473 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
2474 pBiDi->runsMemory[0]=runs[0];
2475 }
2476 runs=pBiDi->runs=pBiDi->runsMemory;
2477 pBiDi->runCount+=addedRuns;
2478 } else {
2479 goto cleanup1;
2480 }
2481 }
2482 /* split runs which are not consecutive in source text */
2483 for(i=runCount-1; i>=0; i--) {
2484 runLength= i==0 ? runs[0].visualLimit :
2485 runs[i].visualLimit-runs[i-1].visualLimit;
2486 logicalStart=runs[i].logicalStart;
2487 indexOddBit=GET_ODD_BIT(logicalStart);
2488 logicalStart=GET_INDEX(logicalStart);
2489 if(runLength<2) {
2490 if(addedRuns) {
2491 runs[i+addedRuns]=runs[i];
2492 }
2493 logicalPos=visualMap[logicalStart];
2494 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2495 saveLevels[logicalPos]^indexOddBit);
2496 continue;
2497 }
2498 if(indexOddBit) {
2499 start=logicalStart;
2500 limit=logicalStart+runLength-1;
2501 step=1;
2502 } else {
2503 start=logicalStart+runLength-1;
2504 limit=logicalStart;
2505 step=-1;
2506 }
2507 for(j=start; j!=limit; j+=step) {
2508 index0=visualMap[j];
2509 index1=visualMap[j+step];
2510 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2511 logicalPos=BIDI_MIN(visualMap[start], index0);
2512 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2513 saveLevels[logicalPos]^indexOddBit);
2514 runs[i+addedRuns].visualLimit=runs[i].visualLimit;
2515 runs[i].visualLimit-=BIDI_ABS(j-start)+1;
2516 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
2517 runs[i+addedRuns].insertRemove=insertRemove;
2518 runs[i].insertRemove&=~insertRemove;
2519 start=j+step;
2520 addedRuns--;
2521 }
2522 }
2523 if(addedRuns) {
2524 runs[i+addedRuns]=runs[i];
2525 }
2526 logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
2527 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2528 saveLevels[logicalPos]^indexOddBit);
2529 }
2530
2531 cleanup1:
2532 /* restore initial paraLevel */
2533 pBiDi->paraLevel^=1;
2534 cleanup2:
2535 /* restore real text */
2536 pBiDi->text=text;
2537 pBiDi->length=saveLength;
2538 pBiDi->originalLength=length;
2539 pBiDi->direction=saveDirection;
2540 /* the saved levels should never excess levelsSize, but we check anyway */
2541 if(saveLength>pBiDi->levelsSize) {
2542 saveLength=pBiDi->levelsSize;
2543 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -07002544 uprv_memcpy(pBiDi->levels, saveLevels, (size_t)saveLength*sizeof(UBiDiLevel));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002545 pBiDi->trailingWSStart=saveTrailingWSStart;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002546 if(pBiDi->runCount>1) {
2547 pBiDi->direction=UBIDI_MIXED;
2548 }
2549 cleanup3:
Jungshik Shin (jungshik at google)3a9ccbc2015-02-10 10:56:28 -08002550 /* free memory for mapping table and visual text */
2551 uprv_free(runsOnlyMemory);
2552
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002553 pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
2554}
2555
2556/* ubidi_setPara ------------------------------------------------------------ */
2557
2558U_CAPI void U_EXPORT2
2559ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
2560 UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
2561 UErrorCode *pErrorCode) {
2562 UBiDiDirection direction;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002563 DirProp *dirProps;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002564
2565 /* check the argument values */
2566 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2567 if(pBiDi==NULL || text==NULL || length<-1 ||
2568 (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
2569 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2570 return;
2571 }
2572
2573 if(length==-1) {
2574 length=u_strlen(text);
2575 }
2576
2577 /* special treatment for RUNS_ONLY mode */
2578 if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
2579 setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
2580 return;
2581 }
2582
2583 /* initialize the UBiDi structure */
2584 pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */
2585 pBiDi->text=text;
2586 pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
2587 pBiDi->paraLevel=paraLevel;
Jungshik Shin87232d82017-05-13 21:10:13 -07002588 pBiDi->direction=(UBiDiDirection)(paraLevel&1);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002589 pBiDi->paraCount=1;
2590
2591 pBiDi->dirProps=NULL;
2592 pBiDi->levels=NULL;
2593 pBiDi->runs=NULL;
2594 pBiDi->insertPoints.size=0; /* clean up from last call */
2595 pBiDi->insertPoints.confirmed=0; /* clean up from last call */
2596
2597 /*
2598 * Save the original paraLevel if contextual; otherwise, set to 0.
2599 */
2600 pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel);
2601
2602 if(length==0) {
2603 /*
2604 * For an empty paragraph, create a UBiDi object with the paraLevel and
2605 * the flags and the direction set but without allocating zero-length arrays.
2606 * There is nothing more to do.
2607 */
2608 if(IS_DEFAULT_LEVEL(paraLevel)) {
2609 pBiDi->paraLevel&=1;
2610 pBiDi->defaultParaLevel=0;
2611 }
2612 pBiDi->flags=DIRPROP_FLAG_LR(paraLevel);
2613 pBiDi->runCount=0;
2614 pBiDi->paraCount=0;
2615 setParaSuccess(pBiDi); /* mark successful setPara */
2616 return;
2617 }
2618
2619 pBiDi->runCount=-1;
2620
2621 /* allocate paras memory */
2622 if(pBiDi->parasMemory)
2623 pBiDi->paras=pBiDi->parasMemory;
2624 else
2625 pBiDi->paras=pBiDi->simpleParas;
2626
2627 /*
2628 * Get the directional properties,
2629 * the flags bit-set, and
2630 * determine the paragraph level if necessary.
2631 */
2632 if(getDirPropsMemory(pBiDi, length)) {
2633 pBiDi->dirProps=pBiDi->dirPropsMemory;
2634 if(!getDirProps(pBiDi)) {
2635 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2636 return;
2637 }
2638 } else {
2639 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2640 return;
2641 }
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002642 dirProps=pBiDi->dirProps;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002643 /* the processed length may have changed if UBIDI_OPTION_STREAMING */
2644 length= pBiDi->length;
2645 pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */
2646
2647 /* are explicit levels specified? */
2648 if(embeddingLevels==NULL) {
2649 /* no: determine explicit levels according to the (Xn) rules */\
2650 if(getLevelsMemory(pBiDi, length)) {
2651 pBiDi->levels=pBiDi->levelsMemory;
2652 direction=resolveExplicitLevels(pBiDi, pErrorCode);
2653 if(U_FAILURE(*pErrorCode)) {
2654 return;
2655 }
2656 } else {
2657 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2658 return;
2659 }
2660 } else {
2661 /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
2662 pBiDi->levels=embeddingLevels;
2663 direction=checkExplicitLevels(pBiDi, pErrorCode);
2664 if(U_FAILURE(*pErrorCode)) {
2665 return;
2666 }
2667 }
2668
2669 /* allocate isolate memory */
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002670 if(pBiDi->isolateCount<=SIMPLE_ISOLATES_COUNT)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002671 pBiDi->isolates=pBiDi->simpleIsolates;
2672 else
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002673 if((int32_t)(pBiDi->isolateCount*sizeof(Isolate))<=pBiDi->isolatesSize)
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002674 pBiDi->isolates=pBiDi->isolatesMemory;
2675 else {
2676 if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) {
2677 pBiDi->isolates=pBiDi->isolatesMemory;
2678 } else {
2679 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2680 return;
2681 }
2682 }
2683 pBiDi->isolateCount=-1; /* current isolates stack entry == none */
2684
2685 /*
2686 * The steps after (X9) in the UBiDi algorithm are performed only if
2687 * the paragraph text has mixed directionality!
2688 */
2689 pBiDi->direction=direction;
2690 switch(direction) {
2691 case UBIDI_LTR:
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002692 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
2693 pBiDi->trailingWSStart=0;
2694 break;
2695 case UBIDI_RTL:
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002696 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
2697 pBiDi->trailingWSStart=0;
2698 break;
2699 default:
2700 /*
2701 * Choose the right implicit state table
2702 */
2703 switch(pBiDi->reorderingMode) {
2704 case UBIDI_REORDER_DEFAULT:
2705 pBiDi->pImpTabPair=&impTab_DEFAULT;
2706 break;
2707 case UBIDI_REORDER_NUMBERS_SPECIAL:
2708 pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
2709 break;
2710 case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
2711 pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
2712 break;
2713 case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
2714 pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
2715 break;
2716 case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
2717 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
2718 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
2719 } else {
2720 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
2721 }
2722 break;
2723 case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
2724 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
2725 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
2726 } else {
2727 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
2728 }
2729 break;
2730 default:
2731 /* we should never get here */
2732 U_ASSERT(FALSE);
2733 break;
2734 }
2735 /*
2736 * If there are no external levels specified and there
2737 * are no significant explicit level codes in the text,
2738 * then we can treat the entire paragraph as one run.
2739 * Otherwise, we need to perform the following rules on runs of
2740 * the text with the same embedding levels. (X10)
2741 * "Significant" explicit level codes are ones that actually
2742 * affect non-BN characters.
2743 * Examples for "insignificant" ones are empty embeddings
2744 * LRE-PDF, LRE-RLE-PDF-PDF, etc.
2745 */
2746 if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
2747 !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
2748 resolveImplicitLevels(pBiDi, 0, length,
2749 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
2750 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
2751 } else {
2752 /* sor, eor: start and end types of same-level-run */
2753 UBiDiLevel *levels=pBiDi->levels;
2754 int32_t start, limit=0;
2755 UBiDiLevel level, nextLevel;
2756 DirProp sor, eor;
2757
2758 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
2759 level=GET_PARALEVEL(pBiDi, 0);
2760 nextLevel=levels[0];
2761 if(level<nextLevel) {
2762 eor=GET_LR_FROM_LEVEL(nextLevel);
2763 } else {
2764 eor=GET_LR_FROM_LEVEL(level);
2765 }
2766
2767 do {
2768 /* determine start and limit of the run (end points just behind the run) */
2769
2770 /* the values for this run's start are the same as for the previous run's end */
2771 start=limit;
2772 level=nextLevel;
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002773 if((start>0) && (dirProps[start-1]==B)) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002774 /* except if this is a new paragraph, then set sor = para level */
2775 sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
2776 } else {
2777 sor=eor;
2778 }
2779
2780 /* search for the limit of this run */
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002781 while((++limit<length) &&
2782 ((levels[limit]==level) ||
2783 (DIRPROP_FLAG(dirProps[limit])&MASK_BN_EXPLICIT))) {}
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002784
2785 /* get the correct level of the next run */
2786 if(limit<length) {
2787 nextLevel=levels[limit];
2788 } else {
2789 nextLevel=GET_PARALEVEL(pBiDi, length-1);
2790 }
2791
2792 /* determine eor from max(level, nextLevel); sor is last run's eor */
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002793 if(NO_OVERRIDE(level)<NO_OVERRIDE(nextLevel)) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002794 eor=GET_LR_FROM_LEVEL(nextLevel);
2795 } else {
2796 eor=GET_LR_FROM_LEVEL(level);
2797 }
2798
2799 /* if the run consists of overridden directional types, then there
2800 are no implicit types to be resolved */
2801 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
2802 resolveImplicitLevels(pBiDi, start, limit, sor, eor);
2803 } else {
2804 /* remove the UBIDI_LEVEL_OVERRIDE flags */
2805 do {
2806 levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
2807 } while(start<limit);
2808 }
2809 } while(limit<length);
2810 }
2811 /* check if we got any memory shortage while adding insert points */
2812 if (U_FAILURE(pBiDi->insertPoints.errorCode))
2813 {
2814 *pErrorCode=pBiDi->insertPoints.errorCode;
2815 return;
2816 }
2817 /* reset the embedding levels for some non-graphic characters (L1), (X9) */
2818 adjustWSLevels(pBiDi);
2819 break;
2820 }
2821 /* add RLM for inverse Bidi with contextual orientation resolving
2822 * to RTL which would not round-trip otherwise
2823 */
2824 if((pBiDi->defaultParaLevel>0) &&
2825 (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
2826 ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
2827 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
2828 int32_t i, j, start, last;
2829 UBiDiLevel level;
2830 DirProp dirProp;
2831 for(i=0; i<pBiDi->paraCount; i++) {
2832 last=(pBiDi->paras[i].limit)-1;
2833 level=pBiDi->paras[i].level;
2834 if(level==0)
2835 continue; /* LTR paragraph */
2836 start= i==0 ? 0 : pBiDi->paras[i-1].limit;
2837 for(j=last; j>=start; j--) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002838 dirProp=dirProps[j];
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002839 if(dirProp==L) {
2840 if(j<last) {
Jungshik Shin (jungshik at google)0f8746a2015-01-08 15:46:45 -08002841 while(dirProps[last]==B) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00002842 last--;
2843 }
2844 }
2845 addPoint(pBiDi, last, RLM_BEFORE);
2846 break;
2847 }
2848 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
2849 break;
2850 }
2851 }
2852 }
2853 }
2854
2855 if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
2856 pBiDi->resultLength -= pBiDi->controlCount;
2857 } else {
2858 pBiDi->resultLength += pBiDi->insertPoints.size;
2859 }
2860 setParaSuccess(pBiDi); /* mark successful setPara */
2861}
2862
2863U_CAPI void U_EXPORT2
2864ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
2865 if(pBiDi!=NULL) {
2866 pBiDi->orderParagraphsLTR=orderParagraphsLTR;
2867 }
2868}
2869
2870U_CAPI UBool U_EXPORT2
2871ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
2872 if(pBiDi!=NULL) {
2873 return pBiDi->orderParagraphsLTR;
2874 } else {
2875 return FALSE;
2876 }
2877}
2878
2879U_CAPI UBiDiDirection U_EXPORT2
2880ubidi_getDirection(const UBiDi *pBiDi) {
2881 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2882 return pBiDi->direction;
2883 } else {
2884 return UBIDI_LTR;
2885 }
2886}
2887
2888U_CAPI const UChar * U_EXPORT2
2889ubidi_getText(const UBiDi *pBiDi) {
2890 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2891 return pBiDi->text;
2892 } else {
2893 return NULL;
2894 }
2895}
2896
2897U_CAPI int32_t U_EXPORT2
2898ubidi_getLength(const UBiDi *pBiDi) {
2899 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2900 return pBiDi->originalLength;
2901 } else {
2902 return 0;
2903 }
2904}
2905
2906U_CAPI int32_t U_EXPORT2
2907ubidi_getProcessedLength(const UBiDi *pBiDi) {
2908 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2909 return pBiDi->length;
2910 } else {
2911 return 0;
2912 }
2913}
2914
2915U_CAPI int32_t U_EXPORT2
2916ubidi_getResultLength(const UBiDi *pBiDi) {
2917 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2918 return pBiDi->resultLength;
2919 } else {
2920 return 0;
2921 }
2922}
2923
2924/* paragraphs API functions ------------------------------------------------- */
2925
2926U_CAPI UBiDiLevel U_EXPORT2
2927ubidi_getParaLevel(const UBiDi *pBiDi) {
2928 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2929 return pBiDi->paraLevel;
2930 } else {
2931 return 0;
2932 }
2933}
2934
2935U_CAPI int32_t U_EXPORT2
2936ubidi_countParagraphs(UBiDi *pBiDi) {
2937 if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
2938 return 0;
2939 } else {
2940 return pBiDi->paraCount;
2941 }
2942}
2943
2944U_CAPI void U_EXPORT2
2945ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
2946 int32_t *pParaStart, int32_t *pParaLimit,
2947 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2948 int32_t paraStart;
2949
2950 /* check the argument values */
2951 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2952 RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
2953 RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
2954
2955 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */
2956 if(paraIndex) {
2957 paraStart=pBiDi->paras[paraIndex-1].limit;
2958 } else {
2959 paraStart=0;
2960 }
2961 if(pParaStart!=NULL) {
2962 *pParaStart=paraStart;
2963 }
2964 if(pParaLimit!=NULL) {
2965 *pParaLimit=pBiDi->paras[paraIndex].limit;
2966 }
2967 if(pParaLevel!=NULL) {
2968 *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
2969 }
2970}
2971
2972U_CAPI int32_t U_EXPORT2
2973ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
2974 int32_t *pParaStart, int32_t *pParaLimit,
2975 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2976 int32_t paraIndex;
2977
2978 /* check the argument values */
2979 /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
2980 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
2981 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
2982 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */
2983 RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
2984
2985 for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++);
2986 ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
2987 return paraIndex;
2988}
2989
2990U_CAPI void U_EXPORT2
2991ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
2992 const void *newContext, UBiDiClassCallback **oldFn,
2993 const void **oldContext, UErrorCode *pErrorCode)
2994{
2995 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2996 if(pBiDi==NULL) {
2997 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2998 return;
2999 }
3000 if( oldFn )
3001 {
3002 *oldFn = pBiDi->fnClassCallback;
3003 }
3004 if( oldContext )
3005 {
3006 *oldContext = pBiDi->coClassCallback;
3007 }
3008 pBiDi->fnClassCallback = newFn;
3009 pBiDi->coClassCallback = newContext;
3010}
3011
3012U_CAPI void U_EXPORT2
3013ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
3014{
3015 if(pBiDi==NULL) {
3016 return;
3017 }
3018 if( fn )
3019 {
3020 *fn = pBiDi->fnClassCallback;
3021 }
3022 if( context )
3023 {
3024 *context = pBiDi->coClassCallback;
3025 }
3026}
3027
3028U_CAPI UCharDirection U_EXPORT2
3029ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
3030{
3031 UCharDirection dir;
3032
3033 if( pBiDi->fnClassCallback == NULL ||
3034 (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
3035 {
3036 dir = ubidi_getClass(pBiDi->bdp, c);
3037 }
3038 if(dir >= U_CHAR_DIRECTION_COUNT) {
Jungshik Shin87232d82017-05-13 21:10:13 -07003039 dir = (UCharDirection)ON;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +00003040 }
3041 return dir;
3042}