blob: 95b55d56a02c47b7146ef7675480f75a0a943df9 [file] [log] [blame]
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2005-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  ucasemap.cpp
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2005may06
*   created by: Markus W. Scherer
*
*   Case mapping service object and functions using it.
*/
20
21#include "unicode/utypes.h"
22#include "unicode/brkiter.h"
Jungshik Shinb3189662017-11-07 11:18:34 -080023#include "unicode/bytestream.h"
Jungshik Shin87232d82017-05-13 21:10:13 -070024#include "unicode/casemap.h"
25#include "unicode/edits.h"
Jungshik Shinb3189662017-11-07 11:18:34 -080026#include "unicode/stringoptions.h"
27#include "unicode/stringpiece.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000028#include "unicode/ubrk.h"
29#include "unicode/uloc.h"
30#include "unicode/ustring.h"
31#include "unicode/ucasemap.h"
32#if !UCONFIG_NO_BREAK_ITERATION
33#include "unicode/utext.h"
34#endif
35#include "unicode/utf.h"
36#include "unicode/utf8.h"
37#include "unicode/utf16.h"
Jungshik Shinb3189662017-11-07 11:18:34 -080038#include "bytesinkutil.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000039#include "cmemory.h"
40#include "cstring.h"
Jungshik Shin87232d82017-05-13 21:10:13 -070041#include "uassert.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000042#include "ucase.h"
Jungshik Shin87232d82017-05-13 21:10:13 -070043#include "ucasemap_imp.h"
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000044#include "ustr_imp.h"
45
46U_NAMESPACE_USE
47
/* UCaseMap service object -------------------------------------------------- */
49
Jungshik Shin87232d82017-05-13 21:10:13 -070050UCaseMap::UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode) :
51#if !UCONFIG_NO_BREAK_ITERATION
52 iter(NULL),
53#endif
54 caseLocale(UCASE_LOC_UNKNOWN), options(opts) {
55 ucasemap_setLocale(this, localeID, pErrorCode);
56}
57
58UCaseMap::~UCaseMap() {
59#if !UCONFIG_NO_BREAK_ITERATION
60 delete iter;
61#endif
62}
63
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000064U_CAPI UCaseMap * U_EXPORT2
65ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000066 if(U_FAILURE(*pErrorCode)) {
67 return NULL;
68 }
Jungshik Shin87232d82017-05-13 21:10:13 -070069 UCaseMap *csm = new UCaseMap(locale, options, pErrorCode);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000070 if(csm==NULL) {
Jungshik Shin87232d82017-05-13 21:10:13 -070071 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
72 return NULL;
73 } else if (U_FAILURE(*pErrorCode)) {
74 delete csm;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000075 return NULL;
76 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000077 return csm;
78}
79
80U_CAPI void U_EXPORT2
81ucasemap_close(UCaseMap *csm) {
Jungshik Shin87232d82017-05-13 21:10:13 -070082 delete csm;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000083}
84
85U_CAPI const char * U_EXPORT2
86ucasemap_getLocale(const UCaseMap *csm) {
87 return csm->locale;
88}
89
90U_CAPI uint32_t U_EXPORT2
91ucasemap_getOptions(const UCaseMap *csm) {
92 return csm->options;
93}
94
95U_CAPI void U_EXPORT2
96ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +000097 if(U_FAILURE(*pErrorCode)) {
98 return;
99 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700100 if (locale != NULL && *locale == 0) {
101 csm->locale[0] = 0;
102 csm->caseLocale = UCASE_LOC_ROOT;
103 return;
104 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000105
Jungshik Shin87232d82017-05-13 21:10:13 -0700106 int32_t length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000107 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) {
108 *pErrorCode=U_ZERO_ERROR;
109 /* we only really need the language code for case mappings */
110 length=uloc_getLanguage(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
111 }
112 if(length==sizeof(csm->locale)) {
113 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
114 }
Frank Tangd2858cb2022-04-08 20:34:12 -0700115 if(U_SUCCESS(*pErrorCode)) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700116 csm->caseLocale = ucase_getCaseLocale(csm->locale);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000117 } else {
118 csm->locale[0]=0;
Jungshik Shin87232d82017-05-13 21:10:13 -0700119 csm->caseLocale = UCASE_LOC_ROOT;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000120 }
121}
122
123U_CAPI void U_EXPORT2
Jungshik Shin87232d82017-05-13 21:10:13 -0700124ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode) {
125 if(U_FAILURE(*pErrorCode)) {
126 return;
127 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000128 csm->options=options;
129}
130
/* UTF-8 string case mappings ----------------------------------------------- */

/* TODO(markus): Move to a new, separate utf8case.cpp file. */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000134
Jungshik Shinb3189662017-11-07 11:18:34 -0800135namespace {
136
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000137/* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
Jungshik Shinb3189662017-11-07 11:18:34 -0800138inline UBool
139appendResult(int32_t cpLength, int32_t result, const UChar *s,
140 ByteSink &sink, uint32_t options, icu::Edits *edits, UErrorCode &errorCode) {
141 U_ASSERT(U_SUCCESS(errorCode));
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000142
143 /* decode the result */
144 if(result<0) {
145 /* (not) original code point */
Jungshik Shin87232d82017-05-13 21:10:13 -0700146 if(edits!=NULL) {
147 edits->addUnchanged(cpLength);
Jungshik Shin87232d82017-05-13 21:10:13 -0700148 }
Jungshik Shinb3189662017-11-07 11:18:34 -0800149 if((options & U_OMIT_UNCHANGED_TEXT) == 0) {
150 ByteSinkUtil::appendCodePoint(cpLength, ~result, sink);
Jungshik Shin87232d82017-05-13 21:10:13 -0700151 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000152 } else {
Jungshik Shin87232d82017-05-13 21:10:13 -0700153 if(result<=UCASE_MAX_STRING_LENGTH) {
154 // string: "result" is the UTF-16 length
Jungshik Shinb3189662017-11-07 11:18:34 -0800155 return ByteSinkUtil::appendChange(cpLength, s, result, sink, edits, errorCode);
Jungshik Shin87232d82017-05-13 21:10:13 -0700156 } else {
Jungshik Shinb3189662017-11-07 11:18:34 -0800157 ByteSinkUtil::appendCodePoint(cpLength, result, sink, edits);
Jungshik Shin87232d82017-05-13 21:10:13 -0700158 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700159 }
Jungshik Shinb3189662017-11-07 11:18:34 -0800160 return TRUE;
Jungshik Shin87232d82017-05-13 21:10:13 -0700161}
162
163// See unicode/utf8.h U8_APPEND_UNSAFE().
Jungshik Shinb3189662017-11-07 11:18:34 -0800164inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); }
165inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); }
Jungshik Shin87232d82017-05-13 21:10:13 -0700166
Jungshik Shinf61e46d2018-05-04 13:00:45 -0700167UChar32 U_CALLCONV
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000168utf8_caseContextIterator(void *context, int8_t dir) {
169 UCaseContext *csc=(UCaseContext *)context;
170 UChar32 c;
171
172 if(dir<0) {
173 /* reset for backward iteration */
174 csc->index=csc->cpStart;
175 csc->dir=dir;
176 } else if(dir>0) {
177 /* reset for forward iteration */
178 csc->index=csc->cpLimit;
179 csc->dir=dir;
180 } else {
181 /* continue current iteration direction */
182 dir=csc->dir;
183 }
184
185 if(dir<0) {
186 if(csc->start<csc->index) {
187 U8_PREV((const uint8_t *)csc->p, csc->start, csc->index, c);
188 return c;
189 }
190 } else {
191 if(csc->index<csc->limit) {
192 U8_NEXT((const uint8_t *)csc->p, csc->index, csc->limit, c);
193 return c;
194 }
195 }
196 return U_SENTINEL;
197}
198
Jungshik Shinf61e46d2018-05-04 13:00:45 -0700199/**
200 * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account.
201 * caseLocale < 0: Case-folds [srcStart..srcLimit[.
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000202 */
Jungshik Shinf61e46d2018-05-04 13:00:45 -0700203void toLower(int32_t caseLocale, uint32_t options,
204 const uint8_t *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit,
205 icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) {
206 const int8_t *latinToLower;
207 if (caseLocale == UCASE_LOC_ROOT ||
208 (caseLocale >= 0 ?
209 !(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) :
210 (options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) {
211 latinToLower = LatinCase::TO_LOWER_NORMAL;
212 } else {
213 latinToLower = LatinCase::TO_LOWER_TR_LT;
214 }
215 const UTrie2 *trie = ucase_getTrie();
216 int32_t prev = srcStart;
217 int32_t srcIndex = srcStart;
218 for (;;) {
219 // fast path for simple cases
Jungshik Shin87232d82017-05-13 21:10:13 -0700220 int32_t cpStart;
Jungshik Shin87232d82017-05-13 21:10:13 -0700221 UChar32 c;
Jungshik Shinf61e46d2018-05-04 13:00:45 -0700222 for (;;) {
223 if (U_FAILURE(errorCode) || srcIndex >= srcLimit) {
224 c = U_SENTINEL;
225 break;
226 }
227 uint8_t lead = src[srcIndex++];
228 if (lead <= 0x7f) {
229 int8_t d = latinToLower[lead];
230 if (d == LatinCase::EXC) {
231 cpStart = srcIndex - 1;
232 c = lead;
233 break;
234 }
235 if (d == 0) { continue; }
236 ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev,
237 sink, options, edits, errorCode);
238 char ascii = (char)(lead + d);
239 sink.Append(&ascii, 1);
240 if (edits != nullptr) {
241 edits->addReplace(1, 1);
242 }
243 prev = srcIndex;
244 continue;
245 } else if (lead < 0xe3) {
246 uint8_t t;
247 if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLimit &&
248 (t = src[srcIndex] - 0x80) <= 0x3f) {
249 // U+0080..U+017F
250 ++srcIndex;
251 c = ((lead - 0xc0) << 6) | t;
252 int8_t d = latinToLower[c];
253 if (d == LatinCase::EXC) {
254 cpStart = srcIndex - 2;
255 break;
256 }
257 if (d == 0) { continue; }
258 ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 2 - prev,
259 sink, options, edits, errorCode);
260 ByteSinkUtil::appendTwoBytes(c + d, sink);
261 if (edits != nullptr) {
262 edits->addReplace(2, 2);
263 }
264 prev = srcIndex;
265 continue;
266 }
267 } else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) &&
268 (srcIndex + 2) <= srcLimit &&
269 U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) {
270 // most of CJK: no case mappings
271 srcIndex += 2;
272 continue;
273 }
274 cpStart = --srcIndex;
275 U8_NEXT(src, srcIndex, srcLimit, c);
276 if (c < 0) {
277 // ill-formed UTF-8
278 continue;
279 }
280 uint16_t props = UTRIE2_GET16(trie, c);
281 if (UCASE_HAS_EXCEPTION(props)) { break; }
282 int32_t delta;
283 if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) {
284 continue;
285 }
286 ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
Jungshik Shinb3189662017-11-07 11:18:34 -0800287 sink, options, edits, errorCode);
Jungshik Shinf61e46d2018-05-04 13:00:45 -0700288 ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits);
289 prev = srcIndex;
290 }
291 if (c < 0) {
292 break;
293 }
294 // slow path
295 const UChar *s;
296 if (caseLocale >= 0) {
297 csc->cpStart = cpStart;
298 csc->cpLimit = srcIndex;
299 c = ucase_toFullLower(c, utf8_caseContextIterator, csc, &s, caseLocale);
Jungshik Shinb3189662017-11-07 11:18:34 -0800300 } else {
Jungshik Shinf61e46d2018-05-04 13:00:45 -0700301 c = ucase_toFullFolding(c, &s, options);
302 }
303 if (c >= 0) {
304 ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
305 sink, options, edits, errorCode);
Jungshik Shinb3189662017-11-07 11:18:34 -0800306 appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
Jungshik Shinf61e46d2018-05-04 13:00:45 -0700307 prev = srcIndex;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000308 }
309 }
Jungshik Shinf61e46d2018-05-04 13:00:45 -0700310 ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev,
311 sink, options, edits, errorCode);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000312}
313
Jungshik Shinf61e46d2018-05-04 13:00:45 -0700314void toUpper(int32_t caseLocale, uint32_t options,
315 const uint8_t *src, UCaseContext *csc, int32_t srcLength,
316 icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) {
317 const int8_t *latinToUpper;
318 if (caseLocale == UCASE_LOC_TURKISH) {
319 latinToUpper = LatinCase::TO_UPPER_TR;
320 } else {
321 latinToUpper = LatinCase::TO_UPPER_NORMAL;
322 }
323 const UTrie2 *trie = ucase_getTrie();
324 int32_t prev = 0;
325 int32_t srcIndex = 0;
326 for (;;) {
327 // fast path for simple cases
328 int32_t cpStart;
329 UChar32 c;
330 for (;;) {
331 if (U_FAILURE(errorCode) || srcIndex >= srcLength) {
332 c = U_SENTINEL;
333 break;
334 }
335 uint8_t lead = src[srcIndex++];
336 if (lead <= 0x7f) {
337 int8_t d = latinToUpper[lead];
338 if (d == LatinCase::EXC) {
339 cpStart = srcIndex - 1;
340 c = lead;
341 break;
342 }
343 if (d == 0) { continue; }
344 ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev,
345 sink, options, edits, errorCode);
346 char ascii = (char)(lead + d);
347 sink.Append(&ascii, 1);
348 if (edits != nullptr) {
349 edits->addReplace(1, 1);
350 }
351 prev = srcIndex;
352 continue;
353 } else if (lead < 0xe3) {
354 uint8_t t;
355 if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLength &&
356 (t = src[srcIndex] - 0x80) <= 0x3f) {
357 // U+0080..U+017F
358 ++srcIndex;
359 c = ((lead - 0xc0) << 6) | t;
360 int8_t d = latinToUpper[c];
361 if (d == LatinCase::EXC) {
362 cpStart = srcIndex - 2;
363 break;
364 }
365 if (d == 0) { continue; }
366 ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 2 - prev,
367 sink, options, edits, errorCode);
368 ByteSinkUtil::appendTwoBytes(c + d, sink);
369 if (edits != nullptr) {
370 edits->addReplace(2, 2);
371 }
372 prev = srcIndex;
373 continue;
374 }
375 } else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) &&
376 (srcIndex + 2) <= srcLength &&
377 U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) {
378 // most of CJK: no case mappings
379 srcIndex += 2;
380 continue;
381 }
382 cpStart = --srcIndex;
383 U8_NEXT(src, srcIndex, srcLength, c);
384 if (c < 0) {
385 // ill-formed UTF-8
386 continue;
387 }
388 uint16_t props = UTRIE2_GET16(trie, c);
389 if (UCASE_HAS_EXCEPTION(props)) { break; }
390 int32_t delta;
391 if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) {
392 continue;
393 }
394 ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
395 sink, options, edits, errorCode);
396 ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits);
397 prev = srcIndex;
398 }
399 if (c < 0) {
400 break;
401 }
402 // slow path
403 csc->cpStart = cpStart;
404 csc->cpLimit = srcIndex;
405 const UChar *s;
406 c = ucase_toFullUpper(c, utf8_caseContextIterator, csc, &s, caseLocale);
407 if (c >= 0) {
408 ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
409 sink, options, edits, errorCode);
410 appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
411 prev = srcIndex;
412 }
413 }
414 ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev,
415 sink, options, edits, errorCode);
416}
417
418} // namespace
419
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000420#if !UCONFIG_NO_BREAK_ITERATION
421
Frank Tangd2858cb2022-04-08 20:34:12 -0700422namespace {
423
424constexpr uint8_t ACUTE_BYTE0 = u8"\u0301"[0];
425
426constexpr uint8_t ACUTE_BYTE1 = u8"\u0301"[1];
427
428/**
429 * Input: c is a letter I with or without acute accent.
430 * start is the index in src after c, and is less than segmentLimit.
431 * If a plain i/I is followed by a plain j/J,
432 * or an i/I with acute (precomposed or decomposed) is followed by a j/J with acute,
433 * then we output accordingly.
434 *
435 * @return the src index after the titlecased sequence, or the start index if no Dutch IJ
436 */
437int32_t maybeTitleDutchIJ(const uint8_t *src, UChar32 c, int32_t start, int32_t segmentLimit,
438 ByteSink &sink, uint32_t options, icu::Edits *edits, UErrorCode &errorCode) {
439 U_ASSERT(start < segmentLimit);
440
441 int32_t index = start;
442 bool withAcute = false;
443
444 // If the conditions are met, then the following variables tell us what to output.
445 int32_t unchanged1 = 0; // code units before the j, or the whole sequence (0..3)
446 bool doTitleJ = false; // true if the j needs to be titlecased
447 int32_t unchanged2 = 0; // after the j (0 or 1)
448
449 // next character after the first letter
450 UChar32 c2;
451 c2 = src[index++];
452
453 // Is the first letter an i/I with accent?
454 if (c == u'I') {
455 if (c2 == ACUTE_BYTE0 && index < segmentLimit && src[index++] == ACUTE_BYTE1) {
456 withAcute = true;
457 unchanged1 = 2; // ACUTE is 2 code units in UTF-8
458 if (index == segmentLimit) { return start; }
459 c2 = src[index++];
460 }
461 } else { // Í
462 withAcute = true;
463 }
464
465 // Is the next character a j/J?
466 if (c2 == u'j') {
467 doTitleJ = true;
468 } else if (c2 == u'J') {
469 ++unchanged1;
470 } else {
471 return start;
472 }
473
474 // A plain i/I must be followed by a plain j/J.
475 // An i/I with acute must be followed by a j/J with acute.
476 if (withAcute) {
477 if ((index + 1) >= segmentLimit || src[index++] != ACUTE_BYTE0 || src[index++] != ACUTE_BYTE1) {
478 return start;
479 }
480 if (doTitleJ) {
481 unchanged2 = 2; // ACUTE is 2 code units in UTF-8
482 } else {
483 unchanged1 = unchanged1 + 2; // ACUTE is 2 code units in UTF-8
484 }
485 }
486
487 // There must not be another combining mark.
488 if (index < segmentLimit) {
489 int32_t cp;
490 int32_t i = index;
491 U8_NEXT(src, i, segmentLimit, cp);
492 uint32_t typeMask = U_GET_GC_MASK(cp);
493 if ((typeMask & U_GC_M_MASK) != 0) {
494 return start;
495 }
496 }
497
498 // Output the rest of the Dutch IJ.
499 ByteSinkUtil::appendUnchanged(src + start, unchanged1, sink, options, edits, errorCode);
500 start += unchanged1;
501 if (doTitleJ) {
502 ByteSinkUtil::appendCodePoint(1, u'J', sink, edits);
503 ++start;
504 }
505 ByteSinkUtil::appendUnchanged(src + start, unchanged2, sink, options, edits, errorCode);
506
507 U_ASSERT(start + unchanged2 == index);
508 return index;
509}
510
511} // namespace
512
Jungshik Shinb3189662017-11-07 11:18:34 -0800513U_CFUNC void U_CALLCONV
Jungshik Shin87232d82017-05-13 21:10:13 -0700514ucasemap_internalUTF8ToTitle(
515 int32_t caseLocale, uint32_t options, BreakIterator *iter,
Jungshik Shin87232d82017-05-13 21:10:13 -0700516 const uint8_t *src, int32_t srcLength,
Jungshik Shinb3189662017-11-07 11:18:34 -0800517 ByteSink &sink, icu::Edits *edits,
Jungshik Shin87232d82017-05-13 21:10:13 -0700518 UErrorCode &errorCode) {
Jungshik Shinb3189662017-11-07 11:18:34 -0800519 if (!ustrcase_checkTitleAdjustmentOptions(options, errorCode)) {
520 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000521 }
522
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000523 /* set up local variables */
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000524 UCaseContext csc=UCASECONTEXT_INITIALIZER;
525 csc.p=(void *)src;
526 csc.limit=srcLength;
Jungshik Shin87232d82017-05-13 21:10:13 -0700527 int32_t prev=0;
528 UBool isFirstIndex=TRUE;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000529
530 /* titlecasing loop */
531 while(prev<srcLength) {
532 /* find next index where to titlecase */
Jungshik Shin87232d82017-05-13 21:10:13 -0700533 int32_t index;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000534 if(isFirstIndex) {
535 isFirstIndex=FALSE;
Jungshik Shin87232d82017-05-13 21:10:13 -0700536 index=iter->first();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000537 } else {
Jungshik Shin87232d82017-05-13 21:10:13 -0700538 index=iter->next();
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000539 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700540 if(index==UBRK_DONE || index>srcLength) {
541 index=srcLength;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000542 }
543
544 /*
Jungshik Shinb3189662017-11-07 11:18:34 -0800545 * Segment [prev..index[ into 3 parts:
546 * a) skipped characters (copy as-is) [prev..titleStart[
547 * b) first letter (titlecase) [titleStart..titleLimit[
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000548 * c) subsequent characters (lowercase) [titleLimit..index[
549 */
Jungshik Shin87232d82017-05-13 21:10:13 -0700550 if(prev<index) {
Jungshik Shinb3189662017-11-07 11:18:34 -0800551 /* find and copy skipped characters [prev..titleStart[ */
Jungshik Shin87232d82017-05-13 21:10:13 -0700552 int32_t titleStart=prev;
553 int32_t titleLimit=prev;
554 UChar32 c;
555 U8_NEXT(src, titleLimit, index, c);
Jungshik Shinb3189662017-11-07 11:18:34 -0800556 if ((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0) {
557 // Adjust the titlecasing index to the next cased character,
558 // or to the next letter/number/symbol/private use.
559 // Stop with titleStart<titleLimit<=index
560 // if there is a character to be titlecased,
561 // or else stop with titleStart==titleLimit==index.
562 UBool toCased = (options&U_TITLECASE_ADJUST_TO_CASED) != 0;
563 while (toCased ? UCASE_NONE==ucase_getType(c) : !ustrcase_isLNS(c)) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000564 titleStart=titleLimit;
Jungshik Shin87232d82017-05-13 21:10:13 -0700565 if(titleLimit==index) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000566 break;
567 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700568 U8_NEXT(src, titleLimit, index, c);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000569 }
Jungshik Shinb3189662017-11-07 11:18:34 -0800570 if (prev < titleStart) {
571 if (!ByteSinkUtil::appendUnchanged(src+prev, titleStart-prev,
572 sink, options, edits, errorCode)) {
573 return;
574 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000575 }
576 }
577
578 if(titleStart<titleLimit) {
579 /* titlecase c which is from [titleStart..titleLimit[ */
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700580 if(c>=0) {
581 csc.cpStart=titleStart;
582 csc.cpLimit=titleLimit;
Jungshik Shin87232d82017-05-13 21:10:13 -0700583 const UChar *s;
584 c=ucase_toFullTitle(c, utf8_caseContextIterator, &csc, &s, caseLocale);
Jungshik Shinb3189662017-11-07 11:18:34 -0800585 if (!appendResult(titleLimit-titleStart, c, s, sink, options, edits, errorCode)) {
586 return;
587 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700588 } else {
589 // Malformed UTF-8.
Jungshik Shinb3189662017-11-07 11:18:34 -0800590 if (!ByteSinkUtil::appendUnchanged(src+titleStart, titleLimit-titleStart,
591 sink, options, edits, errorCode)) {
592 return;
593 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700594 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000595
596 /* Special case Dutch IJ titlecasing */
Frank Tangd2858cb2022-04-08 20:34:12 -0700597 if (titleLimit < index &&
598 caseLocale == UCASE_LOC_DUTCH) {
599 if (c < 0) {
600 c = ~c;
601 }
602
603 if (c == u'I' || c == u'Í') {
604 titleLimit = maybeTitleDutchIJ(src, c, titleLimit, index, sink, options, edits, errorCode);
Jungshik Shin87232d82017-05-13 21:10:13 -0700605 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000606 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700607
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000608 /* lowercase [titleLimit..index[ */
Jungshik Shin87232d82017-05-13 21:10:13 -0700609 if(titleLimit<index) {
610 if((options&U_TITLECASE_NO_LOWERCASE)==0) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000611 /* Normal operation: Lowercase the rest of the word. */
Jungshik Shinf61e46d2018-05-04 13:00:45 -0700612 toLower(caseLocale, options,
613 src, &csc, titleLimit, index,
614 sink, edits, errorCode);
Jungshik Shin87232d82017-05-13 21:10:13 -0700615 if(U_FAILURE(errorCode)) {
Jungshik Shinb3189662017-11-07 11:18:34 -0800616 return;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700617 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000618 } else {
619 /* Optionally just copy the rest of the word unchanged. */
Jungshik Shinb3189662017-11-07 11:18:34 -0800620 if (!ByteSinkUtil::appendUnchanged(src+titleLimit, index-titleLimit,
621 sink, options, edits, errorCode)) {
622 return;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000623 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000624 }
625 }
626 }
627 }
628
Jungshik Shin87232d82017-05-13 21:10:13 -0700629 prev=index;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000630 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000631}
632
633#endif
634
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700635U_NAMESPACE_BEGIN
636namespace GreekUpper {
637
Jungshik Shin87232d82017-05-13 21:10:13 -0700638UBool isFollowedByCasedLetter(const uint8_t *s, int32_t i, int32_t length) {
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700639 while (i < length) {
640 UChar32 c;
641 U8_NEXT(s, i, length, c);
Jungshik Shin87232d82017-05-13 21:10:13 -0700642 int32_t type = ucase_getTypeOrIgnorable(c);
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700643 if ((type & UCASE_IGNORABLE) != 0) {
644 // Case-ignorable, continue with the loop.
645 } else if (type != UCASE_NONE) {
646 return TRUE; // Followed by cased letter.
647 } else {
648 return FALSE; // Uncased and not case-ignorable.
649 }
650 }
651 return FALSE; // Not followed by cased letter.
652}
653
654// Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java.
Jungshik Shinb3189662017-11-07 11:18:34 -0800655void toUpper(uint32_t options,
656 const uint8_t *src, int32_t srcLength,
657 ByteSink &sink, Edits *edits,
658 UErrorCode &errorCode) {
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700659 uint32_t state = 0;
660 for (int32_t i = 0; i < srcLength;) {
661 int32_t nextIndex = i;
662 UChar32 c;
663 U8_NEXT(src, nextIndex, srcLength, c);
664 uint32_t nextState = 0;
Jungshik Shin87232d82017-05-13 21:10:13 -0700665 int32_t type = ucase_getTypeOrIgnorable(c);
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700666 if ((type & UCASE_IGNORABLE) != 0) {
667 // c is case-ignorable
668 nextState |= (state & AFTER_CASED);
669 } else if (type != UCASE_NONE) {
670 // c is cased
671 nextState |= AFTER_CASED;
672 }
673 uint32_t data = getLetterData(c);
674 if (data > 0) {
675 uint32_t upper = data & UPPER_MASK;
676 // Add a dialytika to this iota or ypsilon vowel
677 // if we removed a tonos from the previous vowel,
678 // and that previous vowel did not also have (or gain) a dialytika.
679 // Adding one only to the final vowel in a longer sequence
680 // (which does not occur in normal writing) would require lookahead.
681 // Set the same flag as for preserving an existing dialytika.
682 if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 &&
683 (upper == 0x399 || upper == 0x3A5)) {
684 data |= HAS_DIALYTIKA;
685 }
686 int32_t numYpogegrammeni = 0; // Map each one to a trailing, spacing, capital iota.
687 if ((data & HAS_YPOGEGRAMMENI) != 0) {
688 numYpogegrammeni = 1;
689 }
690 // Skip combining diacritics after this Greek letter.
691 int32_t nextNextIndex = nextIndex;
692 while (nextIndex < srcLength) {
693 UChar32 c2;
694 U8_NEXT(src, nextNextIndex, srcLength, c2);
695 uint32_t diacriticData = getDiacriticData(c2);
696 if (diacriticData != 0) {
697 data |= diacriticData;
698 if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
699 ++numYpogegrammeni;
700 }
701 nextIndex = nextNextIndex;
702 } else {
703 break; // not a Greek diacritic
704 }
705 }
706 if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {
707 nextState |= AFTER_VOWEL_WITH_ACCENT;
708 }
709 // Map according to Greek rules.
710 UBool addTonos = FALSE;
711 if (upper == 0x397 &&
712 (data & HAS_ACCENT) != 0 &&
713 numYpogegrammeni == 0 &&
714 (state & AFTER_CASED) == 0 &&
Jungshik Shin87232d82017-05-13 21:10:13 -0700715 !isFollowedByCasedLetter(src, nextIndex, srcLength)) {
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700716 // Keep disjunctive "or" with (only) a tonos.
717 // We use the same "word boundary" conditions as for the Final_Sigma test.
718 if (i == nextIndex) {
719 upper = 0x389; // Preserve the precomposed form.
720 } else {
721 addTonos = TRUE;
722 }
723 } else if ((data & HAS_DIALYTIKA) != 0) {
724 // Preserve a vowel with dialytika in precomposed form if it exists.
725 if (upper == 0x399) {
726 upper = 0x3AA;
727 data &= ~HAS_EITHER_DIALYTIKA;
728 } else if (upper == 0x3A5) {
729 upper = 0x3AB;
730 data &= ~HAS_EITHER_DIALYTIKA;
731 }
732 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700733
Jungshik Shinb3189662017-11-07 11:18:34 -0800734 UBool change;
735 if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) {
736 change = TRUE; // common, simple usage
737 } else {
Jungshik Shin87232d82017-05-13 21:10:13 -0700738 // Find out first whether we are changing the text.
739 U_ASSERT(0x370 <= upper && upper <= 0x3ff); // 2-byte UTF-8, main Greek block
740 change = (i + 2) > nextIndex ||
741 src[i] != getTwoByteLead(upper) || src[i + 1] != getTwoByteTrail(upper) ||
742 numYpogegrammeni > 0;
743 int32_t i2 = i + 2;
744 if ((data & HAS_EITHER_DIALYTIKA) != 0) {
745 change |= (i2 + 2) > nextIndex ||
746 src[i2] != (uint8_t)u8"\u0308"[0] ||
747 src[i2 + 1] != (uint8_t)u8"\u0308"[1];
748 i2 += 2;
749 }
750 if (addTonos) {
751 change |= (i2 + 2) > nextIndex ||
752 src[i2] != (uint8_t)u8"\u0301"[0] ||
753 src[i2 + 1] != (uint8_t)u8"\u0301"[1];
754 i2 += 2;
755 }
756 int32_t oldLength = nextIndex - i;
757 int32_t newLength = (i2 - i) + numYpogegrammeni * 2; // 2 bytes per U+0399
758 change |= oldLength != newLength;
759 if (change) {
760 if (edits != NULL) {
761 edits->addReplace(oldLength, newLength);
762 }
763 } else {
764 if (edits != NULL) {
765 edits->addUnchanged(oldLength);
766 }
767 // Write unchanged text?
Jungshik Shinb3189662017-11-07 11:18:34 -0800768 change = (options & U_OMIT_UNCHANGED_TEXT) == 0;
Jungshik Shin87232d82017-05-13 21:10:13 -0700769 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700770 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700771
772 if (change) {
Jungshik Shinb3189662017-11-07 11:18:34 -0800773 ByteSinkUtil::appendTwoBytes(upper, sink);
774 if ((data & HAS_EITHER_DIALYTIKA) != 0) {
Frank Tangf2223962020-04-27 18:25:29 -0700775 sink.AppendU8(u8"\u0308", 2); // restore or add a dialytika
Jungshik Shin87232d82017-05-13 21:10:13 -0700776 }
Jungshik Shinb3189662017-11-07 11:18:34 -0800777 if (addTonos) {
Frank Tangf2223962020-04-27 18:25:29 -0700778 sink.AppendU8(u8"\u0301", 2);
Jungshik Shin87232d82017-05-13 21:10:13 -0700779 }
Jungshik Shinb3189662017-11-07 11:18:34 -0800780 while (numYpogegrammeni > 0) {
Frank Tangf2223962020-04-27 18:25:29 -0700781 sink.AppendU8(u8"\u0399", 2);
Jungshik Shin87232d82017-05-13 21:10:13 -0700782 --numYpogegrammeni;
783 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700784 }
Jungshik Shin87232d82017-05-13 21:10:13 -0700785 } else if(c>=0) {
786 const UChar *s;
787 c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK);
Jungshik Shinb3189662017-11-07 11:18:34 -0800788 if (!appendResult(nextIndex - i, c, s, sink, options, edits, errorCode)) {
789 return;
Jungshik Shin87232d82017-05-13 21:10:13 -0700790 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700791 } else {
792 // Malformed UTF-8.
Jungshik Shinb3189662017-11-07 11:18:34 -0800793 if (!ByteSinkUtil::appendUnchanged(src+i, nextIndex-i,
794 sink, options, edits, errorCode)) {
795 return;
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700796 }
797 }
798 i = nextIndex;
799 state = nextState;
800 }
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700801}
802
803} // namespace GreekUpper
804U_NAMESPACE_END
805
Jungshik Shinb3189662017-11-07 11:18:34 -0800806static void U_CALLCONV
Jungshik Shin87232d82017-05-13 21:10:13 -0700807ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000808 const uint8_t *src, int32_t srcLength,
Jungshik Shinb3189662017-11-07 11:18:34 -0800809 icu::ByteSink &sink, icu::Edits *edits,
Jungshik Shin87232d82017-05-13 21:10:13 -0700810 UErrorCode &errorCode) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000811 UCaseContext csc=UCASECONTEXT_INITIALIZER;
812 csc.p=(void *)src;
813 csc.limit=srcLength;
Jungshik Shinf61e46d2018-05-04 13:00:45 -0700814 toLower(
815 caseLocale, options,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000816 src, &csc, 0, srcLength,
Jungshik Shinb3189662017-11-07 11:18:34 -0800817 sink, edits, errorCode);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000818}
819
Jungshik Shinb3189662017-11-07 11:18:34 -0800820static void U_CALLCONV
Jungshik Shin87232d82017-05-13 21:10:13 -0700821ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000822 const uint8_t *src, int32_t srcLength,
Jungshik Shinb3189662017-11-07 11:18:34 -0800823 icu::ByteSink &sink, icu::Edits *edits,
Jungshik Shin87232d82017-05-13 21:10:13 -0700824 UErrorCode &errorCode) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700825 if (caseLocale == UCASE_LOC_GREEK) {
Jungshik Shinb3189662017-11-07 11:18:34 -0800826 GreekUpper::toUpper(options, src, srcLength, sink, edits, errorCode);
Jungshik Shin87232d82017-05-13 21:10:13 -0700827 } else {
828 UCaseContext csc=UCASECONTEXT_INITIALIZER;
829 csc.p=(void *)src;
830 csc.limit=srcLength;
Jungshik Shinf61e46d2018-05-04 13:00:45 -0700831 toUpper(
832 caseLocale, options,
833 src, &csc, srcLength,
Jungshik Shinb3189662017-11-07 11:18:34 -0800834 sink, edits, errorCode);
Jungshik Shin5feb9ad2016-10-21 12:52:48 -0700835 }
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000836}
837
Jungshik Shinb3189662017-11-07 11:18:34 -0800838static void U_CALLCONV
Jungshik Shin87232d82017-05-13 21:10:13 -0700839ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
Jungshik Shin87232d82017-05-13 21:10:13 -0700840 const uint8_t *src, int32_t srcLength,
Jungshik Shinb3189662017-11-07 11:18:34 -0800841 icu::ByteSink &sink, icu::Edits *edits,
Jungshik Shin87232d82017-05-13 21:10:13 -0700842 UErrorCode &errorCode) {
Jungshik Shinf61e46d2018-05-04 13:00:45 -0700843 toLower(
844 -1, options,
845 src, nullptr, 0, srcLength,
846 sink, edits, errorCode);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000847}
848
Jungshik Shinb3189662017-11-07 11:18:34 -0800849void
Jungshik Shin87232d82017-05-13 21:10:13 -0700850ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
Jungshik Shinb3189662017-11-07 11:18:34 -0800851 const char *src, int32_t srcLength,
852 UTF8CaseMapper *stringCaseMapper,
853 icu::ByteSink &sink, icu::Edits *edits,
854 UErrorCode &errorCode) {
855 /* check argument values */
856 if (U_FAILURE(errorCode)) {
857 return;
858 }
859 if ((src == nullptr && srcLength != 0) || srcLength < -1) {
860 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
861 return;
862 }
863
864 // Get the string length.
865 if (srcLength == -1) {
866 srcLength = (int32_t)uprv_strlen((const char *)src);
867 }
868
869 if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
870 edits->reset();
871 }
872 stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
873 (const uint8_t *)src, srcLength, sink, edits, errorCode);
874 sink.Flush();
875 if (U_SUCCESS(errorCode)) {
876 if (edits != nullptr) {
877 edits->copyErrorTo(errorCode);
878 }
879 }
880}
881
882int32_t
883ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
884 char *dest, int32_t destCapacity,
885 const char *src, int32_t srcLength,
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000886 UTF8CaseMapper *stringCaseMapper,
Jungshik Shin87232d82017-05-13 21:10:13 -0700887 icu::Edits *edits,
888 UErrorCode &errorCode) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000889 /* check argument values */
Jungshik Shin87232d82017-05-13 21:10:13 -0700890 if(U_FAILURE(errorCode)) {
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000891 return 0;
892 }
893 if( destCapacity<0 ||
894 (dest==NULL && destCapacity>0) ||
Jungshik Shinb3189662017-11-07 11:18:34 -0800895 (src==NULL && srcLength!=0) || srcLength<-1
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000896 ) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700897 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000898 return 0;
899 }
900
901 /* get the string length */
902 if(srcLength==-1) {
903 srcLength=(int32_t)uprv_strlen((const char *)src);
904 }
905
906 /* check for overlapping source and destination */
907 if( dest!=NULL &&
908 ((src>=dest && src<(dest+destCapacity)) ||
909 (dest>=src && dest<(src+srcLength)))
910 ) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700911 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000912 return 0;
913 }
914
Jungshik Shinb3189662017-11-07 11:18:34 -0800915 CheckedArrayByteSink sink(dest, destCapacity);
916 if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700917 edits->reset();
918 }
Jungshik Shinb3189662017-11-07 11:18:34 -0800919 stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
920 (const uint8_t *)src, srcLength, sink, edits, errorCode);
921 sink.Flush();
922 if (U_SUCCESS(errorCode)) {
923 if (sink.Overflowed()) {
924 errorCode = U_BUFFER_OVERFLOW_ERROR;
925 } else if (edits != nullptr) {
926 edits->copyErrorTo(errorCode);
927 }
928 }
929 return u_terminateChars(dest, destCapacity, sink.NumberOfBytesAppended(), &errorCode);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000930}
931
932/* public API functions */
933
934U_CAPI int32_t U_EXPORT2
935ucasemap_utf8ToLower(const UCaseMap *csm,
936 char *dest, int32_t destCapacity,
937 const char *src, int32_t srcLength,
938 UErrorCode *pErrorCode) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700939 return ucasemap_mapUTF8(
940 csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
Jungshik Shinb3189662017-11-07 11:18:34 -0800941 dest, destCapacity,
942 src, srcLength,
Jungshik Shin87232d82017-05-13 21:10:13 -0700943 ucasemap_internalUTF8ToLower, NULL, *pErrorCode);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000944}
945
946U_CAPI int32_t U_EXPORT2
947ucasemap_utf8ToUpper(const UCaseMap *csm,
948 char *dest, int32_t destCapacity,
949 const char *src, int32_t srcLength,
950 UErrorCode *pErrorCode) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700951 return ucasemap_mapUTF8(
952 csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
Jungshik Shinb3189662017-11-07 11:18:34 -0800953 dest, destCapacity,
954 src, srcLength,
Jungshik Shin87232d82017-05-13 21:10:13 -0700955 ucasemap_internalUTF8ToUpper, NULL, *pErrorCode);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000956}
957
958U_CAPI int32_t U_EXPORT2
959ucasemap_utf8FoldCase(const UCaseMap *csm,
960 char *dest, int32_t destCapacity,
961 const char *src, int32_t srcLength,
962 UErrorCode *pErrorCode) {
Jungshik Shin87232d82017-05-13 21:10:13 -0700963 return ucasemap_mapUTF8(
964 UCASE_LOC_ROOT, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
Jungshik Shinb3189662017-11-07 11:18:34 -0800965 dest, destCapacity,
966 src, srcLength,
Jungshik Shin87232d82017-05-13 21:10:13 -0700967 ucasemap_internalUTF8Fold, NULL, *pErrorCode);
jshin@chromium.org6f31ac32014-03-26 22:15:14 +0000968}
Jungshik Shin87232d82017-05-13 21:10:13 -0700969
970U_NAMESPACE_BEGIN
971
Jungshik Shinb3189662017-11-07 11:18:34 -0800972void CaseMap::utf8ToLower(
973 const char *locale, uint32_t options,
974 StringPiece src, ByteSink &sink, Edits *edits,
975 UErrorCode &errorCode) {
976 ucasemap_mapUTF8(
977 ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
978 src.data(), src.length(),
979 ucasemap_internalUTF8ToLower, sink, edits, errorCode);
980}
981
982void CaseMap::utf8ToUpper(
983 const char *locale, uint32_t options,
984 StringPiece src, ByteSink &sink, Edits *edits,
985 UErrorCode &errorCode) {
986 ucasemap_mapUTF8(
987 ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
988 src.data(), src.length(),
989 ucasemap_internalUTF8ToUpper, sink, edits, errorCode);
990}
991
992void CaseMap::utf8Fold(
993 uint32_t options,
994 StringPiece src, ByteSink &sink, Edits *edits,
995 UErrorCode &errorCode) {
996 ucasemap_mapUTF8(
997 UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
998 src.data(), src.length(),
999 ucasemap_internalUTF8Fold, sink, edits, errorCode);
1000}
1001
Jungshik Shin87232d82017-05-13 21:10:13 -07001002int32_t CaseMap::utf8ToLower(
1003 const char *locale, uint32_t options,
1004 const char *src, int32_t srcLength,
1005 char *dest, int32_t destCapacity, Edits *edits,
1006 UErrorCode &errorCode) {
1007 return ucasemap_mapUTF8(
1008 ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
Jungshik Shinb3189662017-11-07 11:18:34 -08001009 dest, destCapacity,
1010 src, srcLength,
Jungshik Shin87232d82017-05-13 21:10:13 -07001011 ucasemap_internalUTF8ToLower, edits, errorCode);
1012}
1013
1014int32_t CaseMap::utf8ToUpper(
1015 const char *locale, uint32_t options,
1016 const char *src, int32_t srcLength,
1017 char *dest, int32_t destCapacity, Edits *edits,
1018 UErrorCode &errorCode) {
1019 return ucasemap_mapUTF8(
1020 ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
Jungshik Shinb3189662017-11-07 11:18:34 -08001021 dest, destCapacity,
1022 src, srcLength,
Jungshik Shin87232d82017-05-13 21:10:13 -07001023 ucasemap_internalUTF8ToUpper, edits, errorCode);
1024}
1025
1026int32_t CaseMap::utf8Fold(
1027 uint32_t options,
1028 const char *src, int32_t srcLength,
1029 char *dest, int32_t destCapacity, Edits *edits,
1030 UErrorCode &errorCode) {
1031 return ucasemap_mapUTF8(
1032 UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
Jungshik Shinb3189662017-11-07 11:18:34 -08001033 dest, destCapacity,
1034 src, srcLength,
Jungshik Shin87232d82017-05-13 21:10:13 -07001035 ucasemap_internalUTF8Fold, edits, errorCode);
1036}
1037
1038U_NAMESPACE_END