Jungshik Shin | 87232d8 | 2017-05-13 21:10:13 -0700 | [diff] [blame] | 1 | // © 2016 and later: Unicode, Inc. and others. |
Jungshik Shin | 5feb9ad | 2016-10-21 12:52:48 -0700 | [diff] [blame] | 2 | // License & terms of use: http://www.unicode.org/copyright.html |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 3 | /* |
| 4 | ********************************************************************** |
Jungshik Shin | 5feb9ad | 2016-10-21 12:52:48 -0700 | [diff] [blame] | 5 | * Copyright (C) 2005-2016, International Business Machines |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 6 | * Corporation and others. All Rights Reserved. |
| 7 | ********************************************************************** |
| 8 | */ |
| 9 | |
| 10 | #include "unicode/utypes.h" |
| 11 | |
| 12 | #if !UCONFIG_NO_CONVERSION |
| 13 | |
| 14 | #include "unicode/ucsdet.h" |
| 15 | |
| 16 | #include "csdetect.h" |
| 17 | #include "csmatch.h" |
| 18 | #include "uenumimp.h" |
| 19 | |
| 20 | #include "cmemory.h" |
| 21 | #include "cstring.h" |
| 22 | #include "umutex.h" |
| 23 | #include "ucln_in.h" |
| 24 | #include "uarrsort.h" |
| 25 | #include "inputext.h" |
| 26 | #include "csrsbcs.h" |
| 27 | #include "csrmbcs.h" |
| 28 | #include "csrutf8.h" |
| 29 | #include "csrucode.h" |
| 30 | #include "csr2022.h" |
| 31 | |
// Allocates raw storage for `count` objects of `type` through uprv_malloc.
// No constructors are run; the result must be checked for NULL by the caller.
#define NEW_ARRAY(type,count) (type *) uprv_malloc((count) * sizeof(type))
// Hands storage obtained from NEW_ARRAY back to uprv_free.
#define DELETE_ARRAY(array) uprv_free((void *) (array))
| 34 | |
| 35 | U_NAMESPACE_BEGIN |
| 36 | |
| 37 | struct CSRecognizerInfo : public UMemory { |
| 38 | CSRecognizerInfo(CharsetRecognizer *recognizer, UBool isDefaultEnabled) |
Frank Tang | 69c72a6 | 2019-04-03 21:41:21 -0700 | [diff] [blame] | 39 | : recognizer(recognizer), isDefaultEnabled(isDefaultEnabled) {} |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 40 | |
Frank Tang | 69c72a6 | 2019-04-03 21:41:21 -0700 | [diff] [blame] | 41 | ~CSRecognizerInfo() {delete recognizer;} |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 42 | |
| 43 | CharsetRecognizer *recognizer; |
| 44 | UBool isDefaultEnabled; |
| 45 | }; |
| 46 | |
| 47 | U_NAMESPACE_END |
| 48 | |
| 49 | static icu::CSRecognizerInfo **fCSRecognizers = NULL; |
Frank Tang | 1c67b4e | 2022-05-18 10:13:51 -0700 | [diff] [blame] | 50 | static icu::UInitOnce gCSRecognizersInitOnce {}; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 51 | static int32_t fCSRecognizers_size = 0; |
| 52 | |
| 53 | U_CDECL_BEGIN |
| 54 | static UBool U_CALLCONV csdet_cleanup(void) |
| 55 | { |
| 56 | U_NAMESPACE_USE |
| 57 | if (fCSRecognizers != NULL) { |
| 58 | for(int32_t r = 0; r < fCSRecognizers_size; r += 1) { |
| 59 | delete fCSRecognizers[r]; |
| 60 | fCSRecognizers[r] = NULL; |
| 61 | } |
| 62 | |
| 63 | DELETE_ARRAY(fCSRecognizers); |
| 64 | fCSRecognizers = NULL; |
| 65 | fCSRecognizers_size = 0; |
| 66 | } |
| 67 | gCSRecognizersInitOnce.reset(); |
| 68 | |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 69 | return true; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 70 | } |
| 71 | |
| 72 | static int32_t U_CALLCONV |
| 73 | charsetMatchComparator(const void * /*context*/, const void *left, const void *right) |
| 74 | { |
| 75 | U_NAMESPACE_USE |
| 76 | |
| 77 | const CharsetMatch **csm_l = (const CharsetMatch **) left; |
| 78 | const CharsetMatch **csm_r = (const CharsetMatch **) right; |
| 79 | |
| 80 | // NOTE: compare is backwards to sort from highest to lowest. |
| 81 | return (*csm_r)->getConfidence() - (*csm_l)->getConfidence(); |
| 82 | } |
| 83 | |
| 84 | static void U_CALLCONV initRecognizers(UErrorCode &status) { |
| 85 | U_NAMESPACE_USE |
| 86 | ucln_i18n_registerCleanup(UCLN_I18N_CSDET, csdet_cleanup); |
| 87 | CSRecognizerInfo *tempArray[] = { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 88 | new CSRecognizerInfo(new CharsetRecog_UTF8(), true), |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 89 | |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 90 | new CSRecognizerInfo(new CharsetRecog_UTF_16_BE(), true), |
| 91 | new CSRecognizerInfo(new CharsetRecog_UTF_16_LE(), true), |
| 92 | new CSRecognizerInfo(new CharsetRecog_UTF_32_BE(), true), |
| 93 | new CSRecognizerInfo(new CharsetRecog_UTF_32_LE(), true), |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 94 | |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 95 | new CSRecognizerInfo(new CharsetRecog_8859_1(), true), |
| 96 | new CSRecognizerInfo(new CharsetRecog_8859_2(), true), |
| 97 | new CSRecognizerInfo(new CharsetRecog_8859_5_ru(), true), |
| 98 | new CSRecognizerInfo(new CharsetRecog_8859_6_ar(), true), |
| 99 | new CSRecognizerInfo(new CharsetRecog_8859_7_el(), true), |
| 100 | new CSRecognizerInfo(new CharsetRecog_8859_8_I_he(), true), |
| 101 | new CSRecognizerInfo(new CharsetRecog_8859_8_he(), true), |
| 102 | new CSRecognizerInfo(new CharsetRecog_windows_1251(), true), |
| 103 | new CSRecognizerInfo(new CharsetRecog_windows_1256(), true), |
| 104 | new CSRecognizerInfo(new CharsetRecog_KOI8_R(), true), |
| 105 | new CSRecognizerInfo(new CharsetRecog_8859_9_tr(), true), |
| 106 | new CSRecognizerInfo(new CharsetRecog_sjis(), true), |
| 107 | new CSRecognizerInfo(new CharsetRecog_gb_18030(), true), |
| 108 | new CSRecognizerInfo(new CharsetRecog_euc_jp(), true), |
| 109 | new CSRecognizerInfo(new CharsetRecog_euc_kr(), true), |
| 110 | new CSRecognizerInfo(new CharsetRecog_big5(), true), |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 111 | |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 112 | new CSRecognizerInfo(new CharsetRecog_2022JP(), true), |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 113 | #if !UCONFIG_ONLY_HTML_CONVERSION |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 114 | new CSRecognizerInfo(new CharsetRecog_2022KR(), true), |
| 115 | new CSRecognizerInfo(new CharsetRecog_2022CN(), true), |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 116 | |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 117 | new CSRecognizerInfo(new CharsetRecog_IBM424_he_rtl(), false), |
| 118 | new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), false), |
| 119 | new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), false), |
| 120 | new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), false) |
Jungshik Shin (jungshik at google) | afd723b | 2015-01-21 13:24:04 -0800 | [diff] [blame] | 121 | #endif |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 122 | }; |
Jungshik Shin | 5feb9ad | 2016-10-21 12:52:48 -0700 | [diff] [blame] | 123 | int32_t rCount = UPRV_LENGTHOF(tempArray); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 124 | |
| 125 | fCSRecognizers = NEW_ARRAY(CSRecognizerInfo *, rCount); |
| 126 | |
| 127 | if (fCSRecognizers == NULL) { |
| 128 | status = U_MEMORY_ALLOCATION_ERROR; |
| 129 | } |
| 130 | else { |
| 131 | fCSRecognizers_size = rCount; |
| 132 | for (int32_t r = 0; r < rCount; r += 1) { |
| 133 | fCSRecognizers[r] = tempArray[r]; |
| 134 | if (fCSRecognizers[r] == NULL) { |
| 135 | status = U_MEMORY_ALLOCATION_ERROR; |
| 136 | } |
| 137 | } |
| 138 | } |
| 139 | } |
| 140 | |
| 141 | U_CDECL_END |
| 142 | |
| 143 | U_NAMESPACE_BEGIN |
| 144 | |
| 145 | void CharsetDetector::setRecognizers(UErrorCode &status) |
| 146 | { |
| 147 | umtx_initOnce(gCSRecognizersInitOnce, &initRecognizers, status); |
| 148 | } |
| 149 | |
| 150 | CharsetDetector::CharsetDetector(UErrorCode &status) |
| 151 | : textIn(new InputText(status)), resultArray(NULL), |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 152 | resultCount(0), fStripTags(false), fFreshTextSet(false), |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 153 | fEnabledRecognizers(NULL) |
| 154 | { |
| 155 | if (U_FAILURE(status)) { |
| 156 | return; |
| 157 | } |
| 158 | |
| 159 | setRecognizers(status); |
| 160 | |
| 161 | if (U_FAILURE(status)) { |
| 162 | return; |
| 163 | } |
| 164 | |
| 165 | resultArray = (CharsetMatch **)uprv_malloc(sizeof(CharsetMatch *)*fCSRecognizers_size); |
| 166 | |
| 167 | if (resultArray == NULL) { |
| 168 | status = U_MEMORY_ALLOCATION_ERROR; |
| 169 | return; |
| 170 | } |
| 171 | |
| 172 | for(int32_t i = 0; i < fCSRecognizers_size; i += 1) { |
| 173 | resultArray[i] = new CharsetMatch(); |
| 174 | |
| 175 | if (resultArray[i] == NULL) { |
| 176 | status = U_MEMORY_ALLOCATION_ERROR; |
| 177 | break; |
| 178 | } |
| 179 | } |
| 180 | } |
| 181 | |
| 182 | CharsetDetector::~CharsetDetector() |
| 183 | { |
| 184 | delete textIn; |
| 185 | |
| 186 | for(int32_t i = 0; i < fCSRecognizers_size; i += 1) { |
| 187 | delete resultArray[i]; |
| 188 | } |
| 189 | |
| 190 | uprv_free(resultArray); |
| 191 | |
| 192 | if (fEnabledRecognizers) { |
| 193 | uprv_free(fEnabledRecognizers); |
| 194 | } |
| 195 | } |
| 196 | |
| 197 | void CharsetDetector::setText(const char *in, int32_t len) |
| 198 | { |
| 199 | textIn->setText(in, len); |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 200 | fFreshTextSet = true; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 201 | } |
| 202 | |
| 203 | UBool CharsetDetector::setStripTagsFlag(UBool flag) |
| 204 | { |
| 205 | UBool temp = fStripTags; |
| 206 | fStripTags = flag; |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 207 | fFreshTextSet = true; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 208 | return temp; |
| 209 | } |
| 210 | |
| 211 | UBool CharsetDetector::getStripTagsFlag() const |
| 212 | { |
| 213 | return fStripTags; |
| 214 | } |
| 215 | |
| 216 | void CharsetDetector::setDeclaredEncoding(const char *encoding, int32_t len) const |
| 217 | { |
| 218 | textIn->setDeclaredEncoding(encoding,len); |
| 219 | } |
| 220 | |
| 221 | int32_t CharsetDetector::getDetectableCount() |
| 222 | { |
| 223 | UErrorCode status = U_ZERO_ERROR; |
| 224 | |
| 225 | setRecognizers(status); |
| 226 | |
| 227 | return fCSRecognizers_size; |
| 228 | } |
| 229 | |
| 230 | const CharsetMatch *CharsetDetector::detect(UErrorCode &status) |
| 231 | { |
| 232 | int32_t maxMatchesFound = 0; |
| 233 | |
| 234 | detectAll(maxMatchesFound, status); |
| 235 | |
| 236 | if(maxMatchesFound > 0) { |
| 237 | return resultArray[0]; |
| 238 | } else { |
| 239 | return NULL; |
| 240 | } |
| 241 | } |
| 242 | |
| 243 | const CharsetMatch * const *CharsetDetector::detectAll(int32_t &maxMatchesFound, UErrorCode &status) |
| 244 | { |
| 245 | if(!textIn->isSet()) { |
| 246 | status = U_MISSING_RESOURCE_ERROR;// TODO: Need to set proper status code for input text not set |
| 247 | |
| 248 | return NULL; |
| 249 | } else if (fFreshTextSet) { |
| 250 | CharsetRecognizer *csr; |
| 251 | int32_t i; |
| 252 | |
| 253 | textIn->MungeInput(fStripTags); |
| 254 | |
| 255 | // Iterate over all possible charsets, remember all that |
| 256 | // give a match quality > 0. |
| 257 | resultCount = 0; |
| 258 | for (i = 0; i < fCSRecognizers_size; i += 1) { |
| 259 | csr = fCSRecognizers[i]->recognizer; |
| 260 | if (csr->match(textIn, resultArray[resultCount])) { |
| 261 | resultCount++; |
| 262 | } |
| 263 | } |
| 264 | |
| 265 | if (resultCount > 1) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 266 | uprv_sortArray(resultArray, resultCount, sizeof resultArray[0], charsetMatchComparator, NULL, true, &status); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 267 | } |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 268 | fFreshTextSet = false; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 269 | } |
| 270 | |
| 271 | maxMatchesFound = resultCount; |
| 272 | |
Frank Tang | d2858cb | 2022-04-08 20:34:12 -0700 | [diff] [blame] | 273 | if (maxMatchesFound == 0) { |
| 274 | status = U_INVALID_CHAR_FOUND; |
| 275 | return NULL; |
| 276 | } |
| 277 | |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 278 | return resultArray; |
| 279 | } |
| 280 | |
| 281 | void CharsetDetector::setDetectableCharset(const char *encoding, UBool enabled, UErrorCode &status) |
| 282 | { |
| 283 | if (U_FAILURE(status)) { |
| 284 | return; |
| 285 | } |
| 286 | |
| 287 | int32_t modIdx = -1; |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 288 | UBool isDefaultVal = false; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 289 | for (int32_t i = 0; i < fCSRecognizers_size; i++) { |
| 290 | CSRecognizerInfo *csrinfo = fCSRecognizers[i]; |
| 291 | if (uprv_strcmp(csrinfo->recognizer->getName(), encoding) == 0) { |
| 292 | modIdx = i; |
| 293 | isDefaultVal = (csrinfo->isDefaultEnabled == enabled); |
| 294 | break; |
| 295 | } |
| 296 | } |
| 297 | if (modIdx < 0) { |
| 298 | // No matching encoding found |
| 299 | status = U_ILLEGAL_ARGUMENT_ERROR; |
| 300 | return; |
| 301 | } |
| 302 | |
| 303 | if (fEnabledRecognizers == NULL && !isDefaultVal) { |
| 304 | // Create an array storing the non default setting |
| 305 | fEnabledRecognizers = NEW_ARRAY(UBool, fCSRecognizers_size); |
| 306 | if (fEnabledRecognizers == NULL) { |
| 307 | status = U_MEMORY_ALLOCATION_ERROR; |
| 308 | return; |
| 309 | } |
| 310 | // Initialize the array with default info |
| 311 | for (int32_t i = 0; i < fCSRecognizers_size; i++) { |
| 312 | fEnabledRecognizers[i] = fCSRecognizers[i]->isDefaultEnabled; |
| 313 | } |
| 314 | } |
| 315 | |
| 316 | if (fEnabledRecognizers != NULL) { |
| 317 | fEnabledRecognizers[modIdx] = enabled; |
| 318 | } |
| 319 | } |
| 320 | |
| 321 | /*const char *CharsetDetector::getCharsetName(int32_t index, UErrorCode &status) const |
| 322 | { |
| 323 | if( index > fCSRecognizers_size-1 || index < 0) { |
| 324 | status = U_INDEX_OUTOFBOUNDS_ERROR; |
| 325 | |
| 326 | return 0; |
| 327 | } else { |
| 328 | return fCSRecognizers[index]->getName(); |
| 329 | } |
| 330 | }*/ |
| 331 | |
| 332 | U_NAMESPACE_END |
| 333 | |
| 334 | U_CDECL_BEGIN |
| 335 | typedef struct { |
| 336 | int32_t currIndex; |
| 337 | UBool all; |
| 338 | UBool *enabledRecognizers; |
| 339 | } Context; |
| 340 | |
| 341 | |
| 342 | |
| 343 | static void U_CALLCONV |
| 344 | enumClose(UEnumeration *en) { |
| 345 | if(en->context != NULL) { |
| 346 | DELETE_ARRAY(en->context); |
| 347 | } |
| 348 | |
| 349 | DELETE_ARRAY(en); |
| 350 | } |
| 351 | |
| 352 | static int32_t U_CALLCONV |
| 353 | enumCount(UEnumeration *en, UErrorCode *) { |
| 354 | if (((Context *)en->context)->all) { |
| 355 | // ucsdet_getAllDetectableCharsets, all charset detector names |
| 356 | return fCSRecognizers_size; |
| 357 | } |
| 358 | |
| 359 | // Otherwise, ucsdet_getDetectableCharsets - only enabled ones |
| 360 | int32_t count = 0; |
| 361 | UBool *enabledArray = ((Context *)en->context)->enabledRecognizers; |
| 362 | if (enabledArray != NULL) { |
| 363 | // custom set |
| 364 | for (int32_t i = 0; i < fCSRecognizers_size; i++) { |
| 365 | if (enabledArray[i]) { |
| 366 | count++; |
| 367 | } |
| 368 | } |
| 369 | } else { |
| 370 | // default set |
| 371 | for (int32_t i = 0; i < fCSRecognizers_size; i++) { |
| 372 | if (fCSRecognizers[i]->isDefaultEnabled) { |
| 373 | count++; |
| 374 | } |
| 375 | } |
| 376 | } |
| 377 | return count; |
| 378 | } |
| 379 | |
| 380 | static const char* U_CALLCONV |
| 381 | enumNext(UEnumeration *en, int32_t *resultLength, UErrorCode * /*status*/) { |
| 382 | const char *currName = NULL; |
| 383 | |
| 384 | if (((Context *)en->context)->currIndex < fCSRecognizers_size) { |
| 385 | if (((Context *)en->context)->all) { |
| 386 | // ucsdet_getAllDetectableCharsets, all charset detector names |
| 387 | currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName(); |
| 388 | ((Context *)en->context)->currIndex++; |
| 389 | } else { |
| 390 | // ucsdet_getDetectableCharsets |
| 391 | UBool *enabledArray = ((Context *)en->context)->enabledRecognizers; |
| 392 | if (enabledArray != NULL) { |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 393 | // custom set |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 394 | while (currName == NULL && ((Context *)en->context)->currIndex < fCSRecognizers_size) { |
| 395 | if (enabledArray[((Context *)en->context)->currIndex]) { |
| 396 | currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName(); |
| 397 | } |
| 398 | ((Context *)en->context)->currIndex++; |
| 399 | } |
| 400 | } else { |
| 401 | // default set |
| 402 | while (currName == NULL && ((Context *)en->context)->currIndex < fCSRecognizers_size) { |
| 403 | if (fCSRecognizers[((Context *)en->context)->currIndex]->isDefaultEnabled) { |
| 404 | currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName(); |
| 405 | } |
| 406 | ((Context *)en->context)->currIndex++; |
| 407 | } |
| 408 | } |
| 409 | } |
| 410 | } |
| 411 | |
| 412 | if(resultLength != NULL) { |
| 413 | *resultLength = currName == NULL ? 0 : (int32_t)uprv_strlen(currName); |
| 414 | } |
| 415 | |
| 416 | return currName; |
| 417 | } |
| 418 | |
| 419 | |
| 420 | static void U_CALLCONV |
| 421 | enumReset(UEnumeration *en, UErrorCode *) { |
| 422 | ((Context *)en->context)->currIndex = 0; |
| 423 | } |
| 424 | |
| 425 | static const UEnumeration gCSDetEnumeration = { |
| 426 | NULL, |
| 427 | NULL, |
| 428 | enumClose, |
| 429 | enumCount, |
| 430 | uenum_unextDefault, |
| 431 | enumNext, |
| 432 | enumReset |
| 433 | }; |
| 434 | |
| 435 | U_CDECL_END |
| 436 | |
| 437 | U_NAMESPACE_BEGIN |
| 438 | |
| 439 | UEnumeration * CharsetDetector::getAllDetectableCharsets(UErrorCode &status) |
| 440 | { |
| 441 | |
| 442 | /* Initialize recognized charsets. */ |
| 443 | setRecognizers(status); |
| 444 | |
| 445 | if(U_FAILURE(status)) { |
| 446 | return 0; |
| 447 | } |
| 448 | |
| 449 | UEnumeration *en = NEW_ARRAY(UEnumeration, 1); |
| 450 | if (en == NULL) { |
| 451 | status = U_MEMORY_ALLOCATION_ERROR; |
| 452 | return 0; |
| 453 | } |
| 454 | memcpy(en, &gCSDetEnumeration, sizeof(UEnumeration)); |
| 455 | en->context = (void*)NEW_ARRAY(Context, 1); |
| 456 | if (en->context == NULL) { |
| 457 | status = U_MEMORY_ALLOCATION_ERROR; |
| 458 | DELETE_ARRAY(en); |
| 459 | return 0; |
| 460 | } |
| 461 | uprv_memset(en->context, 0, sizeof(Context)); |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 462 | ((Context*)en->context)->all = true; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 463 | return en; |
| 464 | } |
| 465 | |
| 466 | UEnumeration * CharsetDetector::getDetectableCharsets(UErrorCode &status) const |
| 467 | { |
| 468 | if(U_FAILURE(status)) { |
| 469 | return 0; |
| 470 | } |
| 471 | |
| 472 | UEnumeration *en = NEW_ARRAY(UEnumeration, 1); |
| 473 | if (en == NULL) { |
| 474 | status = U_MEMORY_ALLOCATION_ERROR; |
| 475 | return 0; |
| 476 | } |
| 477 | memcpy(en, &gCSDetEnumeration, sizeof(UEnumeration)); |
| 478 | en->context = (void*)NEW_ARRAY(Context, 1); |
| 479 | if (en->context == NULL) { |
| 480 | status = U_MEMORY_ALLOCATION_ERROR; |
| 481 | DELETE_ARRAY(en); |
| 482 | return 0; |
| 483 | } |
| 484 | uprv_memset(en->context, 0, sizeof(Context)); |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 485 | ((Context*)en->context)->all = false; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 486 | ((Context*)en->context)->enabledRecognizers = fEnabledRecognizers; |
| 487 | return en; |
| 488 | } |
| 489 | |
| 490 | U_NAMESPACE_END |
| 491 | |
| 492 | #endif |