Jungshik Shin | 87232d8 | 2017-05-13 21:10:13 -0700 | [diff] [blame] | 1 | // © 2016 and later: Unicode, Inc. and others. |
Jungshik Shin | 5feb9ad | 2016-10-21 12:52:48 -0700 | [diff] [blame] | 2 | // License & terms of use: http://www.unicode.org/copyright.html |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 3 | /* |
| 4 | ******************************************************************************** |
| 5 | * |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 6 | * Copyright (C) 1998-2015, International Business Machines |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 7 | * Corporation and others. All Rights Reserved. |
| 8 | * |
| 9 | ******************************************************************************** |
| 10 | * |
| 11 | * |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 12 | * makeconv.cpp: |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 13 | * tool creating a binary (compressed) representation of the conversion mapping |
| 14 | * table (IBM NLTC ucmap format). |
| 15 | * |
| 16 | * 05/04/2000 helena Added fallback mapping into the picture... |
| 17 | * 06/29/2000 helena Major rewrite of the callback APIs. |
| 18 | */ |
| 19 | |
| 20 | #include <stdio.h> |
| 21 | #include "unicode/putil.h" |
| 22 | #include "unicode/ucnv_err.h" |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 23 | #include "charstr.h" |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 24 | #include "ucnv_bld.h" |
| 25 | #include "ucnv_imp.h" |
| 26 | #include "ucnv_cnv.h" |
| 27 | #include "cstring.h" |
| 28 | #include "cmemory.h" |
| 29 | #include "uinvchar.h" |
| 30 | #include "filestrm.h" |
| 31 | #include "toolutil.h" |
| 32 | #include "uoptions.h" |
| 33 | #include "unicode/udata.h" |
| 34 | #include "unewdata.h" |
| 35 | #include "uparse.h" |
| 36 | #include "ucm.h" |
| 37 | #include "makeconv.h" |
| 38 | #include "genmbcs.h" |
| 39 | |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 40 | #define DEBUG 0 |
| 41 | |
| 42 | typedef struct ConvData { |
| 43 | UCMFile *ucm; |
| 44 | NewConverter *cnvData, *extData; |
| 45 | UConverterSharedData sharedData; |
| 46 | UConverterStaticData staticData; |
| 47 | } ConvData; |
| 48 | |
| 49 | static void |
| 50 | initConvData(ConvData *data) { |
| 51 | uprv_memset(data, 0, sizeof(ConvData)); |
| 52 | data->sharedData.structSize=sizeof(UConverterSharedData); |
| 53 | data->staticData.structSize=sizeof(UConverterStaticData); |
| 54 | data->sharedData.staticData=&data->staticData; |
| 55 | } |
| 56 | |
| 57 | static void |
| 58 | cleanupConvData(ConvData *data) { |
| 59 | if(data!=NULL) { |
| 60 | if(data->cnvData!=NULL) { |
| 61 | data->cnvData->close(data->cnvData); |
| 62 | data->cnvData=NULL; |
| 63 | } |
| 64 | if(data->extData!=NULL) { |
| 65 | data->extData->close(data->extData); |
| 66 | data->extData=NULL; |
| 67 | } |
| 68 | ucm_close(data->ucm); |
| 69 | data->ucm=NULL; |
| 70 | } |
| 71 | } |
| 72 | |
| 73 | /* |
| 74 | * from ucnvstat.c - static prototypes of data-based converters |
| 75 | */ |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 76 | U_CAPI const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 77 | |
| 78 | /* |
| 79 | * Global - verbosity |
| 80 | */ |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 81 | UBool VERBOSE = false; |
| 82 | UBool QUIET = false; |
| 83 | UBool SMALL = false; |
| 84 | UBool IGNORE_SISO_CHECK = false; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 85 | |
| 86 | static void |
| 87 | createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode); |
| 88 | |
| 89 | /* |
| 90 | * Set up the UNewData and write the converter.. |
| 91 | */ |
| 92 | static void |
| 93 | writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status); |
| 94 | |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 95 | UBool haveCopyright=true; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 96 | |
| 97 | static UDataInfo dataInfo={ |
| 98 | sizeof(UDataInfo), |
| 99 | 0, |
| 100 | |
| 101 | U_IS_BIG_ENDIAN, |
| 102 | U_CHARSET_FAMILY, |
| 103 | sizeof(UChar), |
| 104 | 0, |
| 105 | |
| 106 | {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */ |
| 107 | {6, 2, 0, 0}, /* formatVersion */ |
| 108 | {0, 0, 0, 0} /* dataVersion (calculated at runtime) */ |
| 109 | }; |
| 110 | |
| 111 | static void |
| 112 | writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status) |
| 113 | { |
| 114 | UNewDataMemory *mem = NULL; |
| 115 | uint32_t sz2; |
| 116 | uint32_t size = 0; |
| 117 | int32_t tableType; |
| 118 | |
| 119 | if(U_FAILURE(*status)) |
| 120 | { |
| 121 | return; |
| 122 | } |
| 123 | |
| 124 | tableType=TABLE_NONE; |
| 125 | if(data->cnvData!=NULL) { |
| 126 | tableType|=TABLE_BASE; |
| 127 | } |
| 128 | if(data->extData!=NULL) { |
| 129 | tableType|=TABLE_EXT; |
| 130 | } |
| 131 | |
| 132 | mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status); |
| 133 | |
| 134 | if(U_FAILURE(*status)) |
| 135 | { |
| 136 | fprintf(stderr, "Couldn't create the udata %s.%s: %s\n", |
| 137 | cnvName, |
| 138 | "cnv", |
| 139 | u_errorName(*status)); |
| 140 | return; |
| 141 | } |
| 142 | |
| 143 | if(VERBOSE) |
| 144 | { |
| 145 | printf("- Opened udata %s.%s\n", cnvName, "cnv"); |
| 146 | } |
| 147 | |
| 148 | |
| 149 | /* all read only, clean, platform independent data. Mmmm. :) */ |
| 150 | udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData)); |
| 151 | size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */ |
| 152 | /* Now, write the table */ |
| 153 | if(tableType&TABLE_BASE) { |
| 154 | size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType); |
| 155 | } |
| 156 | if(tableType&TABLE_EXT) { |
| 157 | size += data->extData->write(data->extData, &data->staticData, mem, tableType); |
| 158 | } |
| 159 | |
| 160 | sz2 = udata_finish(mem, status); |
| 161 | if(size != sz2) |
| 162 | { |
| 163 | fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size); |
| 164 | *status=U_INTERNAL_PROGRAM_ERROR; |
| 165 | } |
| 166 | if(VERBOSE) |
| 167 | { |
| 168 | printf("- Wrote %u bytes to the udata.\n", (int)sz2); |
| 169 | } |
| 170 | } |
| 171 | |
/*
 * Indexes into options[]. The order here must match the order of the
 * entries in that array.
 */
enum {
    OPT_HELP_H,                 /* UOPTION_HELP_H */
    OPT_HELP_QUESTION_MARK,     /* UOPTION_HELP_QUESTION_MARK */
    OPT_COPYRIGHT,              /* UOPTION_COPYRIGHT */
    OPT_VERSION,                /* UOPTION_VERSION */
    OPT_DESTDIR,                /* UOPTION_DESTDIR */
    OPT_VERBOSE,                /* UOPTION_VERBOSE */
    OPT_SMALL,                  /* "small" */
    OPT_IGNORE_SISO_CHECK,      /* "ignore-siso-check" */
    OPT_QUIET,                  /* UOPTION_QUIET */
    OPT_SOURCEDIR,              /* UOPTION_SOURCEDIR */

    OPT_COUNT                   /* number of options; keep last */
};
| 186 | |
| 187 | static UOption options[]={ |
| 188 | UOPTION_HELP_H, |
| 189 | UOPTION_HELP_QUESTION_MARK, |
| 190 | UOPTION_COPYRIGHT, |
| 191 | UOPTION_VERSION, |
| 192 | UOPTION_DESTDIR, |
| 193 | UOPTION_VERBOSE, |
| 194 | { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }, |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 195 | { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }, |
| 196 | UOPTION_QUIET, |
Frank Tang | 69c72a6 | 2019-04-03 21:41:21 -0700 | [diff] [blame] | 197 | UOPTION_SOURCEDIR, |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 198 | }; |
| 199 | |
| 200 | int main(int argc, char* argv[]) |
| 201 | { |
| 202 | ConvData data; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 203 | char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 204 | |
| 205 | U_MAIN_INIT_ARGS(argc, argv); |
| 206 | |
| 207 | /* Set up the ICU version number */ |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 208 | UVersionInfo icuVersion; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 209 | u_getVersion(icuVersion); |
| 210 | uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo)); |
| 211 | |
| 212 | /* preset then read command line options */ |
| 213 | options[OPT_DESTDIR].value=u_getDataDirectory(); |
Jungshik Shin (jungshik at google) | 0f8746a | 2015-01-08 15:46:45 -0800 | [diff] [blame] | 214 | argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 215 | |
Frank Tang | 3e05d9d | 2021-11-08 14:04:04 -0800 | [diff] [blame] | 216 | if(options[OPT_VERSION].doesOccur) { |
| 217 | printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n", |
| 218 | dataInfo.formatVersion[0], dataInfo.formatVersion[1]); |
| 219 | printf("%s\n", U_COPYRIGHT_STRING); |
| 220 | exit(0); |
| 221 | } |
| 222 | |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 223 | /* error handling, printing usage message */ |
| 224 | if(argc<0) { |
| 225 | fprintf(stderr, |
| 226 | "error in command line argument \"%s\"\n", |
| 227 | argv[-argc]); |
| 228 | } else if(argc<2) { |
| 229 | argc=-1; |
| 230 | } |
| 231 | if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) { |
| 232 | FILE *stdfile=argc<0 ? stderr : stdout; |
| 233 | fprintf(stdfile, |
| 234 | "usage: %s [-options] files...\n" |
| 235 | "\tread .ucm codepage mapping files and write .cnv files\n" |
| 236 | "options:\n" |
| 237 | "\t-h or -? or --help this usage text\n" |
| 238 | "\t-V or --version show a version message\n" |
| 239 | "\t-c or --copyright include a copyright notice\n" |
| 240 | "\t-d or --destdir destination directory, followed by the path\n" |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 241 | "\t-v or --verbose Turn on verbose output\n" |
Frank Tang | 69c72a6 | 2019-04-03 21:41:21 -0700 | [diff] [blame] | 242 | "\t-q or --quiet do not display warnings and progress\n" |
| 243 | "\t-s or --sourcedir source directory, followed by the path\n", |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 244 | argv[0]); |
| 245 | fprintf(stdfile, |
| 246 | "\t --small Generate smaller .cnv files. They will be\n" |
| 247 | "\t significantly smaller but may not be compatible with\n" |
| 248 | "\t older versions of ICU and will require heap memory\n" |
| 249 | "\t allocation when loaded.\n" |
| 250 | "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n"); |
| 251 | return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; |
| 252 | } |
| 253 | |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 254 | /* get the options values */ |
| 255 | haveCopyright = options[OPT_COPYRIGHT].doesOccur; |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 256 | const char *destdir = options[OPT_DESTDIR].value; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 257 | VERBOSE = options[OPT_VERBOSE].doesOccur; |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 258 | QUIET = options[OPT_QUIET].doesOccur; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 259 | SMALL = options[OPT_SMALL].doesOccur; |
| 260 | |
| 261 | if (options[OPT_IGNORE_SISO_CHECK].doesOccur) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 262 | IGNORE_SISO_CHECK = true; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 263 | } |
| 264 | |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 265 | icu::CharString outFileName; |
| 266 | UErrorCode err = U_ZERO_ERROR; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 267 | if (destdir != NULL && *destdir != 0) { |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 268 | outFileName.append(destdir, err).ensureEndsWithFileSeparator(err); |
| 269 | if (U_FAILURE(err)) { |
| 270 | return err; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 271 | } |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 272 | } |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 273 | int32_t outBasenameStart = outFileName.length(); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 274 | |
| 275 | #if DEBUG |
| 276 | { |
| 277 | int i; |
| 278 | printf("makeconv: processing %d files...\n", argc - 1); |
| 279 | for(i=1; i<argc; ++i) { |
| 280 | printf("%s ", argv[i]); |
| 281 | } |
| 282 | printf("\n"); |
| 283 | fflush(stdout); |
| 284 | } |
| 285 | #endif |
| 286 | |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 287 | UBool printFilename = (UBool) (argc > 2 || VERBOSE); |
Frank Tang | 69c72a6 | 2019-04-03 21:41:21 -0700 | [diff] [blame] | 288 | icu::CharString pathBuf; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 289 | for (++argv; --argc; ++argv) |
| 290 | { |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 291 | UErrorCode localError = U_ZERO_ERROR; |
| 292 | const char *arg = getLongPathname(*argv); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 293 | |
Frank Tang | 69c72a6 | 2019-04-03 21:41:21 -0700 | [diff] [blame] | 294 | const char* sourcedir = options[OPT_SOURCEDIR].value; |
| 295 | if (sourcedir != NULL && *sourcedir != 0 && uprv_strcmp(sourcedir, ".") != 0) { |
| 296 | pathBuf.clear(); |
| 297 | pathBuf.appendPathPart(sourcedir, localError); |
| 298 | pathBuf.appendPathPart(arg, localError); |
| 299 | arg = pathBuf.data(); |
| 300 | } |
| 301 | |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 302 | /*produces the right destination path for display*/ |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 303 | outFileName.truncate(outBasenameStart); |
| 304 | if (outBasenameStart != 0) |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 305 | { |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 306 | /* find the last file sepator */ |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 307 | const char *basename = findBasename(arg); |
| 308 | outFileName.append(basename, localError); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 309 | } |
| 310 | else |
| 311 | { |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 312 | outFileName.append(arg, localError); |
| 313 | } |
| 314 | if (U_FAILURE(localError)) { |
| 315 | return localError; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 316 | } |
| 317 | |
| 318 | /*removes the extension if any is found*/ |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 319 | int32_t lastDotIndex = outFileName.lastIndexOf('.'); |
| 320 | if (lastDotIndex >= outBasenameStart) { |
| 321 | outFileName.truncate(lastDotIndex); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 322 | } |
| 323 | |
| 324 | /* the basename without extension is the converter name */ |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 325 | if ((outFileName.length() - outBasenameStart) >= UPRV_LENGTHOF(cnvName)) { |
| 326 | fprintf(stderr, "converter name %s too long\n", outFileName.data() + outBasenameStart); |
| 327 | return U_BUFFER_OVERFLOW_ERROR; |
| 328 | } |
| 329 | uprv_strcpy(cnvName, outFileName.data() + outBasenameStart); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 330 | |
| 331 | /*Adds the target extension*/ |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 332 | outFileName.append(CONVERTER_FILE_EXTENSION, localError); |
| 333 | if (U_FAILURE(localError)) { |
| 334 | return localError; |
| 335 | } |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 336 | |
| 337 | #if DEBUG |
| 338 | printf("makeconv: processing %s ...\n", arg); |
| 339 | fflush(stdout); |
| 340 | #endif |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 341 | initConvData(&data); |
| 342 | createConverter(&data, arg, &localError); |
| 343 | |
| 344 | if (U_FAILURE(localError)) |
| 345 | { |
| 346 | /* if an error is found, print out an error msg and keep going */ |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 347 | fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", |
| 348 | outFileName.data(), arg, u_errorName(localError)); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 349 | if(U_SUCCESS(err)) { |
| 350 | err = localError; |
| 351 | } |
| 352 | } |
| 353 | else |
| 354 | { |
| 355 | /* Insure the static data name matches the file name */ |
| 356 | /* Changed to ignore directory and only compare base name |
| 357 | LDH 1/2/08*/ |
| 358 | char *p; |
| 359 | p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */ |
| 360 | |
| 361 | if(p == NULL) /* OK, try alternate */ |
| 362 | { |
| 363 | p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR); |
| 364 | if(p == NULL) |
| 365 | { |
| 366 | p=cnvName; /* If no separators, no problem */ |
| 367 | } |
| 368 | } |
| 369 | else |
| 370 | { |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 371 | p++; /* If found separator, don't include it in compare */ |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 372 | } |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 373 | if(uprv_stricmp(p,data.staticData.name) && !QUIET) |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 374 | { |
| 375 | fprintf(stderr, "Warning: %s%s claims to be '%s'\n", |
| 376 | cnvName, CONVERTER_FILE_EXTENSION, |
| 377 | data.staticData.name); |
| 378 | } |
| 379 | |
| 380 | uprv_strcpy((char*)data.staticData.name, cnvName); |
| 381 | |
| 382 | if(!uprv_isInvariantString((char*)data.staticData.name, -1)) { |
| 383 | fprintf(stderr, |
| 384 | "Error: A converter name must contain only invariant characters.\n" |
| 385 | "%s is not a valid converter name.\n", |
| 386 | data.staticData.name); |
| 387 | if(U_SUCCESS(err)) { |
| 388 | err = U_INVALID_TABLE_FORMAT; |
| 389 | } |
| 390 | } |
| 391 | |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 392 | localError = U_ZERO_ERROR; |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 393 | writeConverterData(&data, cnvName, destdir, &localError); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 394 | |
| 395 | if(U_FAILURE(localError)) |
| 396 | { |
| 397 | /* if an error is found, print out an error msg and keep going*/ |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 398 | fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName.data(), arg, |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 399 | u_errorName(localError)); |
| 400 | if(U_SUCCESS(err)) { |
| 401 | err = localError; |
| 402 | } |
| 403 | } |
| 404 | else if (printFilename) |
| 405 | { |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 406 | puts(outFileName.data() + outBasenameStart); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 407 | } |
| 408 | } |
| 409 | fflush(stdout); |
| 410 | fflush(stderr); |
| 411 | |
| 412 | cleanupConvData(&data); |
| 413 | } |
| 414 | |
| 415 | return err; |
| 416 | } |
| 417 | |
| 418 | static void |
| 419 | getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) { |
| 420 | if( (name[0]=='i' || name[0]=='I') && |
| 421 | (name[1]=='b' || name[1]=='B') && |
| 422 | (name[2]=='m' || name[2]=='M') |
| 423 | ) { |
| 424 | name+=3; |
| 425 | if(*name=='-') { |
| 426 | ++name; |
| 427 | } |
| 428 | *pPlatform=UCNV_IBM; |
| 429 | *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10); |
| 430 | } else { |
| 431 | *pPlatform=UCNV_UNKNOWN; |
| 432 | *pCCSID=0; |
| 433 | } |
| 434 | } |
| 435 | |
| 436 | static void |
| 437 | readHeader(ConvData *data, |
| 438 | FileStream* convFile, |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 439 | UErrorCode *pErrorCode) { |
| 440 | char line[1024]; |
| 441 | char *s, *key, *value; |
| 442 | const UConverterStaticData *prototype; |
| 443 | UConverterStaticData *staticData; |
| 444 | |
| 445 | if(U_FAILURE(*pErrorCode)) { |
| 446 | return; |
| 447 | } |
| 448 | |
| 449 | staticData=&data->staticData; |
| 450 | staticData->platform=UCNV_IBM; |
| 451 | staticData->subCharLen=0; |
| 452 | |
| 453 | while(T_FileStream_readLine(convFile, line, sizeof(line))) { |
| 454 | /* basic parsing and handling of state-related items */ |
| 455 | if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) { |
| 456 | continue; |
| 457 | } |
| 458 | |
| 459 | /* stop at the beginning of the mapping section */ |
| 460 | if(uprv_strcmp(line, "CHARMAP")==0) { |
| 461 | break; |
| 462 | } |
| 463 | |
| 464 | /* collect the information from the header field, ignore unknown keys */ |
| 465 | if(uprv_strcmp(key, "code_set_name")==0) { |
| 466 | if(*value!=0) { |
| 467 | uprv_strcpy((char *)staticData->name, value); |
| 468 | getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage); |
| 469 | } |
| 470 | } else if(uprv_strcmp(key, "subchar")==0) { |
| 471 | uint8_t bytes[UCNV_EXT_MAX_BYTES]; |
| 472 | int8_t length; |
| 473 | |
| 474 | s=value; |
| 475 | length=ucm_parseBytes(bytes, line, (const char **)&s); |
| 476 | if(1<=length && length<=4 && *s==0) { |
| 477 | staticData->subCharLen=length; |
| 478 | uprv_memcpy(staticData->subChar, bytes, length); |
| 479 | } else { |
| 480 | fprintf(stderr, "error: illegal <subchar> %s\n", value); |
| 481 | *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 482 | return; |
| 483 | } |
| 484 | } else if(uprv_strcmp(key, "subchar1")==0) { |
| 485 | uint8_t bytes[UCNV_EXT_MAX_BYTES]; |
| 486 | |
| 487 | s=value; |
| 488 | if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) { |
| 489 | staticData->subChar1=bytes[0]; |
| 490 | } else { |
| 491 | fprintf(stderr, "error: illegal <subchar1> %s\n", value); |
| 492 | *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 493 | return; |
| 494 | } |
| 495 | } |
| 496 | } |
| 497 | |
| 498 | /* copy values from the UCMFile to the static data */ |
| 499 | staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength; |
| 500 | staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength; |
| 501 | staticData->conversionType=data->ucm->states.conversionType; |
| 502 | |
| 503 | if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) { |
| 504 | fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n"); |
| 505 | *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 506 | return; |
| 507 | } |
| 508 | |
| 509 | /* |
| 510 | * Now that we know the type, copy any 'default' values from the table. |
| 511 | * We need not check the type any further because the parser only |
| 512 | * recognizes what we have prototypes for. |
| 513 | * |
| 514 | * For delta (extension-only) tables, copy values from the base file |
| 515 | * instead, see createConverter(). |
| 516 | */ |
| 517 | if(data->ucm->baseName[0]==0) { |
| 518 | prototype=ucnv_converterStaticData[staticData->conversionType]; |
| 519 | if(prototype!=NULL) { |
| 520 | if(staticData->name[0]==0) { |
| 521 | uprv_strcpy((char *)staticData->name, prototype->name); |
| 522 | } |
| 523 | |
| 524 | if(staticData->codepage==0) { |
| 525 | staticData->codepage=prototype->codepage; |
| 526 | } |
| 527 | |
| 528 | if(staticData->platform==0) { |
| 529 | staticData->platform=prototype->platform; |
| 530 | } |
| 531 | |
| 532 | if(staticData->minBytesPerChar==0) { |
| 533 | staticData->minBytesPerChar=prototype->minBytesPerChar; |
| 534 | } |
| 535 | |
| 536 | if(staticData->maxBytesPerChar==0) { |
| 537 | staticData->maxBytesPerChar=prototype->maxBytesPerChar; |
| 538 | } |
| 539 | |
| 540 | if(staticData->subCharLen==0) { |
| 541 | staticData->subCharLen=prototype->subCharLen; |
| 542 | if(prototype->subCharLen>0) { |
| 543 | uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen); |
| 544 | } |
| 545 | } |
| 546 | } |
| 547 | } |
| 548 | |
| 549 | if(data->ucm->states.outputType<0) { |
| 550 | data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1; |
| 551 | } |
| 552 | |
| 553 | if( staticData->subChar1!=0 && |
| 554 | (staticData->minBytesPerChar>1 || |
| 555 | (staticData->conversionType!=UCNV_MBCS && |
| 556 | staticData->conversionType!=UCNV_EBCDIC_STATEFUL)) |
| 557 | ) { |
| 558 | fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n"); |
| 559 | *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 560 | } |
| 561 | } |
| 562 | |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 563 | /* return true if a base table was read, false for an extension table */ |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 564 | static UBool |
| 565 | readFile(ConvData *data, const char* converterName, |
| 566 | UErrorCode *pErrorCode) { |
| 567 | char line[1024]; |
| 568 | char *end; |
| 569 | FileStream *convFile; |
| 570 | |
| 571 | UCMStates *baseStates; |
| 572 | UBool dataIsBase; |
| 573 | |
| 574 | if(U_FAILURE(*pErrorCode)) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 575 | return false; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 576 | } |
| 577 | |
| 578 | data->ucm=ucm_open(); |
| 579 | |
| 580 | convFile=T_FileStream_open(converterName, "r"); |
| 581 | if(convFile==NULL) { |
| 582 | *pErrorCode=U_FILE_ACCESS_ERROR; |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 583 | return false; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 584 | } |
| 585 | |
Jungshik Shin | 70f8250 | 2016-01-29 00:32:36 -0800 | [diff] [blame] | 586 | readHeader(data, convFile, pErrorCode); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 587 | if(U_FAILURE(*pErrorCode)) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 588 | return false; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 589 | } |
| 590 | |
| 591 | if(data->ucm->baseName[0]==0) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 592 | dataIsBase=true; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 593 | baseStates=&data->ucm->states; |
| 594 | ucm_processStates(baseStates, IGNORE_SISO_CHECK); |
| 595 | } else { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 596 | dataIsBase=false; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 597 | baseStates=NULL; |
| 598 | } |
| 599 | |
| 600 | /* read the base table */ |
| 601 | ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode); |
| 602 | if(U_FAILURE(*pErrorCode)) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 603 | return false; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 604 | } |
| 605 | |
| 606 | /* read an extension table if there is one */ |
| 607 | while(T_FileStream_readLine(convFile, line, sizeof(line))) { |
| 608 | end=uprv_strchr(line, 0); |
| 609 | while(line<end && |
| 610 | (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) { |
| 611 | --end; |
| 612 | } |
| 613 | *end=0; |
| 614 | |
| 615 | if(line[0]=='#' || u_skipWhitespace(line)==end) { |
| 616 | continue; /* ignore empty and comment lines */ |
| 617 | } |
| 618 | |
| 619 | if(0==uprv_strcmp(line, "CHARMAP")) { |
| 620 | /* read the extension table */ |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 621 | ucm_readTable(data->ucm, convFile, false, baseStates, pErrorCode); |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 622 | } else { |
| 623 | fprintf(stderr, "unexpected text after the base mapping table\n"); |
| 624 | } |
| 625 | break; |
| 626 | } |
| 627 | |
| 628 | T_FileStream_close(convFile); |
| 629 | |
| 630 | if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) { |
| 631 | fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n"); |
| 632 | *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 633 | } |
| 634 | |
| 635 | return dataIsBase; |
| 636 | } |
| 637 | |
| 638 | static void |
| 639 | createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) { |
| 640 | ConvData baseData; |
| 641 | UBool dataIsBase; |
| 642 | |
| 643 | UConverterStaticData *staticData; |
| 644 | UCMStates *states, *baseStates; |
| 645 | |
| 646 | if(U_FAILURE(*pErrorCode)) { |
| 647 | return; |
| 648 | } |
| 649 | |
| 650 | initConvData(data); |
| 651 | |
| 652 | dataIsBase=readFile(data, converterName, pErrorCode); |
| 653 | if(U_FAILURE(*pErrorCode)) { |
| 654 | return; |
| 655 | } |
| 656 | |
| 657 | staticData=&data->staticData; |
| 658 | states=&data->ucm->states; |
| 659 | |
| 660 | if(dataIsBase) { |
| 661 | /* |
| 662 | * Build a normal .cnv file with a base table |
| 663 | * and an optional extension table. |
| 664 | */ |
| 665 | data->cnvData=MBCSOpen(data->ucm); |
| 666 | if(data->cnvData==NULL) { |
| 667 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 668 | |
| 669 | } else if(!data->cnvData->isValid(data->cnvData, |
| 670 | staticData->subChar, staticData->subCharLen) |
| 671 | ) { |
| 672 | fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n"); |
| 673 | *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 674 | |
| 675 | } else if(staticData->subChar1!=0 && |
| 676 | !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1) |
| 677 | ) { |
| 678 | fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n"); |
| 679 | *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 680 | |
| 681 | } else if( |
| 682 | data->ucm->ext->mappingsLength>0 && |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 683 | !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, false) |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 684 | ) { |
| 685 | *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 686 | } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) { |
| 687 | /* sort the table so that it can be turned into UTF-8-friendly data */ |
| 688 | ucm_sortTable(data->ucm->base); |
| 689 | } |
| 690 | |
| 691 | if(U_SUCCESS(*pErrorCode)) { |
| 692 | if( |
| 693 | /* add the base table after ucm_checkBaseExt()! */ |
| 694 | !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData) |
| 695 | ) { |
| 696 | *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 697 | } else { |
| 698 | /* |
| 699 | * addTable() may have requested moving more mappings to the extension table |
| 700 | * if they fit into the base toUnicode table but not into the |
| 701 | * base fromUnicode table. |
| 702 | * (Especially for UTF-8-friendly fromUnicode tables.) |
| 703 | * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them |
| 704 | * to be excluded from the extension toUnicode data. |
| 705 | * See MBCSOkForBaseFromUnicode() for which mappings do not fit into |
| 706 | * the base fromUnicode table. |
| 707 | */ |
| 708 | ucm_moveMappings(data->ucm->base, data->ucm->ext); |
| 709 | ucm_sortTable(data->ucm->ext); |
| 710 | if(data->ucm->ext->mappingsLength>0) { |
| 711 | /* prepare the extension table, if there is one */ |
| 712 | data->extData=CnvExtOpen(data->ucm); |
| 713 | if(data->extData==NULL) { |
| 714 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 715 | } else if( |
| 716 | !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData) |
| 717 | ) { |
| 718 | *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 719 | } |
| 720 | } |
| 721 | } |
| 722 | } |
| 723 | } else { |
| 724 | /* Build an extension-only .cnv file. */ |
| 725 | char baseFilename[500]; |
| 726 | char *basename; |
| 727 | |
| 728 | initConvData(&baseData); |
| 729 | |
| 730 | /* assemble a path/filename for data->ucm->baseName */ |
| 731 | uprv_strcpy(baseFilename, converterName); |
| 732 | basename=(char *)findBasename(baseFilename); |
| 733 | uprv_strcpy(basename, data->ucm->baseName); |
| 734 | uprv_strcat(basename, ".ucm"); |
| 735 | |
| 736 | /* read the base table */ |
| 737 | dataIsBase=readFile(&baseData, baseFilename, pErrorCode); |
| 738 | if(U_FAILURE(*pErrorCode)) { |
| 739 | return; |
| 740 | } else if(!dataIsBase) { |
| 741 | fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename); |
| 742 | *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 743 | } else { |
| 744 | /* prepare the extension table */ |
| 745 | data->extData=CnvExtOpen(data->ucm); |
| 746 | if(data->extData==NULL) { |
| 747 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 748 | } else { |
| 749 | /* fill in gaps in extension file header fields */ |
| 750 | UCMapping *m, *mLimit; |
| 751 | uint8_t fallbackFlags; |
| 752 | |
| 753 | baseStates=&baseData.ucm->states; |
| 754 | if(states->conversionType==UCNV_DBCS) { |
| 755 | staticData->minBytesPerChar=(int8_t)(states->minCharLength=2); |
| 756 | } else if(states->minCharLength==0) { |
| 757 | staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength); |
| 758 | } |
| 759 | if(states->maxCharLength<states->minCharLength) { |
| 760 | staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength); |
| 761 | } |
| 762 | |
| 763 | if(staticData->subCharLen==0) { |
| 764 | uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4); |
| 765 | staticData->subCharLen=baseData.staticData.subCharLen; |
| 766 | } |
| 767 | /* |
| 768 | * do not copy subChar1 - |
| 769 | * only use what is explicitly specified |
| 770 | * because it cannot be unset in the extension file header |
| 771 | */ |
| 772 | |
| 773 | /* get the fallback flags */ |
| 774 | fallbackFlags=0; |
| 775 | for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength; |
| 776 | m<mLimit && fallbackFlags!=3; |
| 777 | ++m |
| 778 | ) { |
| 779 | if(m->f==1) { |
| 780 | fallbackFlags|=1; |
| 781 | } else if(m->f==3) { |
| 782 | fallbackFlags|=2; |
| 783 | } |
| 784 | } |
| 785 | |
| 786 | if(fallbackFlags&1) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 787 | staticData->hasFromUnicodeFallback=true; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 788 | } |
| 789 | if(fallbackFlags&2) { |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 790 | staticData->hasToUnicodeFallback=true; |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 791 | } |
| 792 | |
| 793 | if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) { |
| 794 | fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n"); |
| 795 | *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 796 | |
| 797 | } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) { |
| 798 | fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n"); |
| 799 | *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 800 | |
| 801 | } else if( |
| 802 | !ucm_checkValidity(data->ucm->ext, baseStates) || |
Frank Tang | 1f164ee | 2022-11-08 12:31:27 -0800 | [diff] [blame^] | 803 | !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, false) |
jshin@chromium.org | 6f31ac3 | 2014-03-26 22:15:14 +0000 | [diff] [blame] | 804 | ) { |
| 805 | *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 806 | } else { |
| 807 | if(states->maxCharLength>1) { |
| 808 | /* |
| 809 | * When building a normal .cnv file with a base table |
| 810 | * for an MBCS (not SBCS) table with explicit precision flags, |
| 811 | * the MBCSAddTable() function marks some mappings for moving |
| 812 | * to the extension table. |
| 813 | * They fit into the base toUnicode table but not into the |
| 814 | * base fromUnicode table. |
| 815 | * (Note: We do have explicit precision flags because they are |
| 816 | * required for extension table generation, and |
| 817 | * ucm_checkBaseExt() verified it.) |
| 818 | * |
| 819 | * We do not call MBCSAddTable() here (we probably could) |
| 820 | * so we need to do the analysis before building the extension table. |
| 821 | * We assume that MBCSAddTable() will build a UTF-8-friendly table. |
| 822 | * Redundant mappings in the extension table are ok except they cost some size. |
| 823 | * |
| 824 | * Do this after ucm_checkBaseExt(). |
| 825 | */ |
| 826 | const MBCSData *mbcsData=MBCSGetDummy(); |
| 827 | int32_t needsMove=0; |
| 828 | for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength; |
| 829 | m<mLimit; |
| 830 | ++m |
| 831 | ) { |
| 832 | if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) { |
| 833 | m->f|=MBCS_FROM_U_EXT_FLAG; |
| 834 | m->moveFlag=UCM_MOVE_TO_EXT; |
| 835 | ++needsMove; |
| 836 | } |
| 837 | } |
| 838 | |
| 839 | if(needsMove!=0) { |
| 840 | ucm_moveMappings(baseData.ucm->base, data->ucm->ext); |
| 841 | ucm_sortTable(data->ucm->ext); |
| 842 | } |
| 843 | } |
| 844 | if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) { |
| 845 | *pErrorCode=U_INVALID_TABLE_FORMAT; |
| 846 | } |
| 847 | } |
| 848 | } |
| 849 | } |
| 850 | |
| 851 | cleanupConvData(&baseData); |
| 852 | } |
| 853 | } |
| 854 | |
| 855 | /* |
| 856 | * Hey, Emacs, please set the following: |
| 857 | * |
| 858 | * Local Variables: |
| 859 | * indent-tabs-mode: nil |
| 860 | * End: |
| 861 | * |
| 862 | */ |