Update to ICU 65.1
ICU 65.1 release notes: http://site.icu-project.org/download/65
CLDR 36: http://blog.unicode.org/2019/10/unicode-cldr-version-36-languagelocale.html
1. Update to 65-1
Diary https://docs.google.com/document/d/1RHttPn26N3hPeGUz9LLTsrQ7UvORLDOK9XumhgqgtvU/edit#
2. Remove unnecessary files
$ git rm source/data/*/*local.mk
$ git rm source/data/translit/root_subset.txt
3. Reduce the data size by filtering supplemental data
Size changes:
65-1 . 64-2
7786368 7777488 android/icudtl.dat
6412768 6427344 android_small/icudtl.dat
1418320 1394480 android_small/icudtl_extra.dat
5060080 5108144 cast/icudtl.dat
10481216 10450592 chromeos/icudtl.dat
10505952 10426816 common/icudtb.dat
10505952 10426816 common/icudtl.dat
900576 . 901808 flutter/icudtl.dat
6375984 6377520 ios/icudtl.dat
A. Filter supplementalData by keeping fields only if referenced by the code.
calendarData source/i18n/erarules.cpp
calendarPreferenceData source/i18n/calendar.cpp source/i18n/ucal.cpp
cldrVersion source/i18n/ulocdata.cpp
codeMappings source/i18n/region.cpp
idValidity source/i18n/region.cpp
measurementData source/i18n/ulocdata.cpp
territoryContainment source/i18n/region.cpp
timeData source/i18n/dtptngen.cpp
weekData source/i18n/dtptngen.cpp
weekData%variant source/i18n/dtptngen.cpp
The following fields will be dropped because there's no code reference in ICU, Chrome and v8:
codeMappingsCurrency
languageData
languageMatching
languageMatchingInfo
languageMatchingNew
parentLocales
subdivisionContainment
territoryInfo
unitPreferenceData
weekOfPreference
To verify that they're not needed, run the following command:
find -type f |egrep -v "./data/"|xargs egrep "(codeMappingsCurrency|languageData|languageMatching|parentLocales|subdivisionContainment|territoryInfo|unitPreferenceData|weekOfPreference)"
under icu4c/source in the ICU 65 tree.
B. Add back dnam in unit to prepare for the extension of Intl.DisplayNames
Note that ICU 65.1 comes with a new file, langInfo.res, which is needed by the
new LocaleMatcher API. The languageMatching* fields in supplementalData,
although named very similarly, are not used by the implementation of the
LocaleMatcher API. We also searched the Chromium code and found no references
to these field names in the Chromium tree.
See https://cs.chromium.org/search/?q=%5B%5EA-Za-z0-9%5D(codeMappingsCurrency%7ClanguageData%7ClanguageMatching%7CparentLocales%7CsubdivisionContainment%7CterritoryInfo%7CunitPreferenceData%7CweekOfPreference)%5B%5EA-Za-z0-9%5D&type=cs
Bug: chromium:1014272
Change-Id: I30220449b1dbe58371ca89fbfe61c6b3f47295c2
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/deps/icu/+/1881599
Reviewed-by: Jungshik Shin <jshin@chromium.org>
diff --git a/source/common/udata.cpp b/source/common/udata.cpp
index 1051f18..a54393c 100644
--- a/source/common/udata.cpp
+++ b/source/common/udata.cpp
@@ -111,11 +111,12 @@
static UHashtable *gCommonDataCache = NULL; /* Global hash table of opened ICU data files. */
static icu::UInitOnce gCommonDataCacheInitOnce = U_INITONCE_INITIALIZER;
-#if U_PLATFORM_HAS_WINUWP_API == 0
+#if !defined(ICU_DATA_DIR_WINDOWS)
static UDataFileAccess gDataFileAccess = UDATA_DEFAULT_ACCESS; // Access not synchronized.
// Modifying is documented as thread-unsafe.
#else
-static UDataFileAccess gDataFileAccess = UDATA_NO_FILES; // Windows UWP looks in one spot explicitly
+// If we are using the Windows data directory, then look in one spot only.
+static UDataFileAccess gDataFileAccess = UDATA_NO_FILES;
#endif
static UBool U_CALLCONV
@@ -207,7 +208,7 @@
return didUpdate;
}
-#if U_PLATFORM_HAS_WINUWP_API == 0
+#if !defined(ICU_DATA_DIR_WINDOWS)
static UBool
setCommonICUDataPointer(const void *pData, UBool /*warn*/, UErrorCode *pErrorCode) {
@@ -321,7 +322,7 @@
retVal = el->item;
}
#ifdef UDATA_DEBUG
- fprintf(stderr, "Cache: [%s] -> %p\n", baseName, retVal);
+ fprintf(stderr, "Cache: [%s] -> %p\n", baseName, (void*) retVal);
#endif
return retVal;
}
@@ -384,7 +385,7 @@
#ifdef UDATA_DEBUG
fprintf(stderr, "Cache: [%s] <<< %p : %s. vFunc=%p\n", newElement->name,
- newElement->item, u_errorName(subErr), newElement->item->vFuncs);
+ (void*) newElement->item, u_errorName(subErr), (void*) newElement->item->vFuncs);
#endif
if (subErr == U_USING_DEFAULT_WARNING || U_FAILURE(subErr)) {
@@ -478,7 +479,7 @@
nextPath = itemPath.data();
}
#ifdef UDATA_DEBUG
- fprintf(stderr, "SUFFIX=%s [%p]\n", inSuffix, inSuffix);
+ fprintf(stderr, "SUFFIX=%s [%p]\n", inSuffix, (void*) inSuffix);
#endif
/** Suffix **/
@@ -493,12 +494,11 @@
/* pathBuffer will hold the output path strings returned by this iterator */
#ifdef UDATA_DEBUG
- fprintf(stderr, "%p: init %s -> [path=%s], [base=%s], [suff=%s], [itempath=%s], [nextpath=%s], [checklast4=%s]\n",
- iter,
+ fprintf(stderr, "0: init %s -> [path=%s], [base=%s], [suff=%s], [itempath=%s], [nextpath=%s], [checklast4=%s]\n",
item,
path,
basename,
- suffix,
+ suffix.data(),
itemPath.data(),
nextPath,
checkLastFour?"TRUE":"false");
@@ -554,7 +554,7 @@
fprintf(stderr, "rest of path (IDD) = %s\n", currentPath);
fprintf(stderr, " ");
{
- uint32_t qqq;
+ int32_t qqq;
for(qqq=0;qqq<pathLen;qqq++)
{
fprintf(stderr, " ");
@@ -575,7 +575,7 @@
uprv_strlen(pathBasename)==(basenameLen+4)) { /* base+suffix = full len */
#ifdef UDATA_DEBUG
- fprintf(stderr, "Have %s file on the path: %s\n", suffix, pathBuffer.data());
+ fprintf(stderr, "Have %s file on the path: %s\n", suffix.data(), pathBuffer.data());
#endif
/* do nothing */
}
@@ -641,7 +641,8 @@
* our common data. *
* *
*----------------------------------------------------------------------*/
-#if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP Platform does not support dll icu data at this time
+#if !defined(ICU_DATA_DIR_WINDOWS)
+// When using the Windows system data, we expect only a single data file.
extern "C" const ICU_Data_Header U_DATA_API U_ICUDATA_ENTRY_POINT;
#endif
@@ -691,7 +692,8 @@
if(gCommonICUDataArray[commonDataIndex] != NULL) {
return gCommonICUDataArray[commonDataIndex];
}
-#if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP Platform does not support dll icu data at this time
+#if !defined(ICU_DATA_DIR_WINDOWS)
+// When using the Windows system data, we expect only a single data file.
int32_t i;
for(i = 0; i < commonDataIndex; ++i) {
if(gCommonICUDataArray[i]->pHeader == &U_ICUDATA_ENTRY_POINT.hdr) {
@@ -715,7 +717,8 @@
setCommonICUDataPointer(uprv_getICUData_conversion(), FALSE, pErrorCode);
}
*/
-#if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP Platform does not support dll icu data at this time
+#if !defined(ICU_DATA_DIR_WINDOWS)
+// When using the Windows system data, we expect only a single data file.
setCommonICUDataPointer(&U_ICUDATA_ENTRY_POINT.hdr, FALSE, pErrorCode);
{
Mutex lock;
@@ -832,8 +835,8 @@
* Use a specific mutex to avoid nested locks of the global mutex.
*/
#if MAP_IMPLEMENTATION==MAP_STDIO
- static UMutex *extendICUDataMutex = new UMutex();
- umtx_lock(extendICUDataMutex);
+ static UMutex extendICUDataMutex;
+ umtx_lock(&extendICUDataMutex);
#endif
if(!umtx_loadAcquire(gHaveTriedToLoadCommonData)) {
/* See if we can explicitly open a .dat file for the ICUData. */
@@ -1071,13 +1074,13 @@
/* look up the data piece in the common data */
pHeader=pCommonData->vFuncs->Lookup(pCommonData, tocEntryName, &length, subErrorCode);
#ifdef UDATA_DEBUG
- fprintf(stderr, "%s: pHeader=%p - %s\n", tocEntryName, pHeader, u_errorName(*subErrorCode));
+ fprintf(stderr, "%s: pHeader=%p - %s\n", tocEntryName, (void*) pHeader, u_errorName(*subErrorCode));
#endif
if(pHeader!=NULL) {
pEntryData = checkDataItem(pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode);
#ifdef UDATA_DEBUG
- fprintf(stderr, "pEntryData=%p\n", pEntryData);
+ fprintf(stderr, "pEntryData=%p\n", (void*) pEntryData);
#endif
if (U_FAILURE(*pErrorCode)) {
return NULL;
@@ -1280,12 +1283,12 @@
fprintf(stderr, " tocEntryPath = %s\n", tocEntryName.data());
#endif
-#if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP Platform does not support dll icu data at this time
+#if !defined(ICU_DATA_DIR_WINDOWS)
if(path == NULL) {
path = COMMON_DATA_NAME; /* "icudt26e" */
}
#else
- // Windows UWP expects only a single data file.
+ // When using the Windows system data, we expect only a single data file.
path = COMMON_DATA_NAME; /* "icudt26e" */
#endif