Blame - source/i18n/uspoof_impl.cpp - chromium.googlesource.com/chromium/deps/icu

blob: 47dca16aaf0640729b90e3e4d3ff48769af7acfd [file] [log] [blame]

jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1	/*
				2	**********************************************************************
				3	* Copyright (C) 2008-2013, International Business Machines
				4	* Corporation and others. All Rights Reserved.
				5	**********************************************************************
				6	*/
				7
				8	#include "unicode/utypes.h"
				9	#include "unicode/uspoof.h"
				10	#include "unicode/uchar.h"
				11	#include "unicode/uniset.h"
				12	#include "unicode/utf16.h"
				13	#include "utrie2.h"
				14	#include "cmemory.h"
				15	#include "cstring.h"
				16	#include "identifier_info.h"
				17	#include "scriptset.h"
				18	#include "udatamem.h"
				19	#include "umutex.h"
				20	#include "udataswp.h"
				21	#include "uassert.h"
				22	#include "uspoof_impl.h"
				23
				24	#if !UCONFIG_NO_NORMALIZATION
				25
				26
				27	U_NAMESPACE_BEGIN
				28
				29	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SpoofImpl)
				30
				31	SpoofImpl::SpoofImpl(SpoofData *data, UErrorCode &status) :
				32	fMagic(0), fChecks(USPOOF_ALL_CHECKS), fSpoofData(NULL), fAllowedCharsSet(NULL) ,
				33	fAllowedLocales(NULL), fCachedIdentifierInfo(NULL) {
				34	if (U_FAILURE(status)) {
				35	return;
				36	}
				37	fSpoofData = data;
				38	fRestrictionLevel = USPOOF_HIGHLY_RESTRICTIVE;
				39
				40	UnicodeSet *allowedCharsSet = new UnicodeSet(0, 0x10ffff);
				41	allowedCharsSet->freeze();
				42	fAllowedCharsSet = allowedCharsSet;
				43	fAllowedLocales = uprv_strdup("");
				44	if (fAllowedCharsSet == NULL \|\| fAllowedLocales == NULL) {
				45	status = U_MEMORY_ALLOCATION_ERROR;
				46	return;
				47	}
				48	fMagic = USPOOF_MAGIC;
				49	}
				50
				51
				52	SpoofImpl::SpoofImpl() :
				53	fMagic(USPOOF_MAGIC), fChecks(USPOOF_ALL_CHECKS), fSpoofData(NULL), fAllowedCharsSet(NULL) ,
				54	fAllowedLocales(NULL), fCachedIdentifierInfo(NULL) {
				55	UnicodeSet *allowedCharsSet = new UnicodeSet(0, 0x10ffff);
				56	allowedCharsSet->freeze();
				57	fAllowedCharsSet = allowedCharsSet;
				58	fAllowedLocales = uprv_strdup("");
				59	fRestrictionLevel = USPOOF_HIGHLY_RESTRICTIVE;
				60	}
				61
				62
				63	// Copy Constructor, used by the user level clone() function.
				64	SpoofImpl::SpoofImpl(const SpoofImpl &src, UErrorCode &status) :
				65	fMagic(0), fChecks(USPOOF_ALL_CHECKS), fSpoofData(NULL), fAllowedCharsSet(NULL) ,
				66	fAllowedLocales(NULL), fCachedIdentifierInfo(NULL) {
				67	if (U_FAILURE(status)) {
				68	return;
				69	}
				70	fMagic = src.fMagic;
				71	fChecks = src.fChecks;
				72	if (src.fSpoofData != NULL) {
				73	fSpoofData = src.fSpoofData->addReference();
				74	}
				75	fAllowedCharsSet = static_cast<const UnicodeSet *>(src.fAllowedCharsSet->clone());
				76	if (fAllowedCharsSet == NULL) {
				77	status = U_MEMORY_ALLOCATION_ERROR;
				78	}
				79	fAllowedLocales = uprv_strdup(src.fAllowedLocales);
				80	fRestrictionLevel = src.fRestrictionLevel;
				81	}
				82
				83	SpoofImpl::~SpoofImpl() {
				84	fMagic = 0; // head off application errors by preventing use of
				85	// of deleted objects.
				86	if (fSpoofData != NULL) {
				87	fSpoofData->removeReference(); // Will delete if refCount goes to zero.
				88	}
				89	delete fAllowedCharsSet;
				90	uprv_free((void *)fAllowedLocales);
				91	delete fCachedIdentifierInfo;
				92	}
				93
				94	//
				95	// Incoming parameter check on Status and the SpoofChecker object
				96	// received from the C API.
				97	//
				98	const SpoofImpl SpoofImpl::validateThis(const USpoofChecker sc, UErrorCode &status) {
				99	if (U_FAILURE(status)) {
				100	return NULL;
				101	}
				102	if (sc == NULL) {
				103	status = U_ILLEGAL_ARGUMENT_ERROR;
				104	return NULL;
				105	}
				106	SpoofImpl This = (SpoofImpl )sc;
				107	if (This->fMagic != USPOOF_MAGIC \|\|
				108	This->fSpoofData == NULL) {
				109	status = U_INVALID_FORMAT_ERROR;
				110	return NULL;
				111	}
				112	if (!SpoofData::validateDataVersion(This->fSpoofData->fRawData, status)) {
				113	return NULL;
				114	}
				115	return This;
				116	}
				117
				118	SpoofImpl SpoofImpl::validateThis(USpoofChecker sc, UErrorCode &status) {
				119	return const_cast<SpoofImpl *>
				120	(SpoofImpl::validateThis(const_cast<const USpoofChecker *>(sc), status));
				121	}
				122
				123
				124
				125	//--------------------------------------------------------------------------------------
				126	//
				127	// confusableLookup() This is the heart of the confusable skeleton generation
				128	// implementation.
				129	//
				130	// Given a source character, produce the corresponding
				131	// replacement character(s), appending them to the dest string.
				132	//
				133	//---------------------------------------------------------------------------------------
				134	int32_t SpoofImpl::confusableLookup(UChar32 inChar, int32_t tableMask, UnicodeString &dest) const {
				135
				136	// Binary search the spoof data key table for the inChar
				137	int32_t *low = fSpoofData->fCFUKeys;
				138	int32_t *mid = NULL;
				139	int32_t *limit = low + fSpoofData->fRawData->fCFUKeysSize;
				140	UChar32 midc;
				141	do {
				142	int32_t delta = ((int32_t)(limit-low))/2;
				143	mid = low + delta;
				144	midc = *mid & 0x1fffff;
				145	if (inChar == midc) {
				146	goto foundChar;
				147	} else if (inChar < midc) {
				148	limit = mid;
				149	} else {
				150	low = mid;
				151	}
				152	} while (low < limit-1);
				153	mid = low;
				154	midc = *mid & 0x1fffff;
				155	if (inChar != midc) {
				156	// Char not found. It maps to itself.
				157	int i = 0;
				158	dest.append(inChar);
				159	return i;
				160	}
				161	foundChar:
				162	int32_t keyFlags = *mid & 0xff000000;
				163	if ((keyFlags & tableMask) == 0) {
				164	// We found the right key char, but the entry doesn't pertain to the
				165	// table we need. See if there is an adjacent key that does
				166	if (keyFlags & USPOOF_KEY_MULTIPLE_VALUES) {
				167	int32_t *altMid;
				168	for (altMid = mid-1; (*altMid&0x00ffffff) == inChar; altMid--) {
				169	keyFlags = *altMid & 0xff000000;
				170	if (keyFlags & tableMask) {
				171	mid = altMid;
				172	goto foundKey;
				173	}
				174	}
				175	for (altMid = mid+1; (*altMid&0x00ffffff) == inChar; altMid++) {
				176	keyFlags = *altMid & 0xff000000;
				177	if (keyFlags & tableMask) {
				178	mid = altMid;
				179	goto foundKey;
				180	}
				181	}
				182	}
				183	// No key entry for this char & table.
				184	// The input char maps to itself.
				185	int i = 0;
				186	dest.append(inChar);
				187	return i;
				188	}
				189
				190	foundKey:
				191	int32_t stringLen = USPOOF_KEY_LENGTH_FIELD(keyFlags) + 1;
				192	int32_t keyTableIndex = (int32_t)(mid - fSpoofData->fCFUKeys);
				193
				194	// Value is either a UChar (for strings of length 1) or
				195	// an index into the string table (for longer strings)
				196	uint16_t value = fSpoofData->fCFUValues[keyTableIndex];
				197	if (stringLen == 1) {
				198	dest.append((UChar)value);
				199	return 1;
				200	}
				201
				202	// String length of 4 from the above lookup is used for all strings of length >= 4.
				203	// For these, get the real length from the string lengths table,
				204	// which maps string table indexes to lengths.
				205	// All strings of the same length are stored contiguously in the string table.
				206	// 'value' from the lookup above is the starting index for the desired string.
				207
				208	int32_t ix;
				209	if (stringLen == 4) {
				210	int32_t stringLengthsLimit = fSpoofData->fRawData->fCFUStringLengthsSize;
				211	for (ix = 0; ix < stringLengthsLimit; ix++) {
				212	if (fSpoofData->fCFUStringLengths[ix].fLastString >= value) {
				213	stringLen = fSpoofData->fCFUStringLengths[ix].fStrLength;
				214	break;
				215	}
				216	}
				217	U_ASSERT(ix < stringLengthsLimit);
				218	}
				219
				220	U_ASSERT(value + stringLen <= fSpoofData->fRawData->fCFUStringTableLen);
				221	UChar *src = &fSpoofData->fCFUStrings[value];
				222	dest.append(src, stringLen);
				223	return stringLen;
				224	}
				225
				226
				227	//---------------------------------------------------------------------------------------
				228	//
				229	// wholeScriptCheck()
				230	//
				231	// Input text is already normalized to NFD
				232	// Return the set of scripts, each of which can represent something that is
				233	// confusable with the input text. The script of the input text
				234	// is included; input consisting of characters from a single script will
				235	// always produce a result consisting of a set containing that script.
				236	//
				237	//---------------------------------------------------------------------------------------
				238	void SpoofImpl::wholeScriptCheck(
				239	const UnicodeString &text, ScriptSet *result, UErrorCode &status) const {
				240
				241	UTrie2 *table =
				242	(fChecks & USPOOF_ANY_CASE) ? fSpoofData->fAnyCaseTrie : fSpoofData->fLowerCaseTrie;
				243	result->setAll();
				244	int32_t length = text.length();
				245	for (int32_t inputIdx=0; inputIdx < length;) {
				246	UChar32 c = text.char32At(inputIdx);
				247	inputIdx += U16_LENGTH(c);
				248	uint32_t index = utrie2_get32(table, c);
				249	if (index == 0) {
				250	// No confusables in another script for this char.
				251	// TODO: we should change the data to have sets with just the single script
				252	// bit for the script of this char. Gets rid of this special case.
				253	// Until then, grab the script from the char and intersect it with the set.
				254	UScriptCode cpScript = uscript_getScript(c, &status);
				255	U_ASSERT(cpScript > USCRIPT_INHERITED);
				256	result->intersect(cpScript, status);
				257	} else if (index == 1) {
				258	// Script == Common or Inherited. Nothing to do.
				259	} else {
				260	result->intersect(fSpoofData->fScriptSets[index]);
				261	}
				262	}
				263	}
				264
				265
				266	void SpoofImpl::setAllowedLocales(const char *localesList, UErrorCode &status) {
				267	UnicodeSet allowedChars;
				268	UnicodeSet *tmpSet = NULL;
				269	const char *locStart = localesList;
				270	const char *locEnd = NULL;
				271	const char *localesListEnd = localesList + uprv_strlen(localesList);
				272	int32_t localeListCount = 0; // Number of locales provided by caller.
				273
				274	// Loop runs once per locale from the localesList, a comma separated list of locales.
				275	do {
				276	locEnd = uprv_strchr(locStart, ',');
				277	if (locEnd == NULL) {
				278	locEnd = localesListEnd;
				279	}
				280	while (*locStart == ' ') {
				281	locStart++;
				282	}
				283	const char *trimmedEnd = locEnd-1;
				284	while (trimmedEnd > locStart && *trimmedEnd == ' ') {
				285	trimmedEnd--;
				286	}
				287	if (trimmedEnd <= locStart) {
				288	break;
				289	}
				290	const char *locale = uprv_strndup(locStart, (int32_t)(trimmedEnd + 1 - locStart));
				291	localeListCount++;
				292
				293	// We have one locale from the locales list.
				294	// Add the script chars for this locale to the accumulating set of allowed chars.
				295	// If the locale is no good, we will be notified back via status.
				296	addScriptChars(locale, &allowedChars, status);
				297	uprv_free((void *)locale);
				298	if (U_FAILURE(status)) {
				299	break;
				300	}
				301	locStart = locEnd + 1;
				302	} while (locStart < localesListEnd);
				303
				304	// If our caller provided an empty list of locales, we disable the allowed characters checking
				305	if (localeListCount == 0) {
				306	uprv_free((void *)fAllowedLocales);
				307	fAllowedLocales = uprv_strdup("");
				308	tmpSet = new UnicodeSet(0, 0x10ffff);
				309	if (fAllowedLocales == NULL \|\| tmpSet == NULL) {
				310	status = U_MEMORY_ALLOCATION_ERROR;
				311	return;
				312	}
				313	tmpSet->freeze();
				314	delete fAllowedCharsSet;
				315	fAllowedCharsSet = tmpSet;
				316	fChecks &= ~USPOOF_CHAR_LIMIT;
				317	return;
				318	}
				319
				320
				321	// Add all common and inherited characters to the set of allowed chars.
				322	UnicodeSet tempSet;
				323	tempSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_COMMON, status);
				324	allowedChars.addAll(tempSet);
				325	tempSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_INHERITED, status);
				326	allowedChars.addAll(tempSet);
				327
				328	// If anything went wrong, we bail out without changing
				329	// the state of the spoof checker.
				330	if (U_FAILURE(status)) {
				331	return;
				332	}
				333
				334	// Store the updated spoof checker state.
				335	tmpSet = static_cast<UnicodeSet *>(allowedChars.clone());
				336	const char *tmpLocalesList = uprv_strdup(localesList);
				337	if (tmpSet == NULL \|\| tmpLocalesList == NULL) {
				338	status = U_MEMORY_ALLOCATION_ERROR;
				339	return;
				340	}
				341	uprv_free((void *)fAllowedLocales);
				342	fAllowedLocales = tmpLocalesList;
				343	tmpSet->freeze();
				344	delete fAllowedCharsSet;
				345	fAllowedCharsSet = tmpSet;
				346	fChecks \|= USPOOF_CHAR_LIMIT;
				347	}
				348
				349
				350	const char * SpoofImpl::getAllowedLocales(UErrorCode &/status/) {
				351	return fAllowedLocales;
				352	}
				353
				354
				355	// Given a locale (a language), add all the characters from all of the scripts used with that language
				356	// to the allowedChars UnicodeSet
				357
				358	void SpoofImpl::addScriptChars(const char locale, UnicodeSet allowedChars, UErrorCode &status) {
				359	UScriptCode scripts[30];
				360
				361	int32_t numScripts = uscript_getCode(locale, scripts, sizeof(scripts)/sizeof(UScriptCode), &status);
				362	if (U_FAILURE(status)) {
				363	return;
				364	}
				365	if (status == U_USING_DEFAULT_WARNING) {
				366	status = U_ILLEGAL_ARGUMENT_ERROR;
				367	return;
				368	}
				369	UnicodeSet tmpSet;
				370	int32_t i;
				371	for (i=0; i<numScripts; i++) {
				372	tmpSet.applyIntPropertyValue(UCHAR_SCRIPT, scripts[i], status);
				373	allowedChars->addAll(tmpSet);
				374	}
				375	}
				376
				377
				378	// Convert a text format hex number. Utility function used by builder code. Static.
				379	// Input: UChar *string text. Output: a UChar32
				380	// Input has been pre-checked, and will have no non-hex chars.
				381	// The number must fall in the code point range of 0..0x10ffff
				382	// Static Function.
				383	UChar32 SpoofImpl::ScanHex(const UChar *s, int32_t start, int32_t limit, UErrorCode &status) {
				384	if (U_FAILURE(status)) {
				385	return 0;
				386	}
				387	U_ASSERT(limit-start > 0);
				388	uint32_t val = 0;
				389	int i;
				390	for (i=start; i<limit; i++) {
				391	int digitVal = s[i] - 0x30;
				392	if (digitVal>9) {
				393	digitVal = 0xa + (s[i] - 0x41); // Upper Case 'A'
				394	}
				395	if (digitVal>15) {
				396	digitVal = 0xa + (s[i] - 0x61); // Lower Case 'a'
				397	}
				398	U_ASSERT(digitVal <= 0xf);
				399	val <<= 4;
				400	val += digitVal;
				401	}
				402	if (val > 0x10ffff) {
				403	status = U_PARSE_ERROR;
				404	val = 0;
				405	}
				406	return (UChar32)val;
				407	}
				408
				409	// IdentifierInfo Cache. IdentifierInfo objects are somewhat expensive to create.
				410	// Maintain a one-element cache, which is sufficient to avoid repeatedly
				411	// creating new ones unless we get multi-thread concurrency in spoof
				412	// check operations, which should be statistically uncommon.
				413
				414	// These functions are used in place of new & delete of an IdentifierInfo.
				415	// They will recycle the IdentifierInfo when possible.
				416	// They are logically const, and used within const functions that must be thread safe.
				417	IdentifierInfo *SpoofImpl::getIdentifierInfo(UErrorCode &status) const {
				418	IdentifierInfo *returnIdInfo = NULL;
				419	if (U_FAILURE(status)) {
				420	return returnIdInfo;
				421	}
				422	SpoofImpl nonConstThis = const_cast<SpoofImpl >(this);
				423	{
				424	Mutex m;
				425	returnIdInfo = nonConstThis->fCachedIdentifierInfo;
				426	nonConstThis->fCachedIdentifierInfo = NULL;
				427	}
				428	if (returnIdInfo == NULL) {
				429	returnIdInfo = new IdentifierInfo(status);
				430	if (U_SUCCESS(status) && returnIdInfo == NULL) {
				431	status = U_MEMORY_ALLOCATION_ERROR;
				432	}
				433	if (U_FAILURE(status) && returnIdInfo != NULL) {
				434	delete returnIdInfo;
				435	returnIdInfo = NULL;
				436	}
				437	}
				438	return returnIdInfo;
				439	}
				440
				441
				442	void SpoofImpl::releaseIdentifierInfo(IdentifierInfo *idInfo) const {
				443	if (idInfo != NULL) {
				444	SpoofImpl nonConstThis = const_cast<SpoofImpl >(this);
				445	{
				446	Mutex m;
				447	if (nonConstThis->fCachedIdentifierInfo == NULL) {
				448	nonConstThis->fCachedIdentifierInfo = idInfo;
				449	idInfo = NULL;
				450	}
				451	}
				452	delete idInfo;
				453	}
				454	}
				455
				456
				457
				458
				459	//----------------------------------------------------------------------------------------------
				460	//
				461	// class SpoofData Implementation
				462	//
				463	//----------------------------------------------------------------------------------------------
				464
				465
				466	UBool SpoofData::validateDataVersion(const SpoofDataHeader *rawData, UErrorCode &status) {
				467	if (U_FAILURE(status) \|\|
				468	rawData == NULL \|\|
				469	rawData->fMagic != USPOOF_MAGIC \|\|
				470	rawData->fFormatVersion[0] > 1 \|\|
				471	rawData->fFormatVersion[1] > 0) {
				472	status = U_INVALID_FORMAT_ERROR;
				473	return FALSE;
				474	}
				475	return TRUE;
				476	}
				477
				478	//
				479	// SpoofData::getDefault() - return a wrapper around the spoof data that is
				480	// baked into the default ICU data.
				481	//
				482	SpoofData *SpoofData::getDefault(UErrorCode &status) {
				483	// TODO: Cache it. Lazy create, keep until cleanup.
				484
				485	UDataMemory *udm = udata_open(NULL, "cfu", "confusables", &status);
				486	if (U_FAILURE(status)) {
				487	return NULL;
				488	}
				489	SpoofData *This = new SpoofData(udm, status);
				490	if (U_FAILURE(status)) {
				491	delete This;
				492	return NULL;
				493	}
				494	if (This == NULL) {
				495	status = U_MEMORY_ALLOCATION_ERROR;
				496	}
				497	return This;
				498	}
				499
				500
				501	SpoofData::SpoofData(UDataMemory *udm, UErrorCode &status)
				502	{
				503	reset();
				504	if (U_FAILURE(status)) {
				505	return;
				506	}
				507	fRawData = reinterpret_cast<SpoofDataHeader *>
				508	((char *)(udm->pHeader) + udm->pHeader->dataHeader.headerSize);
				509	fUDM = udm;
				510	validateDataVersion(fRawData, status);
				511	initPtrs(status);
				512	}
				513
				514
				515	SpoofData::SpoofData(const void *data, int32_t length, UErrorCode &status)
				516	{
				517	reset();
				518	if (U_FAILURE(status)) {
				519	return;
				520	}
				521	if ((size_t)length < sizeof(SpoofDataHeader)) {
				522	status = U_INVALID_FORMAT_ERROR;
				523	return;
				524	}
				525	void ncData = const_cast<void >(data);
				526	fRawData = static_cast<SpoofDataHeader *>(ncData);
				527	if (length < fRawData->fLength) {
				528	status = U_INVALID_FORMAT_ERROR;
				529	return;
				530	}
				531	validateDataVersion(fRawData, status);
				532	initPtrs(status);
				533	}
				534
				535
				536	// Spoof Data constructor for use from data builder.
				537	// Initializes a new, empty data area that will be populated later.
				538	SpoofData::SpoofData(UErrorCode &status) {
				539	reset();
				540	if (U_FAILURE(status)) {
				541	return;
				542	}
				543	fDataOwned = true;
				544	fRefCount = 1;
				545
				546	// The spoof header should already be sized to be a multiple of 16 bytes.
				547	// Just in case it's not, round it up.
				548	uint32_t initialSize = (sizeof(SpoofDataHeader) + 15) & ~15;
				549	U_ASSERT(initialSize == sizeof(SpoofDataHeader));
				550
				551	fRawData = static_cast<SpoofDataHeader *>(uprv_malloc(initialSize));
				552	fMemLimit = initialSize;
				553	if (fRawData == NULL) {
				554	status = U_MEMORY_ALLOCATION_ERROR;
				555	return;
				556	}
				557	uprv_memset(fRawData, 0, initialSize);
				558
				559	fRawData->fMagic = USPOOF_MAGIC;
				560	fRawData->fFormatVersion[0] = 1;
				561	fRawData->fFormatVersion[1] = 0;
				562	fRawData->fFormatVersion[2] = 0;
				563	fRawData->fFormatVersion[3] = 0;
				564	initPtrs(status);
				565	}
				566
				567	// reset() - initialize all fields.
				568	// Should be updated if any new fields are added.
				569	// Called by constructors to put things in a known initial state.
				570	void SpoofData::reset() {
				571	fRawData = NULL;
				572	fDataOwned = FALSE;
				573	fUDM = NULL;
				574	fMemLimit = 0;
				575	fRefCount = 1;
				576	fCFUKeys = NULL;
				577	fCFUValues = NULL;
				578	fCFUStringLengths = NULL;
				579	fCFUStrings = NULL;
				580	fAnyCaseTrie = NULL;
				581	fLowerCaseTrie = NULL;
				582	fScriptSets = NULL;
				583	}
				584
				585
				586	// SpoofData::initPtrs()
				587	// Initialize the pointers to the various sections of the raw data.
				588	//
				589	// This function is used both during the Trie building process (multiple
				590	// times, as the individual data sections are added), and
				591	// during the opening of a Spoof Checker from prebuilt data.
				592	//
				593	// The pointers for non-existent data sections (identified by an offset of 0)
				594	// are set to NULL.
				595	//
				596	// Note: During building the data, adding each new data section
				597	// reallocs the raw data area, which likely relocates it, which
				598	// in turn requires reinitializing all of the pointers into it, hence
				599	// multiple calls to this function during building.
				600	//
				601	void SpoofData::initPtrs(UErrorCode &status) {
				602	fCFUKeys = NULL;
				603	fCFUValues = NULL;
				604	fCFUStringLengths = NULL;
				605	fCFUStrings = NULL;
				606	if (U_FAILURE(status)) {
				607	return;
				608	}
				609	if (fRawData->fCFUKeys != 0) {
				610	fCFUKeys = (int32_t )((char )fRawData + fRawData->fCFUKeys);
				611	}
				612	if (fRawData->fCFUStringIndex != 0) {
				613	fCFUValues = (uint16_t )((char )fRawData + fRawData->fCFUStringIndex);
				614	}
				615	if (fRawData->fCFUStringLengths != 0) {
				616	fCFUStringLengths = (SpoofStringLengthsElement )((char )fRawData + fRawData->fCFUStringLengths);
				617	}
				618	if (fRawData->fCFUStringTable != 0) {
				619	fCFUStrings = (UChar )((char )fRawData + fRawData->fCFUStringTable);
				620	}
				621
				622	if (fAnyCaseTrie == NULL && fRawData->fAnyCaseTrie != 0) {
				623	fAnyCaseTrie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
				624	(char *)fRawData + fRawData->fAnyCaseTrie, fRawData->fAnyCaseTrieLength, NULL, &status);
				625	}
				626	if (fLowerCaseTrie == NULL && fRawData->fLowerCaseTrie != 0) {
				627	fLowerCaseTrie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
				628	(char *)fRawData + fRawData->fLowerCaseTrie, fRawData->fLowerCaseTrieLength, NULL, &status);
				629	}
				630
				631	if (fRawData->fScriptSets != 0) {
				632	fScriptSets = (ScriptSet )((char )fRawData + fRawData->fScriptSets);
				633	}
				634	}
				635
				636
				637	SpoofData::~SpoofData() {
				638	utrie2_close(fAnyCaseTrie);
				639	fAnyCaseTrie = NULL;
				640	utrie2_close(fLowerCaseTrie);
				641	fLowerCaseTrie = NULL;
				642	if (fDataOwned) {
				643	uprv_free(fRawData);
				644	}
				645	fRawData = NULL;
				646	if (fUDM != NULL) {
				647	udata_close(fUDM);
				648	}
				649	fUDM = NULL;
				650	}
				651
				652
				653	void SpoofData::removeReference() {
				654	if (umtx_atomic_dec(&fRefCount) == 0) {
				655	delete this;
				656	}
				657	}
				658
				659
				660	SpoofData *SpoofData::addReference() {
				661	umtx_atomic_inc(&fRefCount);
				662	return this;
				663	}
				664
				665
				666	void *SpoofData::reserveSpace(int32_t numBytes, UErrorCode &status) {
				667	if (U_FAILURE(status)) {
				668	return NULL;
				669	}
				670	if (!fDataOwned) {
				671	U_ASSERT(FALSE);
				672	status = U_INTERNAL_PROGRAM_ERROR;
				673	return NULL;
				674	}
				675
				676	numBytes = (numBytes + 15) & ~15; // Round up to a multiple of 16
				677	uint32_t returnOffset = fMemLimit;
				678	fMemLimit += numBytes;
				679	fRawData = static_cast<SpoofDataHeader *>(uprv_realloc(fRawData, fMemLimit));
				680	fRawData->fLength = fMemLimit;
				681	uprv_memset((char *)fRawData + returnOffset, 0, numBytes);
				682	initPtrs(status);
				683	return (char *)fRawData + returnOffset;
				684	}
				685
				686
				687	U_NAMESPACE_END
				688
				689	U_NAMESPACE_USE
				690
				691	//-----------------------------------------------------------------------------
				692	//
				693	// uspoof_swap - byte swap and char encoding swap of spoof data
				694	//
				695	//-----------------------------------------------------------------------------
				696	U_CAPI int32_t U_EXPORT2
				697	uspoof_swap(const UDataSwapper ds, const void inData, int32_t length, void *outData,
				698	UErrorCode *status) {
				699
				700	if (status == NULL \|\| U_FAILURE(*status)) {
				701	return 0;
				702	}
				703	if(ds==NULL \|\| inData==NULL \|\| length<-1 \|\| (length>0 && outData==NULL)) {
				704	*status=U_ILLEGAL_ARGUMENT_ERROR;
				705	return 0;
				706	}
				707
				708	//
				709	// Check that the data header is for spoof data.
				710	// (Header contents are defined in gencfu.cpp)
				711	//
				712	const UDataInfo pInfo = (const UDataInfo )((const char *)inData+4);
				713	if(!( pInfo->dataFormat[0]==0x43 && /* dataFormat="Cfu " */
				714	pInfo->dataFormat[1]==0x66 &&
				715	pInfo->dataFormat[2]==0x75 &&
				716	pInfo->dataFormat[3]==0x20 &&
				717	pInfo->formatVersion[0]==1 )) {
				718	udata_printError(ds, "uspoof_swap(): data format %02x.%02x.%02x.%02x "
				719	"(format version %02x %02x %02x %02x) is not recognized\n",
				720	pInfo->dataFormat[0], pInfo->dataFormat[1],
				721	pInfo->dataFormat[2], pInfo->dataFormat[3],
				722	pInfo->formatVersion[0], pInfo->formatVersion[1],
				723	pInfo->formatVersion[2], pInfo->formatVersion[3]);
				724	*status=U_UNSUPPORTED_ERROR;
				725	return 0;
				726	}
				727
				728	//
				729	// Swap the data header. (This is the generic ICU Data Header, not the uspoof Specific
				730	// header). This swap also conveniently gets us
				731	// the size of the ICU d.h., which lets us locate the start
				732	// of the uspoof specific data.
				733	//
				734	int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, status);
				735
				736
				737	//
				738	// Get the Spoof Data Header, and check that it appears to be OK.
				739	//
				740	//
				741	const uint8_t inBytes =(const uint8_t )inData+headerSize;
				742	SpoofDataHeader spoofDH = (SpoofDataHeader )inBytes;
				743	if (ds->readUInt32(spoofDH->fMagic) != USPOOF_MAGIC \|\|
				744	ds->readUInt32(spoofDH->fLength) < sizeof(SpoofDataHeader))
				745	{
				746	udata_printError(ds, "uspoof_swap(): Spoof Data header is invalid.\n");
				747	*status=U_UNSUPPORTED_ERROR;
				748	return 0;
				749	}
				750
				751	//
				752	// Prefight operation? Just return the size
				753	//
				754	int32_t spoofDataLength = ds->readUInt32(spoofDH->fLength);
				755	int32_t totalSize = headerSize + spoofDataLength;
				756	if (length < 0) {
				757	return totalSize;
				758	}
				759
				760	//
				761	// Check that length passed in is consistent with length from Spoof data header.
				762	//
				763	if (length < totalSize) {
				764	udata_printError(ds, "uspoof_swap(): too few bytes (%d after ICU Data header) for spoof data.\n",
				765	spoofDataLength);
				766	*status=U_INDEX_OUTOFBOUNDS_ERROR;
				767	return 0;
				768	}
				769
				770
				771	//
				772	// Swap the Data. Do the data itself first, then the Spoof Data Header, because
				773	// we need to reference the header to locate the data, and an
				774	// inplace swap of the header leaves it unusable.
				775	//
				776	uint8_t outBytes = (uint8_t )outData + headerSize;
				777	SpoofDataHeader outputDH = (SpoofDataHeader )outBytes;
				778
				779	int32_t sectionStart;
				780	int32_t sectionLength;
				781
				782	//
				783	// If not swapping in place, zero out the output buffer before starting.
				784	// Gaps may exist between the individual sections, and these must be zeroed in
				785	// the output buffer. The simplest way to do that is to just zero the whole thing.
				786	//
				787	if (inBytes != outBytes) {
				788	uprv_memset(outBytes, 0, spoofDataLength);
				789	}
				790
				791	// Confusables Keys Section (fCFUKeys)
				792	sectionStart = ds->readUInt32(spoofDH->fCFUKeys);
				793	sectionLength = ds->readUInt32(spoofDH->fCFUKeysSize) * 4;
				794	ds->swapArray32(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);
				795
				796	// String Index Section
				797	sectionStart = ds->readUInt32(spoofDH->fCFUStringIndex);
				798	sectionLength = ds->readUInt32(spoofDH->fCFUStringIndexSize) * 2;
				799	ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);
				800
				801	// String Table Section
				802	sectionStart = ds->readUInt32(spoofDH->fCFUStringTable);
				803	sectionLength = ds->readUInt32(spoofDH->fCFUStringTableLen) * 2;
				804	ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);
				805
				806	// String Lengths Section
				807	sectionStart = ds->readUInt32(spoofDH->fCFUStringLengths);
				808	sectionLength = ds->readUInt32(spoofDH->fCFUStringLengthsSize) * 4;
				809	ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);
				810
				811	// Any Case Trie
				812	sectionStart = ds->readUInt32(spoofDH->fAnyCaseTrie);
				813	sectionLength = ds->readUInt32(spoofDH->fAnyCaseTrieLength);
				814	utrie2_swap(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);
				815
				816	// Lower Case Trie
				817	sectionStart = ds->readUInt32(spoofDH->fLowerCaseTrie);
				818	sectionLength = ds->readUInt32(spoofDH->fLowerCaseTrieLength);
				819	utrie2_swap(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);
				820
				821	// Script Sets. The data is an array of int32_t
				822	sectionStart = ds->readUInt32(spoofDH->fScriptSets);
				823	sectionLength = ds->readUInt32(spoofDH->fScriptSetsLength) * sizeof(ScriptSet);
				824	ds->swapArray32(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);
				825
				826	// And, last, swap the header itself.
				827	// int32_t fMagic // swap this
				828	// uint8_t fFormatVersion[4] // Do not swap this, just copy
				829	// int32_t fLength and all the rest // Swap the rest, all is 32 bit stuff.
				830	//
				831	uint32_t magic = ds->readUInt32(spoofDH->fMagic);
				832	ds->writeUInt32((uint32_t *)&outputDH->fMagic, magic);
				833
				834	if (outputDH->fFormatVersion != spoofDH->fFormatVersion) {
				835	uprv_memcpy(outputDH->fFormatVersion, spoofDH->fFormatVersion, sizeof(spoofDH->fFormatVersion));
				836	}
				837	// swap starting at fLength
				838	ds->swapArray32(ds, &spoofDH->fLength, sizeof(SpoofDataHeader)-8 /* minus magic and fFormatVersion[4] */, &outputDH->fLength, status);
				839
				840	return totalSize;
				841	}
				842
				843	#endif
				844
				845