Blame - source/i18n/uspoof_impl.cpp - chromium.googlesource.com/chromium/deps/icu

blob: 2c1f088b12db24bb4746892396cbdd63780026b8 [file] [log] [blame]

Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1	// © 2016 and later: Unicode, Inc. and others.
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2	// License & terms of use: http://www.unicode.org/copyright.html
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	3	/*
				4	**********************************************************************
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	5	* Copyright (C) 2008-2016, International Business Machines
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	6	* Corporation and others. All Rights Reserved.
				7	**********************************************************************
				8	*/
				9
				10	#include "unicode/utypes.h"
				11	#include "unicode/uspoof.h"
				12	#include "unicode/uchar.h"
				13	#include "unicode/uniset.h"
				14	#include "unicode/utf16.h"
				15	#include "utrie2.h"
				16	#include "cmemory.h"
				17	#include "cstring.h"
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	18	#include "scriptset.h"
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	19	#include "umutex.h"
				20	#include "udataswp.h"
				21	#include "uassert.h"
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	22	#include "ucln_in.h"
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	23	#include "uspoof_impl.h"
				24
				25	#if !UCONFIG_NO_NORMALIZATION
				26
				27
				28	U_NAMESPACE_BEGIN
				29
				30	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SpoofImpl)
				31
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	32	SpoofImpl::SpoofImpl(SpoofData *data, UErrorCode& status) {
				33	construct(status);
				34	fSpoofData = data;
				35	}
				36
				37	SpoofImpl::SpoofImpl(UErrorCode& status) {
				38	construct(status);
				39
				40	// TODO: Call this method where it is actually needed, instead of in the
				41	// constructor, to allow for lazy data loading. See #12696.
				42	fSpoofData = SpoofData::getDefault(status);
				43	}
				44
				45	SpoofImpl::SpoofImpl() {
				46	UErrorCode status = U_ZERO_ERROR;
				47	construct(status);
				48
				49	// TODO: Call this method where it is actually needed, instead of in the
				50	// constructor, to allow for lazy data loading. See #12696.
				51	fSpoofData = SpoofData::getDefault(status);
				52	}
				53
				54	void SpoofImpl::construct(UErrorCode& status) {
				55	fMagic = USPOOF_MAGIC;
				56	fChecks = USPOOF_ALL_CHECKS;
				57	fSpoofData = NULL;
				58	fAllowedCharsSet = NULL;
				59	fAllowedLocales = NULL;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	60	fRestrictionLevel = USPOOF_HIGHLY_RESTRICTIVE;
				61
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	62	if (U_FAILURE(status)) { return; }
				63
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	64	UnicodeSet *allowedCharsSet = new UnicodeSet(0, 0x10ffff);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	65	fAllowedCharsSet = allowedCharsSet;
				66	fAllowedLocales = uprv_strdup("");
				67	if (fAllowedCharsSet == NULL \|\| fAllowedLocales == NULL) {
				68	status = U_MEMORY_ALLOCATION_ERROR;
				69	return;
				70	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	71	allowedCharsSet->freeze();
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	72	}
				73
				74
				75	// Copy Constructor, used by the user level clone() function.
				76	SpoofImpl::SpoofImpl(const SpoofImpl &src, UErrorCode &status) :
				77	fMagic(0), fChecks(USPOOF_ALL_CHECKS), fSpoofData(NULL), fAllowedCharsSet(NULL) ,
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	78	fAllowedLocales(NULL) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	79	if (U_FAILURE(status)) {
				80	return;
				81	}
				82	fMagic = src.fMagic;
				83	fChecks = src.fChecks;
				84	if (src.fSpoofData != NULL) {
				85	fSpoofData = src.fSpoofData->addReference();
				86	}
				87	fAllowedCharsSet = static_cast<const UnicodeSet *>(src.fAllowedCharsSet->clone());
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	88	fAllowedLocales = uprv_strdup(src.fAllowedLocales);
				89	if (fAllowedCharsSet == NULL \|\| fAllowedLocales == NULL) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	90	status = U_MEMORY_ALLOCATION_ERROR;
				91	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	92	fRestrictionLevel = src.fRestrictionLevel;
				93	}
				94
				95	SpoofImpl::~SpoofImpl() {
				96	fMagic = 0; // head off application errors by preventing use of
				97	// of deleted objects.
				98	if (fSpoofData != NULL) {
				99	fSpoofData->removeReference(); // Will delete if refCount goes to zero.
				100	}
				101	delete fAllowedCharsSet;
				102	uprv_free((void *)fAllowedLocales);
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	103	}
				104
				105	// Cast this instance as a USpoofChecker for the C API.
				106	USpoofChecker *SpoofImpl::asUSpoofChecker() {
				107	return reinterpret_cast<USpoofChecker*>(this);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	108	}
				109
				110	//
				111	// Incoming parameter check on Status and the SpoofChecker object
				112	// received from the C API.
				113	//
				114	const SpoofImpl SpoofImpl::validateThis(const USpoofChecker sc, UErrorCode &status) {
				115	if (U_FAILURE(status)) {
				116	return NULL;
				117	}
				118	if (sc == NULL) {
				119	status = U_ILLEGAL_ARGUMENT_ERROR;
				120	return NULL;
				121	}
				122	SpoofImpl This = (SpoofImpl )sc;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	123	if (This->fMagic != USPOOF_MAGIC) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	124	status = U_INVALID_FORMAT_ERROR;
				125	return NULL;
				126	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	127	if (This->fSpoofData != NULL && !This->fSpoofData->validateDataVersion(status)) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	128	return NULL;
				129	}
				130	return This;
				131	}
				132
				133	SpoofImpl SpoofImpl::validateThis(USpoofChecker sc, UErrorCode &status) {
				134	return const_cast<SpoofImpl *>
				135	(SpoofImpl::validateThis(const_cast<const USpoofChecker *>(sc), status));
				136	}
				137
				138
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	139	void SpoofImpl::setAllowedLocales(const char *localesList, UErrorCode &status) {
				140	UnicodeSet allowedChars;
				141	UnicodeSet *tmpSet = NULL;
				142	const char *locStart = localesList;
				143	const char *locEnd = NULL;
				144	const char *localesListEnd = localesList + uprv_strlen(localesList);
				145	int32_t localeListCount = 0; // Number of locales provided by caller.
				146
				147	// Loop runs once per locale from the localesList, a comma separated list of locales.
				148	do {
				149	locEnd = uprv_strchr(locStart, ',');
				150	if (locEnd == NULL) {
				151	locEnd = localesListEnd;
				152	}
				153	while (*locStart == ' ') {
				154	locStart++;
				155	}
				156	const char *trimmedEnd = locEnd-1;
				157	while (trimmedEnd > locStart && *trimmedEnd == ' ') {
				158	trimmedEnd--;
				159	}
				160	if (trimmedEnd <= locStart) {
				161	break;
				162	}
				163	const char *locale = uprv_strndup(locStart, (int32_t)(trimmedEnd + 1 - locStart));
				164	localeListCount++;
				165
				166	// We have one locale from the locales list.
				167	// Add the script chars for this locale to the accumulating set of allowed chars.
				168	// If the locale is no good, we will be notified back via status.
				169	addScriptChars(locale, &allowedChars, status);
				170	uprv_free((void *)locale);
				171	if (U_FAILURE(status)) {
				172	break;
				173	}
				174	locStart = locEnd + 1;
				175	} while (locStart < localesListEnd);
				176
				177	// If our caller provided an empty list of locales, we disable the allowed characters checking
				178	if (localeListCount == 0) {
				179	uprv_free((void *)fAllowedLocales);
				180	fAllowedLocales = uprv_strdup("");
				181	tmpSet = new UnicodeSet(0, 0x10ffff);
				182	if (fAllowedLocales == NULL \|\| tmpSet == NULL) {
				183	status = U_MEMORY_ALLOCATION_ERROR;
				184	return;
				185	}
				186	tmpSet->freeze();
				187	delete fAllowedCharsSet;
				188	fAllowedCharsSet = tmpSet;
				189	fChecks &= ~USPOOF_CHAR_LIMIT;
				190	return;
				191	}
				192
				193
				194	// Add all common and inherited characters to the set of allowed chars.
				195	UnicodeSet tempSet;
				196	tempSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_COMMON, status);
				197	allowedChars.addAll(tempSet);
				198	tempSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_INHERITED, status);
				199	allowedChars.addAll(tempSet);
				200
				201	// If anything went wrong, we bail out without changing
				202	// the state of the spoof checker.
				203	if (U_FAILURE(status)) {
				204	return;
				205	}
				206
				207	// Store the updated spoof checker state.
				208	tmpSet = static_cast<UnicodeSet *>(allowedChars.clone());
				209	const char *tmpLocalesList = uprv_strdup(localesList);
				210	if (tmpSet == NULL \|\| tmpLocalesList == NULL) {
				211	status = U_MEMORY_ALLOCATION_ERROR;
				212	return;
				213	}
				214	uprv_free((void *)fAllowedLocales);
				215	fAllowedLocales = tmpLocalesList;
				216	tmpSet->freeze();
				217	delete fAllowedCharsSet;
				218	fAllowedCharsSet = tmpSet;
				219	fChecks \|= USPOOF_CHAR_LIMIT;
				220	}
				221
				222
				223	const char * SpoofImpl::getAllowedLocales(UErrorCode &/status/) {
				224	return fAllowedLocales;
				225	}
				226
				227
				228	// Given a locale (a language), add all the characters from all of the scripts used with that language
				229	// to the allowedChars UnicodeSet
				230
				231	void SpoofImpl::addScriptChars(const char locale, UnicodeSet allowedChars, UErrorCode &status) {
				232	UScriptCode scripts[30];
				233
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	234	int32_t numScripts = uscript_getCode(locale, scripts, UPRV_LENGTHOF(scripts), &status);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	235	if (U_FAILURE(status)) {
				236	return;
				237	}
				238	if (status == U_USING_DEFAULT_WARNING) {
				239	status = U_ILLEGAL_ARGUMENT_ERROR;
				240	return;
				241	}
				242	UnicodeSet tmpSet;
				243	int32_t i;
				244	for (i=0; i<numScripts; i++) {
				245	tmpSet.applyIntPropertyValue(UCHAR_SCRIPT, scripts[i], status);
				246	allowedChars->addAll(tmpSet);
				247	}
				248	}
				249
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	250	// Computes the augmented script set for a code point, according to UTS 39 section 5.1.
				251	void SpoofImpl::getAugmentedScriptSet(UChar32 codePoint, ScriptSet& result, UErrorCode& status) {
				252	result.resetAll();
				253	result.setScriptExtensions(codePoint, status);
				254	if (U_FAILURE(status)) { return; }
				255
				256	// Section 5.1 step 1
				257	if (result.test(USCRIPT_HAN, status)) {
				258	result.set(USCRIPT_HAN_WITH_BOPOMOFO, status);
				259	result.set(USCRIPT_JAPANESE, status);
				260	result.set(USCRIPT_KOREAN, status);
				261	}
				262	if (result.test(USCRIPT_HIRAGANA, status)) {
				263	result.set(USCRIPT_JAPANESE, status);
				264	}
				265	if (result.test(USCRIPT_KATAKANA, status)) {
				266	result.set(USCRIPT_JAPANESE, status);
				267	}
				268	if (result.test(USCRIPT_HANGUL, status)) {
				269	result.set(USCRIPT_KOREAN, status);
				270	}
				271	if (result.test(USCRIPT_BOPOMOFO, status)) {
				272	result.set(USCRIPT_HAN_WITH_BOPOMOFO, status);
				273	}
				274
				275	// Section 5.1 step 2
				276	if (result.test(USCRIPT_COMMON, status) \|\| result.test(USCRIPT_INHERITED, status)) {
				277	result.setAll();
				278	}
				279	}
				280
				281	// Computes the resolved script set for a string, according to UTS 39 section 5.1.
				282	void SpoofImpl::getResolvedScriptSet(const UnicodeString& input, ScriptSet& result, UErrorCode& status) const {
				283	getResolvedScriptSetWithout(input, USCRIPT_CODE_LIMIT, result, status);
				284	}
				285
				286	// Computes the resolved script set for a string, omitting characters having the specified script.
				287	// If USCRIPT_CODE_LIMIT is passed as the second argument, all characters are included.
				288	void SpoofImpl::getResolvedScriptSetWithout(const UnicodeString& input, UScriptCode script, ScriptSet& result, UErrorCode& status) const {
				289	result.setAll();
				290
				291	ScriptSet temp;
				292	UChar32 codePoint;
				293	for (int32_t i = 0; i < input.length(); i += U16_LENGTH(codePoint)) {
				294	codePoint = input.char32At(i);
				295
				296	// Compute the augmented script set for the character
				297	getAugmentedScriptSet(codePoint, temp, status);
				298	if (U_FAILURE(status)) { return; }
				299
				300	// Intersect the augmented script set with the resolved script set, but only if the character doesn't
				301	// have the script specified in the function call
				302	if (script == USCRIPT_CODE_LIMIT \|\| !temp.test(script, status)) {
				303	result.intersect(temp);
				304	}
				305	}
				306	}
				307
				308	// Computes the set of numerics for a string, according to UTS 39 section 5.3.
				309	void SpoofImpl::getNumerics(const UnicodeString& input, UnicodeSet& result, UErrorCode& /status/) const {
				310	result.clear();
				311
				312	UChar32 codePoint;
				313	for (int32_t i = 0; i < input.length(); i += U16_LENGTH(codePoint)) {
				314	codePoint = input.char32At(i);
				315
				316	// Store a representative character for each kind of decimal digit
				317	if (u_charType(codePoint) == U_DECIMAL_DIGIT_NUMBER) {
				318	// Store the zero character as a representative for comparison.
				319	// Unicode guarantees it is codePoint - value
				320	result.add(codePoint - (UChar32)u_getNumericValue(codePoint));
				321	}
				322	}
				323	}
				324
				325	// Computes the restriction level of a string, according to UTS 39 section 5.2.
				326	URestrictionLevel SpoofImpl::getRestrictionLevel(const UnicodeString& input, UErrorCode& status) const {
				327	// Section 5.2 step 1:
				328	if (!fAllowedCharsSet->containsAll(input)) {
				329	return USPOOF_UNRESTRICTIVE;
				330	}
				331
				332	// Section 5.2 step 2
				333	// Java use a static UnicodeSet for this test. In C++, avoid the static variable
				334	// and just do a simple for loop.
				335	UBool allASCII = TRUE;
				336	for (int32_t i=0, length=input.length(); i<length; i++) {
				337	if (input.charAt(i) > 0x7f) {
				338	allASCII = FALSE;
				339	break;
				340	}
				341	}
				342	if (allASCII) {
				343	return USPOOF_ASCII;
				344	}
				345
				346	// Section 5.2 steps 3:
				347	ScriptSet resolvedScriptSet;
				348	getResolvedScriptSet(input, resolvedScriptSet, status);
				349	if (U_FAILURE(status)) { return USPOOF_UNRESTRICTIVE; }
				350
				351	// Section 5.2 step 4:
				352	if (!resolvedScriptSet.isEmpty()) {
				353	return USPOOF_SINGLE_SCRIPT_RESTRICTIVE;
				354	}
				355
				356	// Section 5.2 step 5:
				357	ScriptSet resolvedNoLatn;
				358	getResolvedScriptSetWithout(input, USCRIPT_LATIN, resolvedNoLatn, status);
				359	if (U_FAILURE(status)) { return USPOOF_UNRESTRICTIVE; }
				360
				361	// Section 5.2 step 6:
				362	if (resolvedNoLatn.test(USCRIPT_HAN_WITH_BOPOMOFO, status)
				363	\|\| resolvedNoLatn.test(USCRIPT_JAPANESE, status)
				364	\|\| resolvedNoLatn.test(USCRIPT_KOREAN, status)) {
				365	return USPOOF_HIGHLY_RESTRICTIVE;
				366	}
				367
				368	// Section 5.2 step 7:
				369	if (!resolvedNoLatn.isEmpty()
				370	&& !resolvedNoLatn.test(USCRIPT_CYRILLIC, status)
				371	&& !resolvedNoLatn.test(USCRIPT_GREEK, status)
				372	&& !resolvedNoLatn.test(USCRIPT_CHEROKEE, status)) {
				373	return USPOOF_MODERATELY_RESTRICTIVE;
				374	}
				375
				376	// Section 5.2 step 8:
				377	return USPOOF_MINIMALLY_RESTRICTIVE;
				378	}
				379
Jungshik Shin	a9a2bd3	2018-07-07 03:36:01 -0700	[diff] [blame^]	380	int32_t SpoofImpl::findHiddenOverlay(const UnicodeString& input, UErrorCode&) const {
				381	bool sawLeadCharacter = false;
				382	for (int32_t i=0; i<input.length();) {
				383	UChar32 cp = input.char32At(i);
				384	if (sawLeadCharacter && cp == 0x0307) {
				385	return i;
				386	}
				387	uint8_t combiningClass = u_getCombiningClass(cp);
				388	// Skip over characters except for those with combining class 0 (non-combining characters) or with
				389	// combining class 230 (same class as U+0307)
				390	U_ASSERT(u_getCombiningClass(0x0307) == 230);
				391	if (combiningClass == 0 \|\| combiningClass == 230) {
				392	sawLeadCharacter = isIllegalCombiningDotLeadCharacter(cp);
				393	}
				394	i += U16_LENGTH(cp);
				395	}
				396	return -1;
				397	}
				398
				399	static inline bool isIllegalCombiningDotLeadCharacterNoLookup(UChar32 cp) {
				400	return cp == u'i' \|\| cp == u'j' \|\| cp == u'ı' \|\| cp == u'ȷ' \|\| cp == u'l' \|\|
				401	u_hasBinaryProperty(cp, UCHAR_SOFT_DOTTED);
				402	}
				403
				404	bool SpoofImpl::isIllegalCombiningDotLeadCharacter(UChar32 cp) const {
				405	if (isIllegalCombiningDotLeadCharacterNoLookup(cp)) {
				406	return true;
				407	}
				408	UnicodeString skelStr;
				409	fSpoofData->confusableLookup(cp, skelStr);
				410	UChar32 finalCp = skelStr.char32At(skelStr.moveIndex32(skelStr.length(), -1));
				411	if (finalCp != cp && isIllegalCombiningDotLeadCharacterNoLookup(finalCp)) {
				412	return true;
				413	}
				414	return false;
				415	}
				416
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	417
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	418
				419	// Convert a text format hex number. Utility function used by builder code. Static.
				420	// Input: UChar *string text. Output: a UChar32
				421	// Input has been pre-checked, and will have no non-hex chars.
				422	// The number must fall in the code point range of 0..0x10ffff
				423	// Static Function.
				424	UChar32 SpoofImpl::ScanHex(const UChar *s, int32_t start, int32_t limit, UErrorCode &status) {
				425	if (U_FAILURE(status)) {
				426	return 0;
				427	}
				428	U_ASSERT(limit-start > 0);
				429	uint32_t val = 0;
				430	int i;
				431	for (i=start; i<limit; i++) {
				432	int digitVal = s[i] - 0x30;
				433	if (digitVal>9) {
				434	digitVal = 0xa + (s[i] - 0x41); // Upper Case 'A'
				435	}
				436	if (digitVal>15) {
				437	digitVal = 0xa + (s[i] - 0x61); // Lower Case 'a'
				438	}
				439	U_ASSERT(digitVal <= 0xf);
				440	val <<= 4;
				441	val += digitVal;
				442	}
				443	if (val > 0x10ffff) {
				444	status = U_PARSE_ERROR;
				445	val = 0;
				446	}
				447	return (UChar32)val;
				448	}
				449
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	450
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	451	//-----------------------------------------
				452	//
				453	// class CheckResult Implementation
				454	//
				455	//-----------------------------------------
				456
				457	CheckResult::CheckResult() : fMagic(USPOOF_CHECK_MAGIC) {
				458	clear();
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	459	}
				460
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	461	USpoofCheckResult* CheckResult::asUSpoofCheckResult() {
				462	return reinterpret_cast<USpoofCheckResult*>(this);
				463	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	464
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	465	//
				466	// Incoming parameter check on Status and the CheckResult object
				467	// received from the C API.
				468	//
				469	const CheckResult* CheckResult::validateThis(const USpoofCheckResult *ptr, UErrorCode &status) {
				470	if (U_FAILURE(status)) { return NULL; }
				471	if (ptr == NULL) {
				472	status = U_ILLEGAL_ARGUMENT_ERROR;
				473	return NULL;
				474	}
				475	CheckResult This = (CheckResult) ptr;
				476	if (This->fMagic != USPOOF_CHECK_MAGIC) {
				477	status = U_INVALID_FORMAT_ERROR;
				478	return NULL;
				479	}
				480	return This;
				481	}
				482
				483	CheckResult* CheckResult::validateThis(USpoofCheckResult *ptr, UErrorCode &status) {
				484	return const_cast<CheckResult *>
				485	(CheckResult::validateThis(const_cast<const USpoofCheckResult*>(ptr), status));
				486	}
				487
				488	void CheckResult::clear() {
				489	fChecks = 0;
				490	fNumerics.clear();
				491	fRestrictionLevel = USPOOF_UNDEFINED_RESTRICTIVE;
				492	}
				493
				494	int32_t CheckResult::toCombinedBitmask(int32_t enabledChecks) {
				495	if ((enabledChecks & USPOOF_AUX_INFO) != 0 && fRestrictionLevel != USPOOF_UNDEFINED_RESTRICTIVE) {
				496	return fChecks \| fRestrictionLevel;
				497	} else {
				498	return fChecks;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	499	}
				500	}
				501
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	502	CheckResult::~CheckResult() {
				503	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	504
				505	//----------------------------------------------------------------------------------------------
				506	//
				507	// class SpoofData Implementation
				508	//
				509	//----------------------------------------------------------------------------------------------
				510
				511
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	512	UBool SpoofData::validateDataVersion(UErrorCode &status) const {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	513	if (U_FAILURE(status) \|\|
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	514	fRawData == NULL \|\|
				515	fRawData->fMagic != USPOOF_MAGIC \|\|
				516	fRawData->fFormatVersion[0] != USPOOF_CONFUSABLE_DATA_FORMAT_VERSION \|\|
				517	fRawData->fFormatVersion[1] != 0 \|\|
				518	fRawData->fFormatVersion[2] != 0 \|\|
				519	fRawData->fFormatVersion[3] != 0) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	520	status = U_INVALID_FORMAT_ERROR;
				521	return FALSE;
				522	}
				523	return TRUE;
				524	}
				525
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	526	static UBool U_CALLCONV
				527	spoofDataIsAcceptable(void *context,
				528	const char * /* type /, const char /name/,
				529	const UDataInfo *pInfo) {
				530	if(
				531	pInfo->size >= 20 &&
				532	pInfo->isBigEndian == U_IS_BIG_ENDIAN &&
				533	pInfo->charsetFamily == U_CHARSET_FAMILY &&
				534	pInfo->dataFormat[0] == 0x43 && // dataFormat="Cfu "
				535	pInfo->dataFormat[1] == 0x66 &&
				536	pInfo->dataFormat[2] == 0x75 &&
				537	pInfo->dataFormat[3] == 0x20 &&
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	538	pInfo->formatVersion[0] == USPOOF_CONFUSABLE_DATA_FORMAT_VERSION
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	539	) {
				540	UVersionInfo version = static_cast<UVersionInfo >(context);
				541	if(version != NULL) {
				542	uprv_memcpy(version, pInfo->dataVersion, 4);
				543	}
				544	return TRUE;
				545	} else {
				546	return FALSE;
				547	}
				548	}
				549
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	550	// Methods for the loading of the default confusables data file. The confusable
				551	// data is loaded only when it is needed.
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	552	//
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	553	// SpoofData::getDefault() - Return the default confusables data, and call the
				554	// initOnce() if it is not available. Adds a reference
				555	// to the SpoofData that the caller is responsible for
				556	// decrementing when they are done with the data.
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	557	//
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	558	// uspoof_loadDefaultData - Called once, from initOnce(). The resulting SpoofData
				559	// is shared by all spoof checkers using the default data.
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	560	//
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	561	// uspoof_cleanupDefaultData - Called during cleanup.
				562	//
				563
				564	static UInitOnce gSpoofInitDefaultOnce = U_INITONCE_INITIALIZER;
				565	static SpoofData* gDefaultSpoofData;
				566
				567	static UBool U_CALLCONV
				568	uspoof_cleanupDefaultData(void) {
				569	if (gDefaultSpoofData) {
				570	// Will delete, assuming all user-level spoof checkers were closed.
				571	gDefaultSpoofData->removeReference();
Jungshik Shin	a9a2bd3	2018-07-07 03:36:01 -0700	[diff] [blame^]	572	gDefaultSpoofData = nullptr;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	573	gSpoofInitDefaultOnce.reset();
				574	}
				575	return TRUE;
				576	}
				577
				578	static void U_CALLCONV uspoof_loadDefaultData(UErrorCode& status) {
Jungshik Shin	a9a2bd3	2018-07-07 03:36:01 -0700	[diff] [blame^]	579	UDataMemory *udm = udata_openChoice(nullptr, "cfu", "confusables",
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	580	spoofDataIsAcceptable,
Jungshik Shin	a9a2bd3	2018-07-07 03:36:01 -0700	[diff] [blame^]	581	nullptr, // context, would receive dataVersion if supplied.
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	582	&status);
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	583	if (U_FAILURE(status)) { return; }
				584	gDefaultSpoofData = new SpoofData(udm, status);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	585	if (U_FAILURE(status)) {
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	586	delete gDefaultSpoofData;
Jungshik Shin	a9a2bd3	2018-07-07 03:36:01 -0700	[diff] [blame^]	587	gDefaultSpoofData = nullptr;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	588	return;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	589	}
Jungshik Shin	a9a2bd3	2018-07-07 03:36:01 -0700	[diff] [blame^]	590	if (gDefaultSpoofData == nullptr) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	591	status = U_MEMORY_ALLOCATION_ERROR;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	592	return;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	593	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	594	ucln_i18n_registerCleanup(UCLN_I18N_SPOOFDATA, uspoof_cleanupDefaultData);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	595	}
				596
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	597	SpoofData* SpoofData::getDefault(UErrorCode& status) {
				598	umtx_initOnce(gSpoofInitDefaultOnce, &uspoof_loadDefaultData, status);
				599	if (U_FAILURE(status)) { return NULL; }
				600	gDefaultSpoofData->addReference();
				601	return gDefaultSpoofData;
				602	}
				603
				604
				605
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	606	SpoofData::SpoofData(UDataMemory *udm, UErrorCode &status)
				607	{
				608	reset();
				609	if (U_FAILURE(status)) {
				610	return;
				611	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	612	fUDM = udm;
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	613	// fRawData is non-const because it may be constructed by the data builder.
				614	fRawData = reinterpret_cast<SpoofDataHeader *>(
				615	const_cast<void *>(udata_getMemory(udm)));
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	616	validateDataVersion(status);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	617	initPtrs(status);
				618	}
				619
				620
				621	SpoofData::SpoofData(const void *data, int32_t length, UErrorCode &status)
				622	{
				623	reset();
				624	if (U_FAILURE(status)) {
				625	return;
				626	}
				627	if ((size_t)length < sizeof(SpoofDataHeader)) {
				628	status = U_INVALID_FORMAT_ERROR;
				629	return;
				630	}
Jungshik Shin	a9a2bd3	2018-07-07 03:36:01 -0700	[diff] [blame^]	631	if (data == NULL) {
				632	status = U_ILLEGAL_ARGUMENT_ERROR;
				633	return;
				634	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	635	void ncData = const_cast<void >(data);
				636	fRawData = static_cast<SpoofDataHeader *>(ncData);
				637	if (length < fRawData->fLength) {
				638	status = U_INVALID_FORMAT_ERROR;
				639	return;
				640	}
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	641	validateDataVersion(status);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	642	initPtrs(status);
				643	}
				644
				645
				646	// Spoof Data constructor for use from data builder.
				647	// Initializes a new, empty data area that will be populated later.
				648	SpoofData::SpoofData(UErrorCode &status) {
				649	reset();
				650	if (U_FAILURE(status)) {
				651	return;
				652	}
				653	fDataOwned = true;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	654
				655	// The spoof header should already be sized to be a multiple of 16 bytes.
				656	// Just in case it's not, round it up.
				657	uint32_t initialSize = (sizeof(SpoofDataHeader) + 15) & ~15;
				658	U_ASSERT(initialSize == sizeof(SpoofDataHeader));
				659
				660	fRawData = static_cast<SpoofDataHeader *>(uprv_malloc(initialSize));
				661	fMemLimit = initialSize;
				662	if (fRawData == NULL) {
				663	status = U_MEMORY_ALLOCATION_ERROR;
				664	return;
				665	}
				666	uprv_memset(fRawData, 0, initialSize);
				667
				668	fRawData->fMagic = USPOOF_MAGIC;
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	669	fRawData->fFormatVersion[0] = USPOOF_CONFUSABLE_DATA_FORMAT_VERSION;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	670	fRawData->fFormatVersion[1] = 0;
				671	fRawData->fFormatVersion[2] = 0;
				672	fRawData->fFormatVersion[3] = 0;
				673	initPtrs(status);
				674	}
				675
				676	// reset() - initialize all fields.
				677	// Should be updated if any new fields are added.
				678	// Called by constructors to put things in a known initial state.
				679	void SpoofData::reset() {
				680	fRawData = NULL;
				681	fDataOwned = FALSE;
				682	fUDM = NULL;
				683	fMemLimit = 0;
				684	fRefCount = 1;
				685	fCFUKeys = NULL;
				686	fCFUValues = NULL;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	687	fCFUStrings = NULL;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	688	}
				689
				690
				691	// SpoofData::initPtrs()
				692	// Initialize the pointers to the various sections of the raw data.
				693	//
				694	// This function is used both during the Trie building process (multiple
				695	// times, as the individual data sections are added), and
				696	// during the opening of a Spoof Checker from prebuilt data.
				697	//
				698	// The pointers for non-existent data sections (identified by an offset of 0)
				699	// are set to NULL.
				700	//
				701	// Note: During building the data, adding each new data section
				702	// reallocs the raw data area, which likely relocates it, which
				703	// in turn requires reinitializing all of the pointers into it, hence
				704	// multiple calls to this function during building.
				705	//
				706	void SpoofData::initPtrs(UErrorCode &status) {
				707	fCFUKeys = NULL;
				708	fCFUValues = NULL;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	709	fCFUStrings = NULL;
				710	if (U_FAILURE(status)) {
				711	return;
				712	}
				713	if (fRawData->fCFUKeys != 0) {
				714	fCFUKeys = (int32_t )((char )fRawData + fRawData->fCFUKeys);
				715	}
				716	if (fRawData->fCFUStringIndex != 0) {
				717	fCFUValues = (uint16_t )((char )fRawData + fRawData->fCFUStringIndex);
				718	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	719	if (fRawData->fCFUStringTable != 0) {
				720	fCFUStrings = (UChar )((char )fRawData + fRawData->fCFUStringTable);
				721	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	722	}
				723
				724
				725	SpoofData::~SpoofData() {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	726	if (fDataOwned) {
				727	uprv_free(fRawData);
				728	}
				729	fRawData = NULL;
				730	if (fUDM != NULL) {
				731	udata_close(fUDM);
				732	}
				733	fUDM = NULL;
				734	}
				735
				736
				737	void SpoofData::removeReference() {
				738	if (umtx_atomic_dec(&fRefCount) == 0) {
				739	delete this;
				740	}
				741	}
				742
				743
				744	SpoofData *SpoofData::addReference() {
				745	umtx_atomic_inc(&fRefCount);
				746	return this;
				747	}
				748
				749
				750	void *SpoofData::reserveSpace(int32_t numBytes, UErrorCode &status) {
				751	if (U_FAILURE(status)) {
				752	return NULL;
				753	}
				754	if (!fDataOwned) {
				755	U_ASSERT(FALSE);
				756	status = U_INTERNAL_PROGRAM_ERROR;
				757	return NULL;
				758	}
				759
				760	numBytes = (numBytes + 15) & ~15; // Round up to a multiple of 16
				761	uint32_t returnOffset = fMemLimit;
				762	fMemLimit += numBytes;
				763	fRawData = static_cast<SpoofDataHeader *>(uprv_realloc(fRawData, fMemLimit));
				764	fRawData->fLength = fMemLimit;
				765	uprv_memset((char *)fRawData + returnOffset, 0, numBytes);
				766	initPtrs(status);
				767	return (char *)fRawData + returnOffset;
				768	}
				769
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	770	int32_t SpoofData::serialize(void *buf, int32_t capacity, UErrorCode &status) const {
				771	int32_t dataSize = fRawData->fLength;
				772	if (capacity < dataSize) {
				773	status = U_BUFFER_OVERFLOW_ERROR;
				774	return dataSize;
				775	}
				776	uprv_memcpy(buf, fRawData, dataSize);
				777	return dataSize;
				778	}
				779
				780	int32_t SpoofData::size() const {
				781	return fRawData->fLength;
				782	}
				783
				784	//-------------------------------
				785	//
				786	// Front-end APIs for SpoofData
				787	//
				788	//-------------------------------
				789
				790	int32_t SpoofData::confusableLookup(UChar32 inChar, UnicodeString &dest) const {
				791	// Perform a binary search.
				792	// [lo, hi), i.e lo is inclusive, hi is exclusive.
				793	// The result after the loop will be in lo.
				794	int32_t lo = 0;
				795	int32_t hi = length();
				796	do {
				797	int32_t mid = (lo + hi) / 2;
				798	if (codePointAt(mid) > inChar) {
				799	hi = mid;
				800	} else if (codePointAt(mid) < inChar) {
				801	lo = mid;
				802	} else {
				803	// Found result. Break early.
				804	lo = mid;
				805	break;
				806	}
				807	} while (hi - lo > 1);
				808
				809	// Did we find an entry? If not, the char maps to itself.
				810	if (codePointAt(lo) != inChar) {
				811	dest.append(inChar);
				812	return 1;
				813	}
				814
				815	// Add the element to the string builder and return.
				816	return appendValueTo(lo, dest);
				817	}
				818
				819	int32_t SpoofData::length() const {
				820	return fRawData->fCFUKeysSize;
				821	}
				822
				823	UChar32 SpoofData::codePointAt(int32_t index) const {
				824	return ConfusableDataUtils::keyToCodePoint(fCFUKeys[index]);
				825	}
				826
				827	int32_t SpoofData::appendValueTo(int32_t index, UnicodeString& dest) const {
				828	int32_t stringLength = ConfusableDataUtils::keyToLength(fCFUKeys[index]);
				829
				830	// Value is either a char (for strings of length 1) or
				831	// an index into the string table (for longer strings)
				832	uint16_t value = fCFUValues[index];
				833	if (stringLength == 1) {
				834	dest.append((UChar)value);
				835	} else {
				836	dest.append(fCFUStrings + value, stringLength);
				837	}
				838
				839	return stringLength;
				840	}
				841
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	842
				843	U_NAMESPACE_END
				844
				845	U_NAMESPACE_USE
				846
				847	//-----------------------------------------------------------------------------
				848	//
				849	// uspoof_swap - byte swap and char encoding swap of spoof data
				850	//
				851	//-----------------------------------------------------------------------------
				852	U_CAPI int32_t U_EXPORT2
				853	uspoof_swap(const UDataSwapper ds, const void inData, int32_t length, void *outData,
				854	UErrorCode *status) {
				855
				856	if (status == NULL \|\| U_FAILURE(*status)) {
				857	return 0;
				858	}
				859	if(ds==NULL \|\| inData==NULL \|\| length<-1 \|\| (length>0 && outData==NULL)) {
				860	*status=U_ILLEGAL_ARGUMENT_ERROR;
				861	return 0;
				862	}
				863
				864	//
				865	// Check that the data header is for spoof data.
				866	// (Header contents are defined in gencfu.cpp)
				867	//
				868	const UDataInfo pInfo = (const UDataInfo )((const char *)inData+4);
				869	if(!( pInfo->dataFormat[0]==0x43 && /* dataFormat="Cfu " */
				870	pInfo->dataFormat[1]==0x66 &&
				871	pInfo->dataFormat[2]==0x75 &&
				872	pInfo->dataFormat[3]==0x20 &&
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	873	pInfo->formatVersion[0]==USPOOF_CONFUSABLE_DATA_FORMAT_VERSION &&
				874	pInfo->formatVersion[1]==0 &&
				875	pInfo->formatVersion[2]==0 &&
				876	pInfo->formatVersion[3]==0 )) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	877	udata_printError(ds, "uspoof_swap(): data format %02x.%02x.%02x.%02x "
				878	"(format version %02x %02x %02x %02x) is not recognized\n",
				879	pInfo->dataFormat[0], pInfo->dataFormat[1],
				880	pInfo->dataFormat[2], pInfo->dataFormat[3],
				881	pInfo->formatVersion[0], pInfo->formatVersion[1],
				882	pInfo->formatVersion[2], pInfo->formatVersion[3]);
				883	*status=U_UNSUPPORTED_ERROR;
				884	return 0;
				885	}
				886
				887	//
				888	// Swap the data header. (This is the generic ICU Data Header, not the uspoof Specific
				889	// header). This swap also conveniently gets us
				890	// the size of the ICU d.h., which lets us locate the start
				891	// of the uspoof specific data.
				892	//
				893	int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, status);
				894
				895
				896	//
				897	// Get the Spoof Data Header, and check that it appears to be OK.
				898	//
				899	//
				900	const uint8_t inBytes =(const uint8_t )inData+headerSize;
				901	SpoofDataHeader spoofDH = (SpoofDataHeader )inBytes;
				902	if (ds->readUInt32(spoofDH->fMagic) != USPOOF_MAGIC \|\|
				903	ds->readUInt32(spoofDH->fLength) < sizeof(SpoofDataHeader))
				904	{
				905	udata_printError(ds, "uspoof_swap(): Spoof Data header is invalid.\n");
				906	*status=U_UNSUPPORTED_ERROR;
				907	return 0;
				908	}
				909
				910	//
				911	// Prefight operation? Just return the size
				912	//
				913	int32_t spoofDataLength = ds->readUInt32(spoofDH->fLength);
				914	int32_t totalSize = headerSize + spoofDataLength;
				915	if (length < 0) {
				916	return totalSize;
				917	}
				918
				919	//
				920	// Check that length passed in is consistent with length from Spoof data header.
				921	//
				922	if (length < totalSize) {
				923	udata_printError(ds, "uspoof_swap(): too few bytes (%d after ICU Data header) for spoof data.\n",
				924	spoofDataLength);
				925	*status=U_INDEX_OUTOFBOUNDS_ERROR;
				926	return 0;
				927	}
				928
				929
				930	//
				931	// Swap the Data. Do the data itself first, then the Spoof Data Header, because
				932	// we need to reference the header to locate the data, and an
				933	// inplace swap of the header leaves it unusable.
				934	//
				935	uint8_t outBytes = (uint8_t )outData + headerSize;
				936	SpoofDataHeader outputDH = (SpoofDataHeader )outBytes;
				937
				938	int32_t sectionStart;
				939	int32_t sectionLength;
				940
				941	//
				942	// If not swapping in place, zero out the output buffer before starting.
				943	// Gaps may exist between the individual sections, and these must be zeroed in
				944	// the output buffer. The simplest way to do that is to just zero the whole thing.
				945	//
				946	if (inBytes != outBytes) {
				947	uprv_memset(outBytes, 0, spoofDataLength);
				948	}
				949
				950	// Confusables Keys Section (fCFUKeys)
				951	sectionStart = ds->readUInt32(spoofDH->fCFUKeys);
				952	sectionLength = ds->readUInt32(spoofDH->fCFUKeysSize) * 4;
				953	ds->swapArray32(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);
				954
				955	// String Index Section
				956	sectionStart = ds->readUInt32(spoofDH->fCFUStringIndex);
				957	sectionLength = ds->readUInt32(spoofDH->fCFUStringIndexSize) * 2;
				958	ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);
				959
				960	// String Table Section
				961	sectionStart = ds->readUInt32(spoofDH->fCFUStringTable);
				962	sectionLength = ds->readUInt32(spoofDH->fCFUStringTableLen) * 2;
				963	ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);
				964
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	965	// And, last, swap the header itself.
				966	// int32_t fMagic // swap this
				967	// uint8_t fFormatVersion[4] // Do not swap this, just copy
				968	// int32_t fLength and all the rest // Swap the rest, all is 32 bit stuff.
				969	//
				970	uint32_t magic = ds->readUInt32(spoofDH->fMagic);
				971	ds->writeUInt32((uint32_t *)&outputDH->fMagic, magic);
				972
				973	if (outputDH->fFormatVersion != spoofDH->fFormatVersion) {
				974	uprv_memcpy(outputDH->fFormatVersion, spoofDH->fFormatVersion, sizeof(spoofDH->fFormatVersion));
				975	}
				976	// swap starting at fLength
				977	ds->swapArray32(ds, &spoofDH->fLength, sizeof(SpoofDataHeader)-8 /* minus magic and fFormatVersion[4] */, &outputDH->fLength, status);
				978
				979	return totalSize;
				980	}
				981
				982	#endif
				983
				984