Blame - source/test/perf/dicttrieperf/dicttrieperf.cpp - chromium.googlesource.com/chromium/deps/icu

blob: 7d70233069ee5db41776f389b8d07fa7d7d7e5d9 [file] [log] [blame]

Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame^]	1	/*
				2	***********************************************************************
				3	* © 2016 and later: Unicode, Inc. and others.
				4	* License & terms of use: http://www.unicode.org/copyright.html
				5	***********************************************************************
				6	***********************************************************************
				7	* Copyright (C) 2010-2014, International Business Machines
				8	* Corporation and others. All Rights Reserved.
				9	***********************************************************************
				10	* file name: dicttrieperf.cpp
				11	* encoding: UTF-8
				12	* tab size: 8 (not used)
				13	* indentation:4
				14	*
				15	* created on: 2010dec09
				16	* created by: Markus W. Scherer
				17	*
				18	* Performance test program for dictionary-type tries.
				19	*
				20	* Usage from within <ICU build tree>/test/perf/dicttrieperf/ :
				21	* (Linux)
				22	* make
				23	* export LD_LIBRARY_PATH=../../../lib:../../../stubdata:../../../tools/ctestfw
				24	* ./dicttrieperf --sourcedir <ICU build tree>/data/out/tmp --passes 3 --iterations 1000
				25	* or
				26	* ./dicttrieperf -f <ICU source tree>/source/data/brkitr/thaidict.txt --passes 3 --iterations 250
				27	*/
				28
				29	#include <stdio.h>
				30	#include <stdlib.h>
				31	#include "unicode/bytestrie.h"
				32	#include "unicode/bytestriebuilder.h"
				33	#include "unicode/localpointer.h"
				34	#include "unicode/ucharstrie.h"
				35	#include "unicode/ucharstriebuilder.h"
				36	#include "unicode/uperf.h"
				37	#include "unicode/utext.h"
				38	#include "charstr.h"
				39	#include "package.h"
				40	#include "toolutil.h"
				41	#include "ucbuf.h" // struct ULine
				42	#include "uoptions.h"
				43	#include "uvectr32.h"
				44	#include "cmemory.h" // for UPRV_LENGTHOF
				45
				46	// Test object.
				47	class DictionaryTriePerfTest : public UPerfTest {
				48	public:
				49	DictionaryTriePerfTest(int32_t argc, const char *argv[], UErrorCode &status)
				50	: UPerfTest(argc, argv, NULL, 0, "", status), numTextLines(0) {
				51	if(hasFile()) {
				52	getLines(status);
				53	for(int32_t i=0; i<numLines; ++i) {
				54	// Skip comment lines (start with a character below 'A').
				55	if(lines[i].name[0]>=0x41) {
				56	++numTextLines;
				57	// Remove trailing CR LF.
				58	int32_t len=lines[i].len;
				59	UChar c;
				60	while(len>0 && ((c=lines[i].name[len-1])==0xa \|\| c==0xd)) {
				61	--len;
				62	}
				63	lines[i].len=len;
				64	}
				65	}
				66	}
				67	}
				68
				69	virtual UPerfFunction runIndexedTest(int32_t index, UBool exec, const char &name, char *par=NULL);
				70
				71	const char *getSourceDir() const { return sourceDir; }
				72
				73	UBool hasFile() const { return ucharBuf!=NULL; }
				74	const ULine *getCachedLines() const { return lines; }
				75	int32_t getNumLines() const { return numLines; }
				76	int32_t numTextLines; // excluding comment lines
				77	};
				78
				79	// Performance test function object.
				80	// Loads icudt46l.dat (or whatever its current versioned filename)
				81	// from the -s or --sourcedir path.
				82	class PackageLookup : public UPerfFunction {
				83	protected:
				84	PackageLookup(const DictionaryTriePerfTest &perf) {
				85	IcuToolErrorCode errorCode("PackageLookup()");
				86	CharString filename(perf.getSourceDir(), errorCode);
				87	int32_t filenameLength=filename.length();
				88	if(filenameLength>0 && filename[filenameLength-1]!=U_FILE_SEP_CHAR &&
				89	filename[filenameLength-1]!=U_FILE_ALT_SEP_CHAR) {
				90	filename.append(U_FILE_SEP_CHAR, errorCode);
				91	}
				92	filename.append(U_ICUDATA_NAME, errorCode);
				93	filename.append(".dat", errorCode);
				94	pkg.readPackage(filename.data());
				95	}
				96
				97	public:
				98	virtual ~PackageLookup() {}
				99
				100	// virtual void call(UErrorCode* pErrorCode) { ... }
				101
				102	virtual long getOperationsPerIteration() {
				103	return pkg.getItemCount();
				104	}
				105
				106	// virtual long getEventsPerIteration();
				107
				108	protected:
				109	Package pkg;
				110	};
				111
				// One table-of-contents entry: nameOffset is the offset of the item's
				// NUL-terminated name from the start of the names block.
				struct TOCEntry {
				    int32_t nameOffset;
				    int32_t dataOffset;
				};

				// Similar to ICU 4.6 offsetTOCLookupFn() (in ucmndata.c).
				// Binary search for the NUL-terminated string s among the count names
				// addressed by names+toc[i].nameOffset; the names are expected to be in
				// strcmp() order.
				// Returns the index of the matching entry, or -1 if s is not found.
				static int32_t simpleBinarySearch(const char *s, const char *names, const TOCEntry *toc, int32_t count) {
				    int32_t start=0;
				    int32_t limit=count;
				    int32_t lastNumber=limit;
				    for(;;) {
				        int32_t number=(start+limit)/2;
				        if(lastNumber==number) {  // have we moved?
				            return -1;  // not found
				        }
				        lastNumber=number;
				        int32_t cmp=strcmp(s, names+toc[number].nameOffset);
				        if(cmp<0) {
				            limit=number;
				        } else if(cmp>0) {
				            start=number;
				        } else {  // found s
				            return number;
				        }
				    }
				}
				137
				138	class BinarySearchPackageLookup : public PackageLookup {
				139	public:
				140	BinarySearchPackageLookup(const DictionaryTriePerfTest &perf)
				141	: PackageLookup(perf) {
				142	IcuToolErrorCode errorCode("BinarySearchPackageLookup()");
				143	int32_t count=pkg.getItemCount();
				144	toc=new TOCEntry[count];
				145	for(int32_t i=0; i<count; ++i) {
				146	toc[i].nameOffset=itemNames.length();
				147	toc[i].dataOffset=i; // arbitrary value, see toc comment below
				148	// The Package class removes the "icudt46l/" prefix.
				149	// We restore that here for a fair performance test.
				150	const char *name=pkg.getItem(i)->name;
				151	itemNames.append("icudt46l/", errorCode);
				152	itemNames.append(name, strlen(name)+1, errorCode);
				153	}
				154	printf("size of item names: %6ld\n", (long)itemNames.length());
				155	printf("size of TOC: %6ld\n", (long)(count*8));
				156	printf("total index size: %6ld\n", (long)(itemNames.length()+count*8));
				157	}
				158	virtual ~BinarySearchPackageLookup() {
				159	delete[] toc;
				160	}
				161
				162	virtual void call(UErrorCode * /pErrorCode/) {
				163	int32_t count=pkg.getItemCount();
				164	const char *itemNameChars=itemNames.data();
				165	const char *name=itemNameChars;
				166	for(int32_t i=0; i<count; ++i) {
				167	if(simpleBinarySearch(name, itemNameChars, toc, count)<0) {
				168	fprintf(stderr, "item not found: %s\n", name);
				169	}
				170	name=strchr(name, 0)+1;
				171	}
				172	}
				173
				174	protected:
				175	CharString itemNames;
				176	// toc imitates a .dat file's array of UDataOffsetTOCEntry
				177	// with nameOffset and dataOffset.
				178	// We don't need the dataOffsets, but we want to imitate the real
				179	// memory density, to measure equivalent CPU cache usage.
				180	TOCEntry *toc;
				181	};
				182
				// Smaller of two values; note that each argument may be evaluated twice.
				#if !defined(MIN)
				#   define MIN(a, b) (((a) < (b)) ? (a) : (b))
				#endif
				186
				// Compare strings where we know the shared prefix length,
				// and advance the prefix length as we find that the strings share even more characters.
				// On entry, prefixLength is the number of leading bytes that s1 and s2
				// are known to share; they are skipped. On return, it is the full length
				// of the shared prefix.
				// Returns the difference of the first differing bytes (compared as
				// unsigned values), or 0 if the strings are equal.
				static int32_t strcmpAfterPrefix(const char *s1, const char *s2, int32_t &prefixLength) {
				    int32_t pl=prefixLength;
				    s1+=pl;
				    s2+=pl;
				    int32_t cmp=0;
				    for(;;) {
				        int32_t c1=(uint8_t)*s1++;
				        int32_t c2=(uint8_t)*s2++;
				        cmp=c1-c2;
				        if(cmp!=0 || c1==0) {  // different or done
				            break;
				        }
				        ++pl;  // increment shared same-prefix length
				    }
				    prefixLength=pl;
				    return cmp;
				}
				206
				207	static int32_t prefixBinarySearch(const char s, const char names, const TOCEntry *toc, int32_t count) {
				208	if(count==0) {
				209	return -1;
				210	}
				211	int32_t start=0;
				212	int32_t limit=count;
				213	// Remember the shared prefix between s, start and limit,
				214	// and don't compare that shared prefix again.
				215	// The shared prefix should get longer as we narrow the [start, limit[ range.
				216	int32_t startPrefixLength=0;
				217	int32_t limitPrefixLength=0;
				218	// Prime the prefix lengths so that we don't keep prefixLength at 0 until
				219	// both the start and limit indexes have moved.
				220	// At the same time, we find if s is one of the start and (limit-1) names,
				221	// and if not, exclude them from the actual binary search.
				222	if(0==strcmpAfterPrefix(s, names+toc[0].nameOffset, startPrefixLength)) {
				223	return 0;
				224	}
				225	++start;
				226	--limit;
				227	if(0==strcmpAfterPrefix(s, names+toc[limit].nameOffset, limitPrefixLength)) {
				228	return limit;
				229	}
				230	while(start<limit) {
				231	int32_t i=(start+limit)/2;
				232	int32_t prefixLength=MIN(startPrefixLength, limitPrefixLength);
				233	int32_t cmp=strcmpAfterPrefix(s, names+toc[i].nameOffset, prefixLength);
				234	if(cmp<0) {
				235	limit=i;
				236	limitPrefixLength=prefixLength;
				237	} else if(cmp==0) {
				238	return i;
				239	} else {
				240	start=i+1;
				241	startPrefixLength=prefixLength;
				242	}
				243	}
				244	return -1;
				245	}
				246
				247	class PrefixBinarySearchPackageLookup : public BinarySearchPackageLookup {
				248	public:
				249	PrefixBinarySearchPackageLookup(const DictionaryTriePerfTest &perf)
				250	: BinarySearchPackageLookup(perf) {}
				251
				252	virtual void call(UErrorCode * /pErrorCode/) {
				253	int32_t count=pkg.getItemCount();
				254	const char *itemNameChars=itemNames.data();
				255	const char *name=itemNameChars;
				256	for(int32_t i=0; i<count; ++i) {
				257	if(prefixBinarySearch(name, itemNameChars, toc, count)<0) {
				258	fprintf(stderr, "item not found: %s\n", name);
				259	}
				260	name=strchr(name, 0)+1;
				261	}
				262	}
				263	};
				264
				265	static int32_t bytesTrieLookup(const char s, const char nameTrieBytes) {
				266	BytesTrie trie(nameTrieBytes);
				267	if(USTRINGTRIE_HAS_VALUE(trie.next(s, -1))) {
				268	return trie.getValue();
				269	} else {
				270	return -1;
				271	}
				272	}
				273
				274	class BytesTriePackageLookup : public PackageLookup {
				275	public:
				276	BytesTriePackageLookup(const DictionaryTriePerfTest &perf)
				277	: PackageLookup(perf) {
				278	IcuToolErrorCode errorCode("BinarySearchPackageLookup()");
				279	builder=new BytesTrieBuilder(errorCode);
				280	int32_t count=pkg.getItemCount();
				281	for(int32_t i=0; i<count; ++i) {
				282	// The Package class removes the "icudt46l/" prefix.
				283	// We restore that here for a fair performance test.
				284	// We store all full names so that we do not have to reconstruct them
				285	// in the call() function.
				286	const char *name=pkg.getItem(i)->name;
				287	int32_t offset=itemNames.length();
				288	itemNames.append("icudt46l/", errorCode);
				289	itemNames.append(name, -1, errorCode);
				290	// As value, set the data item index.
				291	// In a real implementation, we would use that to get the
				292	// start and limit offset of the data item.
				293	StringPiece fullName(itemNames.toStringPiece());
				294	fullName.remove_prefix(offset);
				295	builder->add(fullName, i, errorCode);
				296	// NUL-terminate the name for call() to find the next one.
				297	itemNames.append(0, errorCode);
				298	}
				299	int32_t length=builder->buildStringPiece(USTRINGTRIE_BUILD_SMALL, errorCode).length();
				300	printf("size of BytesTrie: %6ld\n", (long)length);
				301	// count+1: +1 for the last-item limit offset which we should have always had
				302	printf("size of dataOffsets:%6ld\n", (long)((count+1)*4));
				303	printf("total index size: %6ld\n", (long)(length+(count+1)*4));
				304	}
				305	virtual ~BytesTriePackageLookup() {
				306	delete builder;
				307	}
				308
				309	virtual void call(UErrorCode *pErrorCode) {
				310	int32_t count=pkg.getItemCount();
				311	const char nameTrieBytes=builder->buildStringPiece(USTRINGTRIE_BUILD_SMALL, pErrorCode).data();
				312	const char *name=itemNames.data();
				313	for(int32_t i=0; i<count; ++i) {
				314	if(bytesTrieLookup(name, nameTrieBytes)<0) {
				315	fprintf(stderr, "item not found: %s\n", name);
				316	}
				317	name=strchr(name, 0)+1;
				318	}
				319	}
				320
				321	protected:
				322	BytesTrieBuilder *builder;
				323	CharString itemNames;
				324	};
				325
				326	// Performance test function object.
				327	// Each subclass loads a dictionary text file
				328	// from the -s or --sourcedir path plus -f or --file-name.
				329	// For example, <ICU source dir>/source/data/brkitr/thaidict.txt.
				330	class DictLookup : public UPerfFunction {
				331	public:
				332	DictLookup(const DictionaryTriePerfTest &perfTest) : perf(perfTest) {}
				333
				334	virtual long getOperationsPerIteration() {
				335	return perf.numTextLines;
				336	}
				337
				338	protected:
				339	const DictionaryTriePerfTest &perf;
				340	};
				341
				342	// Closely imitate CompactTrieDictionary::matches().
				343	// Note: CompactTrieDictionary::matches() is part of its trie implementation,
				344	// and while it loops over the text, it knows the current state.
				345	// By contrast, this implementation uses UCharsTrie API functions that have to
				346	// check the trie state each time and load/store state in the object.
				347	// (Whether it hasNext() and whether it is in the middle of a linear-match node.)
				348	static int32_t
				349	ucharsTrieMatches(UCharsTrie &trie,
				350	UText *text, int32_t textLimit,
				351	int32_t *lengths, int &count, int limit ) {
				352	UChar32 c=utext_next32(text);
				353	// Notes:
				354	// a) CompactTrieDictionary::matches() does not check for U_SENTINEL.
				355	// b) It also ignores non-BMP code points by casting to UChar!
				356	if(c<0) {
				357	return 0;
				358	}
				359	// Should be firstForCodePoint() but CompactTrieDictionary
				360	// handles only code units.
				361	UStringTrieResult result=trie.first(c);
				362	int32_t numChars=1;
				363	count=0;
				364	for(;;) {
				365	if(USTRINGTRIE_HAS_VALUE(result)) {
				366	if(count<limit) {
				367	// lengths[count++]=(int32_t)utext_getNativeIndex(text);
				368	lengths[count++]=numChars; // CompactTrieDictionary just counts chars too.
				369	}
				370	if(result==USTRINGTRIE_FINAL_VALUE) {
				371	break;
				372	}
				373	} else if(result==USTRINGTRIE_NO_MATCH) {
				374	break;
				375	}
				376	if(numChars>=textLimit) {
				377	// Note: Why do we have both a text limit and a UText that knows its length?
				378	break;
				379	}
				380	UChar32 c=utext_next32(text);
				381	// Notes:
				382	// a) CompactTrieDictionary::matches() does not check for U_SENTINEL.
				383	// b) It also ignores non-BMP code points by casting to UChar!
				384	if(c<0) {
				385	break;
				386	}
				387	++numChars;
				388	// Should be nextForCodePoint() but CompactTrieDictionary
				389	// handles only code units.
				390	result=trie.next(c);
				391	}
				392	#if 0
				393	// Note: CompactTrieDictionary::matches() comments say that it leaves the UText
				394	// after the longest prefix match and returns the number of characters
				395	// that were matched.
				396	if(index!=lastMatch) {
				397	utext_setNativeIndex(text, lastMatch);
				398	}
				399	return lastMatch-start;
				400	// However, it does not do either of these, so I am not trying to
				401	// imitate it (or its docs) 100%.
				402	#endif
				403	return numChars;
				404	}
				405
				406	class UCharsTrieDictLookup : public DictLookup {
				407	public:
				408	UCharsTrieDictLookup(const DictionaryTriePerfTest &perfTest)
				409	: DictLookup(perfTest), trie(NULL) {
				410	IcuToolErrorCode errorCode("UCharsTrieDictLookup()");
				411	builder=new UCharsTrieBuilder(errorCode);
				412	const ULine *lines=perf.getCachedLines();
				413	int32_t numLines=perf.getNumLines();
				414	for(int32_t i=0; i<numLines; ++i) {
				415	// Skip comment lines (start with a character below 'A').
				416	if(lines[i].name[0]<0x41) {
				417	continue;
				418	}
				419	builder->add(UnicodeString(FALSE, lines[i].name, lines[i].len), 0, errorCode);
				420	}
				421	UnicodeString trieUChars;
				422	int32_t length=builder->buildUnicodeString(USTRINGTRIE_BUILD_SMALL, trieUChars, errorCode).length();
				423	printf("size of UCharsTrie: %6ld bytes\n", (long)length*2);
				424	trie=builder->build(USTRINGTRIE_BUILD_SMALL, errorCode);
				425	}
				426
				427	virtual ~UCharsTrieDictLookup() {
				428	delete builder;
				429	delete trie;
				430	}
				431
				432	protected:
				433	UCharsTrieBuilder *builder;
				434	UCharsTrie *trie;
				435	};
				436
				437	class UCharsTrieDictMatches : public UCharsTrieDictLookup {
				438	public:
				439	UCharsTrieDictMatches(const DictionaryTriePerfTest &perfTest)
				440	: UCharsTrieDictLookup(perfTest) {}
				441
				442	virtual void call(UErrorCode *pErrorCode) {
				443	UText text=UTEXT_INITIALIZER;
				444	int32_t lengths[20];
				445	const ULine *lines=perf.getCachedLines();
				446	int32_t numLines=perf.getNumLines();
				447	for(int32_t i=0; i<numLines; ++i) {
				448	// Skip comment lines (start with a character below 'A').
				449	if(lines[i].name[0]<0x41) {
				450	continue;
				451	}
				452	utext_openUChars(&text, lines[i].name, lines[i].len, pErrorCode);
				453	int32_t count=0;
				454	ucharsTrieMatches(*trie, &text, lines[i].len,
				455	lengths, count, UPRV_LENGTHOF(lengths));
				456	if(count==0 \|\| lengths[count-1]!=lines[i].len) {
				457	fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
				458	}
				459	}
				460	}
				461	};
				462
				463	class UCharsTrieDictContains : public UCharsTrieDictLookup {
				464	public:
				465	UCharsTrieDictContains(const DictionaryTriePerfTest &perfTest)
				466	: UCharsTrieDictLookup(perfTest) {}
				467
				468	virtual void call(UErrorCode * /pErrorCode/) {
				469	const ULine *lines=perf.getCachedLines();
				470	int32_t numLines=perf.getNumLines();
				471	for(int32_t i=0; i<numLines; ++i) {
				472	// Skip comment lines (which start with a character below 'A').
				473	if(lines[i].name[0]<0x41) {
				474	continue;
				475	}
				476	if(!USTRINGTRIE_HAS_VALUE(trie->reset().next(lines[i].name, lines[i].len))) {
				477	fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
				478	}
				479	}
				480	}
				481	};
				482
				483	static inline int32_t thaiCharToByte(UChar32 c) {
				484	if(0xe00<=c && c<=0xefe) {
				485	return c&0xff;
				486	} else if(c==0x2e) {
				487	return 0xff;
				488	} else {
				489	return -1;
				490	}
				491	}
				492
				493	static UBool thaiWordToBytes(const UChar *s, int32_t length,
				494	CharString &str, UErrorCode &errorCode) {
				495	for(int32_t i=0; i<length; ++i) {
				496	UChar c=s[i];
				497	int32_t b=thaiCharToByte(c);
				498	if(b>=0) {
				499	str.append((char)b, errorCode);
				500	} else {
				501	fprintf(stderr, "thaiWordToBytes(): unable to encode U+%04X as a byte\n", c);
				502	return FALSE;
				503	}
				504	}
				505	return TRUE;
				506	}
				507
				508	class BytesTrieDictLookup : public DictLookup {
				509	public:
				510	BytesTrieDictLookup(const DictionaryTriePerfTest &perfTest)
				511	: DictLookup(perfTest), trie(NULL), noDict(FALSE) {
				512	IcuToolErrorCode errorCode("BytesTrieDictLookup()");
				513	builder=new BytesTrieBuilder(errorCode);
				514	CharString str;
				515	const ULine *lines=perf.getCachedLines();
				516	int32_t numLines=perf.getNumLines();
				517	for(int32_t i=0; i<numLines; ++i) {
				518	// Skip comment lines (start with a character below 'A').
				519	if(lines[i].name[0]<0x41) {
				520	continue;
				521	}
				522	if(!thaiWordToBytes(lines[i].name, lines[i].len, str.clear(), errorCode)) {
				523	fprintf(stderr, "thaiWordToBytes(): failed for word %ld (0-based)\n", (long)i);
				524	noDict=TRUE;
				525	break;
				526	}
				527	builder->add(str.toStringPiece(), 0, errorCode);
				528	}
				529	if(!noDict) {
				530	int32_t length=builder->buildStringPiece(USTRINGTRIE_BUILD_SMALL, errorCode).length();
				531	printf("size of BytesTrie: %6ld bytes\n", (long)length);
				532	trie=builder->build(USTRINGTRIE_BUILD_SMALL, errorCode);
				533	}
				534	}
				535
				536	virtual ~BytesTrieDictLookup() {
				537	delete builder;
				538	delete trie;
				539	}
				540
				541	protected:
				542	BytesTrieBuilder *builder;
				543	BytesTrie *trie;
				544	UBool noDict;
				545	};
				546
				547	static int32_t
				548	bytesTrieMatches(BytesTrie &trie,
				549	UText *text, int32_t textLimit,
				550	int32_t *lengths, int &count, int limit ) {
				551	UChar32 c=utext_next32(text);
				552	if(c<0) {
				553	return 0;
				554	}
				555	UStringTrieResult result=trie.first(thaiCharToByte(c));
				556	int32_t numChars=1;
				557	count=0;
				558	for(;;) {
				559	if(USTRINGTRIE_HAS_VALUE(result)) {
				560	if(count<limit) {
				561	// lengths[count++]=(int32_t)utext_getNativeIndex(text);
				562	lengths[count++]=numChars; // CompactTrieDictionary just counts chars too.
				563	}
				564	if(result==USTRINGTRIE_FINAL_VALUE) {
				565	break;
				566	}
				567	} else if(result==USTRINGTRIE_NO_MATCH) {
				568	break;
				569	}
				570	if(numChars>=textLimit) {
				571	break;
				572	}
				573	UChar32 c=utext_next32(text);
				574	if(c<0) {
				575	break;
				576	}
				577	++numChars;
				578	result=trie.next(thaiCharToByte(c));
				579	}
				580	return numChars;
				581	}
				582
				583	class BytesTrieDictMatches : public BytesTrieDictLookup {
				584	public:
				585	BytesTrieDictMatches(const DictionaryTriePerfTest &perfTest)
				586	: BytesTrieDictLookup(perfTest) {}
				587
				588	virtual void call(UErrorCode *pErrorCode) {
				589	if(noDict) {
				590	return;
				591	}
				592	UText text=UTEXT_INITIALIZER;
				593	int32_t lengths[20];
				594	const ULine *lines=perf.getCachedLines();
				595	int32_t numLines=perf.getNumLines();
				596	for(int32_t i=0; i<numLines; ++i) {
				597	// Skip comment lines (start with a character below 'A').
				598	if(lines[i].name[0]<0x41) {
				599	continue;
				600	}
				601	utext_openUChars(&text, lines[i].name, lines[i].len, pErrorCode);
				602	int32_t count=0;
				603	bytesTrieMatches(*trie, &text, lines[i].len,
				604	lengths, count, UPRV_LENGTHOF(lengths));
				605	if(count==0 \|\| lengths[count-1]!=lines[i].len) {
				606	fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
				607	}
				608	}
				609	}
				610	};
				611
				612	class BytesTrieDictContains : public BytesTrieDictLookup {
				613	public:
				614	BytesTrieDictContains(const DictionaryTriePerfTest &perfTest)
				615	: BytesTrieDictLookup(perfTest) {}
				616
				617	virtual void call(UErrorCode * /pErrorCode/) {
				618	if(noDict) {
				619	return;
				620	}
				621	const ULine *lines=perf.getCachedLines();
				622	int32_t numLines=perf.getNumLines();
				623	for(int32_t i=0; i<numLines; ++i) {
				624	const UChar *line=lines[i].name;
				625	// Skip comment lines (start with a character below 'A').
				626	if(line[0]<0x41) {
				627	continue;
				628	}
				629	UStringTrieResult result=trie->first(thaiCharToByte(line[0]));
				630	int32_t lineLength=lines[i].len;
				631	for(int32_t j=1; j<lineLength; ++j) {
				632	if(!USTRINGTRIE_HAS_NEXT(result)) {
				633	fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
				634	break;
				635	}
				636	result=trie->next(thaiCharToByte(line[j]));
				637	}
				638	if(!USTRINGTRIE_HAS_VALUE(result)) {
				639	fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
				640	}
				641	}
				642	}
				643	};
				644
				645	UPerfFunction *DictionaryTriePerfTest::runIndexedTest(int32_t index, UBool exec,
				646	const char &name, char /par/) {
				647	if(hasFile()) {
				648	switch(index) {
				649	case 0:
				650	name="ucharstriematches";
				651	if(exec) {
				652	return new UCharsTrieDictMatches(*this);
				653	}
				654	break;
				655	case 1:
				656	name="ucharstriecontains";
				657	if(exec) {
				658	return new UCharsTrieDictContains(*this);
				659	}
				660	break;
				661	case 2:
				662	name="bytestriematches";
				663	if(exec) {
				664	return new BytesTrieDictMatches(*this);
				665	}
				666	break;
				667	case 3:
				668	name="bytestriecontains";
				669	if(exec) {
				670	return new BytesTrieDictContains(*this);
				671	}
				672	break;
				673	default:
				674	name="";
				675	break;
				676	}
				677	} else {
				678	if(index==0 && exec) {
				679	puts("Running BytesTrie perf tests on the .dat package file from the --sourcedir.\n"
				680	"For UCharsTrie perf tests on a dictionary text file, specify the -f or --file-name.\n");
				681	}
				682	switch(index) {
				683	case 0:
				684	name="simplebinarysearch";
				685	if(exec) {
				686	return new BinarySearchPackageLookup(*this);
				687	}
				688	break;
				689	case 1:
				690	name="prefixbinarysearch";
				691	if(exec) {
				692	return new PrefixBinarySearchPackageLookup(*this);
				693	}
				694	break;
				695	case 2:
				696	name="bytestrie";
				697	if(exec) {
				698	return new BytesTriePackageLookup(*this);
				699	}
				700	break;
				701	default:
				702	name="";
				703	break;
				704	}
				705	}
				706	return NULL;
				707	}
				708
				709	int main(int argc, const char *argv[]) {
				710	IcuToolErrorCode errorCode("dicttrieperf main()");
				711	DictionaryTriePerfTest test(argc, argv, errorCode);
				712	if(errorCode.isFailure()) {
				713	fprintf(stderr, "DictionaryTriePerfTest() failed: %s\n", errorCode.errorName());
				714	test.usage();
				715	return errorCode.reset();
				716	}
				717	if(!test.run()) {
				718	fprintf(stderr, "FAILED: Tests could not be run, please check the arguments.\n");
				719	return -1;
				720	}
				721	return 0;
				722	}