Blame - source/common/ustring.cpp - chromium.googlesource.com/chromium/deps/icu

blob: 40d23c06aea74d1eda5e9fd605181840f500031c [file] [log] [blame]

jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1	/*
				2	******************************************************************************
				3	*
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame^]	4	* Copyright (C) 1998-2014, International Business Machines
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	5	* Corporation and others. All Rights Reserved.
				6	*
				7	******************************************************************************
				8	*
				9	* File ustring.cpp
				10	*
				11	* Modification History:
				12	*
				13	* Date Name Description
				14	* 12/07/98 bertrand Creation.
				15	******************************************************************************
				16	*/
				17
				18	#include "unicode/utypes.h"
				19	#include "unicode/putil.h"
				20	#include "unicode/ustring.h"
				21	#include "unicode/utf16.h"
				22	#include "cstring.h"
				23	#include "cwchar.h"
				24	#include "cmemory.h"
				25	#include "ustr_imp.h"
				26
				27	/* ANSI string.h - style functions ------------------------------------------ */
				28
				29	/* U+ffff is the highest BMP code point, the highest one that fits into a 16-bit UChar */
				30	#define U_BMP_MAX 0xffff
				31
				32	/* Forward binary string search functions ----------------------------------- */
				33
				34	/*
				35	* Test if a substring match inside a string is at code point boundaries.
				36	* All pointers refer to the same buffer.
				37	* The limit pointer may be NULL, all others must be real pointers.
				38	*/
				39	static inline UBool
				40	isMatchAtCPBoundary(const UChar start, const UChar match, const UChar matchLimit, const UChar limit) {
				41	if(U16_IS_TRAIL(match) && start!=match && U16_IS_LEAD((match-1))) {
				42	/* the leading edge of the match is in the middle of a surrogate pair */
				43	return FALSE;
				44	}
				45	if(U16_IS_LEAD((matchLimit-1)) && match!=limit && U16_IS_TRAIL(matchLimit)) {
				46	/* the trailing edge of the match is in the middle of a surrogate pair */
				47	return FALSE;
				48	}
				49	return TRUE;
				50	}
				51
				52	U_CAPI UChar * U_EXPORT2
				53	u_strFindFirst(const UChar *s, int32_t length,
				54	const UChar *sub, int32_t subLength) {
				55	const UChar start, p, q, subLimit;
				56	UChar c, cs, cq;
				57
				58	if(sub==NULL \|\| subLength<-1) {
				59	return (UChar *)s;
				60	}
				61	if(s==NULL \|\| length<-1) {
				62	return NULL;
				63	}
				64
				65	start=s;
				66
				67	if(length<0 && subLength<0) {
				68	/* both strings are NUL-terminated */
				69	if((cs=*sub++)==0) {
				70	return (UChar *)s;
				71	}
				72	if(*sub==0 && !U16_IS_SURROGATE(cs)) {
				73	/* the substring consists of a single, non-surrogate BMP code point */
				74	return u_strchr(s, cs);
				75	}
				76
				77	while((c=*s++)!=0) {
				78	if(c==cs) {
				79	/* found first substring UChar, compare rest */
				80	p=s;
				81	q=sub;
				82	for(;;) {
				83	if((cq=*q)==0) {
				84	if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
				85	return (UChar )(s-1); / well-formed match */
				86	} else {
				87	break; /* no match because surrogate pair is split */
				88	}
				89	}
				90	if((c=*p)==0) {
				91	return NULL; /* no match, and none possible after s */
				92	}
				93	if(c!=cq) {
				94	break; /* no match */
				95	}
				96	++p;
				97	++q;
				98	}
				99	}
				100	}
				101
				102	/* not found */
				103	return NULL;
				104	}
				105
				106	if(subLength<0) {
				107	subLength=u_strlen(sub);
				108	}
				109	if(subLength==0) {
				110	return (UChar *)s;
				111	}
				112
				113	/* get sub[0] to search for it fast */
				114	cs=*sub++;
				115	--subLength;
				116	subLimit=sub+subLength;
				117
				118	if(subLength==0 && !U16_IS_SURROGATE(cs)) {
				119	/* the substring consists of a single, non-surrogate BMP code point */
				120	return length<0 ? u_strchr(s, cs) : u_memchr(s, cs, length);
				121	}
				122
				123	if(length<0) {
				124	/* s is NUL-terminated */
				125	while((c=*s++)!=0) {
				126	if(c==cs) {
				127	/* found first substring UChar, compare rest */
				128	p=s;
				129	q=sub;
				130	for(;;) {
				131	if(q==subLimit) {
				132	if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
				133	return (UChar )(s-1); / well-formed match */
				134	} else {
				135	break; /* no match because surrogate pair is split */
				136	}
				137	}
				138	if((c=*p)==0) {
				139	return NULL; /* no match, and none possible after s */
				140	}
				141	if(c!=*q) {
				142	break; /* no match */
				143	}
				144	++p;
				145	++q;
				146	}
				147	}
				148	}
				149	} else {
				150	const UChar limit, preLimit;
				151
				152	/* subLength was decremented above */
				153	if(length<=subLength) {
				154	return NULL; /* s is shorter than sub */
				155	}
				156
				157	limit=s+length;
				158
				159	/* the substring must start before preLimit */
				160	preLimit=limit-subLength;
				161
				162	while(s!=preLimit) {
				163	c=*s++;
				164	if(c==cs) {
				165	/* found first substring UChar, compare rest */
				166	p=s;
				167	q=sub;
				168	for(;;) {
				169	if(q==subLimit) {
				170	if(isMatchAtCPBoundary(start, s-1, p, limit)) {
				171	return (UChar )(s-1); / well-formed match */
				172	} else {
				173	break; /* no match because surrogate pair is split */
				174	}
				175	}
				176	if(p!=q) {
				177	break; /* no match */
				178	}
				179	++p;
				180	++q;
				181	}
				182	}
				183	}
				184	}
				185
				186	/* not found */
				187	return NULL;
				188	}
				189
				190	U_CAPI UChar * U_EXPORT2
				191	u_strstr(const UChar s, const UChar substring) {
				192	return u_strFindFirst(s, -1, substring, -1);
				193	}
				194
				195	U_CAPI UChar * U_EXPORT2
				196	u_strchr(const UChar *s, UChar c) {
				197	if(U16_IS_SURROGATE(c)) {
				198	/* make sure to not find half of a surrogate pair */
				199	return u_strFindFirst(s, -1, &c, 1);
				200	} else {
				201	UChar cs;
				202
				203	/* trivial search for a BMP code point */
				204	for(;;) {
				205	if((cs=*s)==c) {
				206	return (UChar *)s;
				207	}
				208	if(cs==0) {
				209	return NULL;
				210	}
				211	++s;
				212	}
				213	}
				214	}
				215
				216	U_CAPI UChar * U_EXPORT2
				217	u_strchr32(const UChar *s, UChar32 c) {
				218	if((uint32_t)c<=U_BMP_MAX) {
				219	/* find BMP code point */
				220	return u_strchr(s, (UChar)c);
				221	} else if((uint32_t)c<=UCHAR_MAX_VALUE) {
				222	/* find supplementary code point as surrogate pair */
				223	UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
				224
				225	while((cs=*s++)!=0) {
				226	if(cs==lead && *s==trail) {
				227	return (UChar *)(s-1);
				228	}
				229	}
				230	return NULL;
				231	} else {
				232	/* not a Unicode code point, not findable */
				233	return NULL;
				234	}
				235	}
				236
				237	U_CAPI UChar * U_EXPORT2
				238	u_memchr(const UChar *s, UChar c, int32_t count) {
				239	if(count<=0) {
				240	return NULL; /* no string */
				241	} else if(U16_IS_SURROGATE(c)) {
				242	/* make sure to not find half of a surrogate pair */
				243	return u_strFindFirst(s, count, &c, 1);
				244	} else {
				245	/* trivial search for a BMP code point */
				246	const UChar *limit=s+count;
				247	do {
				248	if(*s==c) {
				249	return (UChar *)s;
				250	}
				251	} while(++s!=limit);
				252	return NULL;
				253	}
				254	}
				255
				256	U_CAPI UChar * U_EXPORT2
				257	u_memchr32(const UChar *s, UChar32 c, int32_t count) {
				258	if((uint32_t)c<=U_BMP_MAX) {
				259	/* find BMP code point */
				260	return u_memchr(s, (UChar)c, count);
				261	} else if(count<2) {
				262	/* too short for a surrogate pair */
				263	return NULL;
				264	} else if((uint32_t)c<=UCHAR_MAX_VALUE) {
				265	/* find supplementary code point as surrogate pair */
				266	const UChar limit=s+count-1; / -1 so that we do not need a separate check for the trail unit */
				267	UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
				268
				269	do {
				270	if(s==lead && (s+1)==trail) {
				271	return (UChar *)s;
				272	}
				273	} while(++s!=limit);
				274	return NULL;
				275	} else {
				276	/* not a Unicode code point, not findable */
				277	return NULL;
				278	}
				279	}
				280
				281	/* Backward binary string search functions ---------------------------------- */
				282
				283	U_CAPI UChar * U_EXPORT2
				284	u_strFindLast(const UChar *s, int32_t length,
				285	const UChar *sub, int32_t subLength) {
				286	const UChar start, limit, p, q, *subLimit;
				287	UChar c, cs;
				288
				289	if(sub==NULL \|\| subLength<-1) {
				290	return (UChar *)s;
				291	}
				292	if(s==NULL \|\| length<-1) {
				293	return NULL;
				294	}
				295
				296	/*
				297	* This implementation is more lazy than the one for u_strFindFirst():
				298	* There is no special search code for NUL-terminated strings.
				299	* It does not seem to be worth it for searching substrings to
				300	* search forward and find all matches like in u_strrchr() and similar.
				301	* Therefore, we simply get both string lengths and search backward.
				302	*
				303	* markus 2002oct23
				304	*/
				305
				306	if(subLength<0) {
				307	subLength=u_strlen(sub);
				308	}
				309	if(subLength==0) {
				310	return (UChar *)s;
				311	}
				312
				313	/* get sub[subLength-1] to search for it fast */
				314	subLimit=sub+subLength;
				315	cs=*(--subLimit);
				316	--subLength;
				317
				318	if(subLength==0 && !U16_IS_SURROGATE(cs)) {
				319	/* the substring consists of a single, non-surrogate BMP code point */
				320	return length<0 ? u_strrchr(s, cs) : u_memrchr(s, cs, length);
				321	}
				322
				323	if(length<0) {
				324	length=u_strlen(s);
				325	}
				326
				327	/* subLength was decremented above */
				328	if(length<=subLength) {
				329	return NULL; /* s is shorter than sub */
				330	}
				331
				332	start=s;
				333	limit=s+length;
				334
				335	/* the substring must start no later than s+subLength */
				336	s+=subLength;
				337
				338	while(s!=limit) {
				339	c=*(--limit);
				340	if(c==cs) {
				341	/* found last substring UChar, compare rest */
				342	p=limit;
				343	q=subLimit;
				344	for(;;) {
				345	if(q==sub) {
				346	if(isMatchAtCPBoundary(start, p, limit+1, start+length)) {
				347	return (UChar )p; / well-formed match */
				348	} else {
				349	break; /* no match because surrogate pair is split */
				350	}
				351	}
				352	if((--p)!=(--q)) {
				353	break; /* no match */
				354	}
				355	}
				356	}
				357	}
				358
				359	/* not found */
				360	return NULL;
				361	}
				362
				363	U_CAPI UChar * U_EXPORT2
				364	u_strrstr(const UChar s, const UChar substring) {
				365	return u_strFindLast(s, -1, substring, -1);
				366	}
				367
				368	U_CAPI UChar * U_EXPORT2
				369	u_strrchr(const UChar *s, UChar c) {
				370	if(U16_IS_SURROGATE(c)) {
				371	/* make sure to not find half of a surrogate pair */
				372	return u_strFindLast(s, -1, &c, 1);
				373	} else {
				374	const UChar *result=NULL;
				375	UChar cs;
				376
				377	/* trivial search for a BMP code point */
				378	for(;;) {
				379	if((cs=*s)==c) {
				380	result=s;
				381	}
				382	if(cs==0) {
				383	return (UChar *)result;
				384	}
				385	++s;
				386	}
				387	}
				388	}
				389
				390	U_CAPI UChar * U_EXPORT2
				391	u_strrchr32(const UChar *s, UChar32 c) {
				392	if((uint32_t)c<=U_BMP_MAX) {
				393	/* find BMP code point */
				394	return u_strrchr(s, (UChar)c);
				395	} else if((uint32_t)c<=UCHAR_MAX_VALUE) {
				396	/* find supplementary code point as surrogate pair */
				397	const UChar *result=NULL;
				398	UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
				399
				400	while((cs=*s++)!=0) {
				401	if(cs==lead && *s==trail) {
				402	result=s-1;
				403	}
				404	}
				405	return (UChar *)result;
				406	} else {
				407	/* not a Unicode code point, not findable */
				408	return NULL;
				409	}
				410	}
				411
				412	U_CAPI UChar * U_EXPORT2
				413	u_memrchr(const UChar *s, UChar c, int32_t count) {
				414	if(count<=0) {
				415	return NULL; /* no string */
				416	} else if(U16_IS_SURROGATE(c)) {
				417	/* make sure to not find half of a surrogate pair */
				418	return u_strFindLast(s, count, &c, 1);
				419	} else {
				420	/* trivial search for a BMP code point */
				421	const UChar *limit=s+count;
				422	do {
				423	if(*(--limit)==c) {
				424	return (UChar *)limit;
				425	}
				426	} while(s!=limit);
				427	return NULL;
				428	}
				429	}
				430
				431	U_CAPI UChar * U_EXPORT2
				432	u_memrchr32(const UChar *s, UChar32 c, int32_t count) {
				433	if((uint32_t)c<=U_BMP_MAX) {
				434	/* find BMP code point */
				435	return u_memrchr(s, (UChar)c, count);
				436	} else if(count<2) {
				437	/* too short for a surrogate pair */
				438	return NULL;
				439	} else if((uint32_t)c<=UCHAR_MAX_VALUE) {
				440	/* find supplementary code point as surrogate pair */
				441	const UChar *limit=s+count-1;
				442	UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
				443
				444	do {
				445	if(limit==trail && (limit-1)==lead) {
				446	return (UChar *)(limit-1);
				447	}
				448	} while(s!=--limit);
				449	return NULL;
				450	} else {
				451	/* not a Unicode code point, not findable */
				452	return NULL;
				453	}
				454	}
				455
				456	/* Tokenization functions --------------------------------------------------- */
				457
				458	/*
				459	* Match each code point in a string against each code point in the matchSet.
				460	* Return the index of the first string code point that
				461	* is (polarity==TRUE) or is not (FALSE) contained in the matchSet.
				462	* Return -(string length)-1 if there is no such code point.
				463	*/
				464	static int32_t
				465	_matchFromSet(const UChar string, const UChar matchSet, UBool polarity) {
				466	int32_t matchLen, matchBMPLen, strItr, matchItr;
				467	UChar32 stringCh, matchCh;
				468	UChar c, c2;
				469
				470	/* first part of matchSet contains only BMP code points */
				471	matchBMPLen = 0;
				472	while((c = matchSet[matchBMPLen]) != 0 && U16_IS_SINGLE(c)) {
				473	++matchBMPLen;
				474	}
				475
				476	/* second part of matchSet contains BMP and supplementary code points */
				477	matchLen = matchBMPLen;
				478	while(matchSet[matchLen] != 0) {
				479	++matchLen;
				480	}
				481
				482	for(strItr = 0; (c = string[strItr]) != 0;) {
				483	++strItr;
				484	if(U16_IS_SINGLE(c)) {
				485	if(polarity) {
				486	for(matchItr = 0; matchItr < matchLen; ++matchItr) {
				487	if(c == matchSet[matchItr]) {
				488	return strItr - 1; /* one matches */
				489	}
				490	}
				491	} else {
				492	for(matchItr = 0; matchItr < matchLen; ++matchItr) {
				493	if(c == matchSet[matchItr]) {
				494	goto endloop;
				495	}
				496	}
				497	return strItr - 1; /* none matches */
				498	}
				499	} else {
				500	/*
				501	* No need to check for string length before U16_IS_TRAIL
				502	* because c2 could at worst be the terminating NUL.
				503	*/
				504	if(U16_IS_SURROGATE_LEAD(c) && U16_IS_TRAIL(c2 = string[strItr])) {
				505	++strItr;
				506	stringCh = U16_GET_SUPPLEMENTARY(c, c2);
				507	} else {
				508	stringCh = c; /* unpaired trail surrogate */
				509	}
				510
				511	if(polarity) {
				512	for(matchItr = matchBMPLen; matchItr < matchLen;) {
				513	U16_NEXT(matchSet, matchItr, matchLen, matchCh);
				514	if(stringCh == matchCh) {
				515	return strItr - U16_LENGTH(stringCh); /* one matches */
				516	}
				517	}
				518	} else {
				519	for(matchItr = matchBMPLen; matchItr < matchLen;) {
				520	U16_NEXT(matchSet, matchItr, matchLen, matchCh);
				521	if(stringCh == matchCh) {
				522	goto endloop;
				523	}
				524	}
				525	return strItr - U16_LENGTH(stringCh); /* none matches */
				526	}
				527	}
				528	endloop:
				529	/* wish C had continue with labels like Java... */;
				530	}
				531
				532	/* Didn't find it. */
				533	return -strItr-1;
				534	}
				535
				536	/* Search for a codepoint in a string that matches one of the matchSet codepoints. */
				537	U_CAPI UChar * U_EXPORT2
				538	u_strpbrk(const UChar string, const UChar matchSet)
				539	{
				540	int32_t idx = _matchFromSet(string, matchSet, TRUE);
				541	if(idx >= 0) {
				542	return (UChar *)string + idx;
				543	} else {
				544	return NULL;
				545	}
				546	}
				547
				548	/* Search for a codepoint in a string that matches one of the matchSet codepoints. */
				549	U_CAPI int32_t U_EXPORT2
				550	u_strcspn(const UChar string, const UChar matchSet)
				551	{
				552	int32_t idx = _matchFromSet(string, matchSet, TRUE);
				553	if(idx >= 0) {
				554	return idx;
				555	} else {
				556	return -idx - 1; /* == u_strlen(string) */
				557	}
				558	}
				559
				560	/* Search for a codepoint in a string that does not match one of the matchSet codepoints. */
				561	U_CAPI int32_t U_EXPORT2
				562	u_strspn(const UChar string, const UChar matchSet)
				563	{
				564	int32_t idx = _matchFromSet(string, matchSet, FALSE);
				565	if(idx >= 0) {
				566	return idx;
				567	} else {
				568	return -idx - 1; /* == u_strlen(string) */
				569	}
				570	}
				571
				572	/* ----- Text manipulation functions --- */
				573
				574	U_CAPI UChar* U_EXPORT2
				575	u_strtok_r(UChar *src,
				576	const UChar *delim,
				577	UChar **saveState)
				578	{
				579	UChar *tokSource;
				580	UChar *nextToken;
				581	uint32_t nonDelimIdx;
				582
				583	/* If saveState is NULL, the user messed up. */
				584	if (src != NULL) {
				585	tokSource = src;
				586	saveState = src; / Set to "src" in case there are no delimiters */
				587	}
				588	else if (*saveState) {
				589	tokSource = *saveState;
				590	}
				591	else {
				592	/* src == NULL && saveState == NULL /
				593	/* This shouldn't happen. We already finished tokenizing. */
				594	return NULL;
				595	}
				596
				597	/* Skip initial delimiters */
				598	nonDelimIdx = u_strspn(tokSource, delim);
				599	tokSource = &tokSource[nonDelimIdx];
				600
				601	if (*tokSource) {
				602	nextToken = u_strpbrk(tokSource, delim);
				603	if (nextToken != NULL) {
				604	/* Create a token */
				605	*(nextToken++) = 0;
				606	*saveState = nextToken;
				607	return tokSource;
				608	}
				609	else if (*saveState) {
				610	/* Return the last token */
				611	*saveState = NULL;
				612	return tokSource;
				613	}
				614	}
				615	else {
				616	/* No tokens were found. Only delimiters were left. */
				617	*saveState = NULL;
				618	}
				619	return NULL;
				620	}
				621
				622	/* Miscellaneous functions -------------------------------------------------- */
				623
				624	U_CAPI UChar* U_EXPORT2
				625	u_strcat(UChar *dst,
				626	const UChar *src)
				627	{
				628	UChar anchor = dst; / save a pointer to start of dst */
				629
				630	while(dst != 0) { / To end of first string */
				631	++dst;
				632	}
				633	while(((dst++) = (src++)) != 0) { /* copy string 2 over */
				634	}
				635
				636	return anchor;
				637	}
				638
				639	U_CAPI UChar* U_EXPORT2
				640	u_strncat(UChar *dst,
				641	const UChar *src,
				642	int32_t n )
				643	{
				644	if(n > 0) {
				645	UChar anchor = dst; / save a pointer to start of dst */
				646
				647	while(dst != 0) { / To end of first string */
				648	++dst;
				649	}
				650	while((dst = src) != 0) { /* copy string 2 over */
				651	++dst;
				652	if(--n == 0) {
				653	*dst = 0;
				654	break;
				655	}
				656	++src;
				657	}
				658
				659	return anchor;
				660	} else {
				661	return dst;
				662	}
				663	}
				664
				665	/* ----- Text property functions --- */
				666
				667	U_CAPI int32_t U_EXPORT2
				668	u_strcmp(const UChar *s1,
				669	const UChar *s2)
				670	{
				671	UChar c1, c2;
				672
				673	for(;;) {
				674	c1=*s1++;
				675	c2=*s2++;
				676	if (c1 != c2 \|\| c1 == 0) {
				677	break;
				678	}
				679	}
				680	return (int32_t)c1 - (int32_t)c2;
				681	}
				682
				683	U_CFUNC int32_t U_EXPORT2
				684	uprv_strCompare(const UChar *s1, int32_t length1,
				685	const UChar *s2, int32_t length2,
				686	UBool strncmpStyle, UBool codePointOrder) {
				687	const UChar start1, start2, limit1, limit2;
				688	UChar c1, c2;
				689
				690	/* setup for fix-up */
				691	start1=s1;
				692	start2=s2;
				693
				694	/* compare identical prefixes - they do not need to be fixed up */
				695	if(length1<0 && length2<0) {
				696	/* strcmp style, both NUL-terminated */
				697	if(s1==s2) {
				698	return 0;
				699	}
				700
				701	for(;;) {
				702	c1=*s1;
				703	c2=*s2;
				704	if(c1!=c2) {
				705	break;
				706	}
				707	if(c1==0) {
				708	return 0;
				709	}
				710	++s1;
				711	++s2;
				712	}
				713
				714	/* setup for fix-up */
				715	limit1=limit2=NULL;
				716	} else if(strncmpStyle) {
				717	/* special handling for strncmp, assume length1==length2>=0 but also check for NUL */
				718	if(s1==s2) {
				719	return 0;
				720	}
				721
				722	limit1=start1+length1;
				723
				724	for(;;) {
				725	/* both lengths are same, check only one limit */
				726	if(s1==limit1) {
				727	return 0;
				728	}
				729
				730	c1=*s1;
				731	c2=*s2;
				732	if(c1!=c2) {
				733	break;
				734	}
				735	if(c1==0) {
				736	return 0;
				737	}
				738	++s1;
				739	++s2;
				740	}
				741
				742	/* setup for fix-up */
				743	limit2=start2+length1; /* use length1 here, too, to enforce assumption */
				744	} else {
				745	/* memcmp/UnicodeString style, both length-specified */
				746	int32_t lengthResult;
				747
				748	if(length1<0) {
				749	length1=u_strlen(s1);
				750	}
				751	if(length2<0) {
				752	length2=u_strlen(s2);
				753	}
				754
				755	/* limit1=start1+min(lenght1, length2) */
				756	if(length1<length2) {
				757	lengthResult=-1;
				758	limit1=start1+length1;
				759	} else if(length1==length2) {
				760	lengthResult=0;
				761	limit1=start1+length1;
				762	} else /* length1>length2 */ {
				763	lengthResult=1;
				764	limit1=start1+length2;
				765	}
				766
				767	if(s1==s2) {
				768	return lengthResult;
				769	}
				770
				771	for(;;) {
				772	/* check pseudo-limit */
				773	if(s1==limit1) {
				774	return lengthResult;
				775	}
				776
				777	c1=*s1;
				778	c2=*s2;
				779	if(c1!=c2) {
				780	break;
				781	}
				782	++s1;
				783	++s2;
				784	}
				785
				786	/* setup for fix-up */
				787	limit1=start1+length1;
				788	limit2=start2+length2;
				789	}
				790
				791	/* if both values are in or above the surrogate range, fix them up */
				792	if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
				793	/* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
				794	if(
				795	(c1<=0xdbff && (s1+1)!=limit1 && U16_IS_TRAIL(*(s1+1))) \|\|
				796	(U16_IS_TRAIL(c1) && start1!=s1 && U16_IS_LEAD(*(s1-1)))
				797	) {
				798	/* part of a surrogate pair, leave >=d800 */
				799	} else {
				800	/* BMP code point - may be surrogate code point - make <d800 */
				801	c1-=0x2800;
				802	}
				803
				804	if(
				805	(c2<=0xdbff && (s2+1)!=limit2 && U16_IS_TRAIL(*(s2+1))) \|\|
				806	(U16_IS_TRAIL(c2) && start2!=s2 && U16_IS_LEAD(*(s2-1)))
				807	) {
				808	/* part of a surrogate pair, leave >=d800 */
				809	} else {
				810	/* BMP code point - may be surrogate code point - make <d800 */
				811	c2-=0x2800;
				812	}
				813	}
				814
				815	/* now c1 and c2 are in the requested (code unit or code point) order */
				816	return (int32_t)c1-(int32_t)c2;
				817	}
				818
				819	/*
				820	* Compare two strings as presented by UCharIterators.
				821	* Use code unit or code point order.
				822	* When the function returns, it is undefined where the iterators
				823	* have stopped.
				824	*/
				825	U_CAPI int32_t U_EXPORT2
				826	u_strCompareIter(UCharIterator iter1, UCharIterator iter2, UBool codePointOrder) {
				827	UChar32 c1, c2;
				828
				829	/* argument checking */
				830	if(iter1==NULL \|\| iter2==NULL) {
				831	return 0; /* bad arguments */
				832	}
				833	if(iter1==iter2) {
				834	return 0; /* identical iterators */
				835	}
				836
				837	/* reset iterators to start? */
				838	iter1->move(iter1, 0, UITER_START);
				839	iter2->move(iter2, 0, UITER_START);
				840
				841	/* compare identical prefixes - they do not need to be fixed up */
				842	for(;;) {
				843	c1=iter1->next(iter1);
				844	c2=iter2->next(iter2);
				845	if(c1!=c2) {
				846	break;
				847	}
				848	if(c1==-1) {
				849	return 0;
				850	}
				851	}
				852
				853	/* if both values are in or above the surrogate range, fix them up */
				854	if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
				855	/* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
				856	if(
				857	(c1<=0xdbff && U16_IS_TRAIL(iter1->current(iter1))) \|\|
				858	(U16_IS_TRAIL(c1) && (iter1->previous(iter1), U16_IS_LEAD(iter1->previous(iter1))))
				859	) {
				860	/* part of a surrogate pair, leave >=d800 */
				861	} else {
				862	/* BMP code point - may be surrogate code point - make <d800 */
				863	c1-=0x2800;
				864	}
				865
				866	if(
				867	(c2<=0xdbff && U16_IS_TRAIL(iter2->current(iter2))) \|\|
				868	(U16_IS_TRAIL(c2) && (iter2->previous(iter2), U16_IS_LEAD(iter2->previous(iter2))))
				869	) {
				870	/* part of a surrogate pair, leave >=d800 */
				871	} else {
				872	/* BMP code point - may be surrogate code point - make <d800 */
				873	c2-=0x2800;
				874	}
				875	}
				876
				877	/* now c1 and c2 are in the requested (code unit or code point) order */
				878	return (int32_t)c1-(int32_t)c2;
				879	}
				880
				881	#if 0
				882	/*
				883	* u_strCompareIter() does not leave the iterators _on_ the different units.
				884	* This is possible but would cost a few extra indirect function calls to back
				885	* up if the last unit (c1 or c2 respectively) was >=0.
				886	*
				887	* Consistently leaving them _behind_ the different units is not an option
				888	* because the current "unit" is the end of the string if that is reached,
				889	* and in such a case the iterator does not move.
				890	* For example, when comparing "ab" with "abc", both iterators rest _on_ the end
				891	* of their strings. Calling previous() on each does not move them to where
				892	* the comparison fails.
				893	*
				894	* So the simplest semantics is to not define where the iterators end up.
				895	*
				896	* The following fragment is part of what would need to be done for backing up.
				897	*/
				898	void fragment {
				899	/* iff a surrogate is part of a surrogate pair, leave >=d800 */
				900	if(c1<=0xdbff) {
				901	if(!U16_IS_TRAIL(iter1->current(iter1))) {
				902	/* lead surrogate code point - make <d800 */
				903	c1-=0x2800;
				904	}
				905	} else if(c1<=0xdfff) {
				906	int32_t idx=iter1->getIndex(iter1, UITER_CURRENT);
				907	iter1->previous(iter1); /* ==c1 */
				908	if(!U16_IS_LEAD(iter1->previous(iter1))) {
				909	/* trail surrogate code point - make <d800 */
				910	c1-=0x2800;
				911	}
				912	/* go back to behind where the difference is */
				913	iter1->move(iter1, idx, UITER_ZERO);
				914	} else /* 0xe000<=c1<=0xffff */ {
				915	/* BMP code point - make <d800 */
				916	c1-=0x2800;
				917	}
				918	}
				919	#endif
				920
				921	U_CAPI int32_t U_EXPORT2
				922	u_strCompare(const UChar *s1, int32_t length1,
				923	const UChar *s2, int32_t length2,
				924	UBool codePointOrder) {
				925	/* argument checking */
				926	if(s1==NULL \|\| length1<-1 \|\| s2==NULL \|\| length2<-1) {
				927	return 0;
				928	}
				929	return uprv_strCompare(s1, length1, s2, length2, FALSE, codePointOrder);
				930	}
				931
				932	/* String compare in code point order - u_strcmp() compares in code unit order. */
				933	U_CAPI int32_t U_EXPORT2
				934	u_strcmpCodePointOrder(const UChar s1, const UChar s2) {
				935	return uprv_strCompare(s1, -1, s2, -1, FALSE, TRUE);
				936	}
				937
				938	U_CAPI int32_t U_EXPORT2
				939	u_strncmp(const UChar *s1,
				940	const UChar *s2,
				941	int32_t n)
				942	{
				943	if(n > 0) {
				944	int32_t rc;
				945	for(;;) {
				946	rc = (int32_t)s1 - (int32_t)s2;
				947	if(rc != 0 \|\| *s1 == 0 \|\| --n == 0) {
				948	return rc;
				949	}
				950	++s1;
				951	++s2;
				952	}
				953	} else {
				954	return 0;
				955	}
				956	}
				957
				958	U_CAPI int32_t U_EXPORT2
				959	u_strncmpCodePointOrder(const UChar s1, const UChar s2, int32_t n) {
				960	return uprv_strCompare(s1, n, s2, n, TRUE, TRUE);
				961	}
				962
				963	U_CAPI UChar* U_EXPORT2
				964	u_strcpy(UChar *dst,
				965	const UChar *src)
				966	{
				967	UChar anchor = dst; / save a pointer to start of dst */
				968
				969	while(((dst++) = (src++)) != 0) { /* copy string 2 over */
				970	}
				971
				972	return anchor;
				973	}
				974
				975	U_CAPI UChar* U_EXPORT2
				976	u_strncpy(UChar *dst,
				977	const UChar *src,
				978	int32_t n)
				979	{
				980	UChar anchor = dst; / save a pointer to start of dst */
				981
				982	/* copy string 2 over */
				983	while(n > 0 && ((dst++) = (src++)) != 0) {
				984	--n;
				985	}
				986
				987	return anchor;
				988	}
				989
				990	U_CAPI int32_t U_EXPORT2
				991	u_strlen(const UChar *s)
				992	{
				993	#if U_SIZEOF_WCHAR_T == U_SIZEOF_UCHAR
				994	return (int32_t)uprv_wcslen(s);
				995	#else
				996	const UChar *t = s;
				997	while(*t != 0) {
				998	++t;
				999	}
				1000	return t - s;
				1001	#endif
				1002	}
				1003
				1004	U_CAPI int32_t U_EXPORT2
				1005	u_countChar32(const UChar *s, int32_t length) {
				1006	int32_t count;
				1007
				1008	if(s==NULL \|\| length<-1) {
				1009	return 0;
				1010	}
				1011
				1012	count=0;
				1013	if(length>=0) {
				1014	while(length>0) {
				1015	++count;
				1016	if(U16_IS_LEAD(s) && length>=2 && U16_IS_TRAIL((s+1))) {
				1017	s+=2;
				1018	length-=2;
				1019	} else {
				1020	++s;
				1021	--length;
				1022	}
				1023	}
				1024	} else /* length==-1 */ {
				1025	UChar c;
				1026
				1027	for(;;) {
				1028	if((c=*s++)==0) {
				1029	break;
				1030	}
				1031	++count;
				1032
				1033	/*
				1034	* sufficient to look ahead one because of UTF-16;
				1035	* safe to look ahead one because at worst that would be the terminating NUL
				1036	*/
				1037	if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
				1038	++s;
				1039	}
				1040	}
				1041	}
				1042	return count;
				1043	}
				1044
				1045	U_CAPI UBool U_EXPORT2
				1046	u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {
				1047
				1048	if(number<0) {
				1049	return TRUE;
				1050	}
				1051	if(s==NULL \|\| length<-1) {
				1052	return FALSE;
				1053	}
				1054
				1055	if(length==-1) {
				1056	/* s is NUL-terminated */
				1057	UChar c;
				1058
				1059	/* count code points until they exceed */
				1060	for(;;) {
				1061	if((c=*s++)==0) {
				1062	return FALSE;
				1063	}
				1064	if(number==0) {
				1065	return TRUE;
				1066	}
				1067	if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
				1068	++s;
				1069	}
				1070	--number;
				1071	}
				1072	} else {
				1073	/* length>=0 known */
				1074	const UChar *limit;
				1075	int32_t maxSupplementary;
				1076
				1077	/* s contains at least (length+1)/2 code points: <=2 UChars per cp */
				1078	if(((length+1)/2)>number) {
				1079	return TRUE;
				1080	}
				1081
				1082	/* check if s does not even contain enough UChars */
				1083	maxSupplementary=length-number;
				1084	if(maxSupplementary<=0) {
				1085	return FALSE;
				1086	}
				1087	/* there are maxSupplementary=length-number more UChars than asked-for code points */
				1088
				1089	/*
				1090	* count code points until they exceed and also check that there are
				1091	* no more than maxSupplementary supplementary code points (UChar pairs)
				1092	*/
				1093	limit=s+length;
				1094	for(;;) {
				1095	if(s==limit) {
				1096	return FALSE;
				1097	}
				1098	if(number==0) {
				1099	return TRUE;
				1100	}
				1101	if(U16_IS_LEAD(s++) && s!=limit && U16_IS_TRAIL(s)) {
				1102	++s;
				1103	if(--maxSupplementary<=0) {
				1104	/* too many pairs - too few code points */
				1105	return FALSE;
				1106	}
				1107	}
				1108	--number;
				1109	}
				1110	}
				1111	}
				1112
				1113	U_CAPI UChar * U_EXPORT2
				1114	u_memcpy(UChar dest, const UChar src, int32_t count) {
				1115	if(count > 0) {
				1116	uprv_memcpy(dest, src, count*U_SIZEOF_UCHAR);
				1117	}
				1118	return dest;
				1119	}
				1120
				1121	U_CAPI UChar * U_EXPORT2
				1122	u_memmove(UChar dest, const UChar src, int32_t count) {
				1123	if(count > 0) {
				1124	uprv_memmove(dest, src, count*U_SIZEOF_UCHAR);
				1125	}
				1126	return dest;
				1127	}
				1128
				1129	U_CAPI UChar * U_EXPORT2
				1130	u_memset(UChar *dest, UChar c, int32_t count) {
				1131	if(count > 0) {
				1132	UChar *ptr = dest;
				1133	UChar *limit = dest + count;
				1134
				1135	while (ptr < limit) {
				1136	*(ptr++) = c;
				1137	}
				1138	}
				1139	return dest;
				1140	}
				1141
				1142	U_CAPI int32_t U_EXPORT2
				1143	u_memcmp(const UChar buf1, const UChar buf2, int32_t count) {
				1144	if(count > 0) {
				1145	const UChar *limit = buf1 + count;
				1146	int32_t result;
				1147
				1148	while (buf1 < limit) {
				1149	result = (int32_t)(uint16_t)buf1 - (int32_t)(uint16_t)buf2;
				1150	if (result != 0) {
				1151	return result;
				1152	}
				1153	buf1++;
				1154	buf2++;
				1155	}
				1156	}
				1157	return 0;
				1158	}
				1159
				1160	U_CAPI int32_t U_EXPORT2
				1161	u_memcmpCodePointOrder(const UChar s1, const UChar s2, int32_t count) {
				1162	return uprv_strCompare(s1, count, s2, count, FALSE, TRUE);
				1163	}
				1164
				1165	/* u_unescape & support fns ------------------------------------------------- */
				1166
				1167	/* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
				1168	static const UChar UNESCAPE_MAP[] = {
				1169	/" 0x22, 0x22 /
				1170	/' 0x27, 0x27 /
				1171	/? 0x3F, 0x3F /
				1172	/\ 0x5C, 0x5C /
				1173	/a/ 0x61, 0x07,
				1174	/b/ 0x62, 0x08,
				1175	/e/ 0x65, 0x1b,
				1176	/f/ 0x66, 0x0c,
				1177	/n/ 0x6E, 0x0a,
				1178	/r/ 0x72, 0x0d,
				1179	/t/ 0x74, 0x09,
				1180	/v/ 0x76, 0x0b
				1181	};
				1182	enum { UNESCAPE_MAP_LENGTH = sizeof(UNESCAPE_MAP) / sizeof(UNESCAPE_MAP[0]) };
				1183
				1184	/* Convert one octal digit to a numeric value 0..7, or -1 on failure */
				1185	static int8_t _digit8(UChar c) {
				1186	if (c >= 0x0030 && c <= 0x0037) {
				1187	return (int8_t)(c - 0x0030);
				1188	}
				1189	return -1;
				1190	}
				1191
				1192	/* Convert one hex digit to a numeric value 0..F, or -1 on failure */
				1193	static int8_t _digit16(UChar c) {
				1194	if (c >= 0x0030 && c <= 0x0039) {
				1195	return (int8_t)(c - 0x0030);
				1196	}
				1197	if (c >= 0x0041 && c <= 0x0046) {
				1198	return (int8_t)(c - (0x0041 - 10));
				1199	}
				1200	if (c >= 0x0061 && c <= 0x0066) {
				1201	return (int8_t)(c - (0x0061 - 10));
				1202	}
				1203	return -1;
				1204	}
				1205
				1206	/* Parse a single escape sequence. Although this method deals in
				1207	* UChars, it does not use C++ or UnicodeString. This allows it to
				1208	* be used from C contexts. */
				1209	U_CAPI UChar32 U_EXPORT2
				1210	u_unescapeAt(UNESCAPE_CHAR_AT charAt,
				1211	int32_t *offset,
				1212	int32_t length,
				1213	void *context) {
				1214
				1215	int32_t start = *offset;
				1216	UChar c;
				1217	UChar32 result = 0;
				1218	int8_t n = 0;
				1219	int8_t minDig = 0;
				1220	int8_t maxDig = 0;
				1221	int8_t bitsPerDigit = 4;
				1222	int8_t dig;
				1223	int32_t i;
				1224	UBool braces = FALSE;
				1225
				1226	/* Check that offset is in range */
				1227	if (offset < 0 \|\| offset >= length) {
				1228	goto err;
				1229	}
				1230
				1231	/* Fetch first UChar after '\\' */
				1232	c = charAt((*offset)++, context);
				1233
				1234	/* Convert hexadecimal and octal escapes */
				1235	switch (c) {
				1236	case 0x0075 /'u'/:
				1237	minDig = maxDig = 4;
				1238	break;
				1239	case 0x0055 /'U'/:
				1240	minDig = maxDig = 8;
				1241	break;
				1242	case 0x0078 /'x'/:
				1243	minDig = 1;
				1244	if (offset < length && charAt(offset, context) == 0x7B /{/) {
				1245	++(*offset);
				1246	braces = TRUE;
				1247	maxDig = 8;
				1248	} else {
				1249	maxDig = 2;
				1250	}
				1251	break;
				1252	default:
				1253	dig = _digit8(c);
				1254	if (dig >= 0) {
				1255	minDig = 1;
				1256	maxDig = 3;
				1257	n = 1; /* Already have first octal digit */
				1258	bitsPerDigit = 3;
				1259	result = dig;
				1260	}
				1261	break;
				1262	}
				1263	if (minDig != 0) {
				1264	while (*offset < length && n < maxDig) {
				1265	c = charAt(*offset, context);
				1266	dig = (int8_t)((bitsPerDigit == 3) ? _digit8(c) : _digit16(c));
				1267	if (dig < 0) {
				1268	break;
				1269	}
				1270	result = (result << bitsPerDigit) \| dig;
				1271	++(*offset);
				1272	++n;
				1273	}
				1274	if (n < minDig) {
				1275	goto err;
				1276	}
				1277	if (braces) {
				1278	if (c != 0x7D /}/) {
				1279	goto err;
				1280	}
				1281	++(*offset);
				1282	}
				1283	if (result < 0 \|\| result >= 0x110000) {
				1284	goto err;
				1285	}
				1286	/* If an escape sequence specifies a lead surrogate, see if
				1287	* there is a trail surrogate after it, either as an escape or
				1288	* as a literal. If so, join them up into a supplementary.
				1289	*/
				1290	if (*offset < length && U16_IS_LEAD(result)) {
				1291	int32_t ahead = *offset + 1;
				1292	c = charAt(*offset, context);
				1293	if (c == 0x5C /'\\'/ && ahead < length) {
				1294	c = (UChar) u_unescapeAt(charAt, &ahead, length, context);
				1295	}
				1296	if (U16_IS_TRAIL(c)) {
				1297	*offset = ahead;
				1298	result = U16_GET_SUPPLEMENTARY(result, c);
				1299	}
				1300	}
				1301	return result;
				1302	}
				1303
				1304	/* Convert C-style escapes in table */
				1305	for (i=0; i<UNESCAPE_MAP_LENGTH; i+=2) {
				1306	if (c == UNESCAPE_MAP[i]) {
				1307	return UNESCAPE_MAP[i+1];
				1308	} else if (c < UNESCAPE_MAP[i]) {
				1309	break;
				1310	}
				1311	}
				1312
				1313	/* Map \cX to control-X: X & 0x1F */
				1314	if (c == 0x0063 /'c'/ && *offset < length) {
				1315	c = charAt((*offset)++, context);
				1316	if (U16_IS_LEAD(c) && *offset < length) {
				1317	UChar c2 = charAt(*offset, context);
				1318	if (U16_IS_TRAIL(c2)) {
				1319	++(*offset);
				1320	c = (UChar) U16_GET_SUPPLEMENTARY(c, c2); /* [sic] */
				1321	}
				1322	}
				1323	return 0x1F & c;
				1324	}
				1325
				1326	/* If no special forms are recognized, then consider
				1327	* the backslash to generically escape the next character.
				1328	* Deal with surrogate pairs. */
				1329	if (U16_IS_LEAD(c) && *offset < length) {
				1330	UChar c2 = charAt(*offset, context);
				1331	if (U16_IS_TRAIL(c2)) {
				1332	++(*offset);
				1333	return U16_GET_SUPPLEMENTARY(c, c2);
				1334	}
				1335	}
				1336	return c;
				1337
				1338	err:
				1339	/* Invalid escape sequence */
				1340	offset = start; / Reset to initial value */
				1341	return (UChar32)0xFFFFFFFF;
				1342	}
				1343
				1344	/* u_unescapeAt() callback to return a UChar from a char* */
				1345	static UChar U_CALLCONV
				1346	_charPtr_charAt(int32_t offset, void *context) {
				1347	UChar c16;
				1348	/* It would be more efficient to access the invariant tables
				1349	* directly but there is no API for that. */
				1350	u_charsToUChars(((char*) context) + offset, &c16, 1);
				1351	return c16;
				1352	}
				1353
				1354	/* Append an escape-free segment of the text; used by u_unescape() */
				1355	static void _appendUChars(UChar *dest, int32_t destCapacity,
				1356	const char *src, int32_t srcLen) {
				1357	if (destCapacity < 0) {
				1358	destCapacity = 0;
				1359	}
				1360	if (srcLen > destCapacity) {
				1361	srcLen = destCapacity;
				1362	}
				1363	u_charsToUChars(src, dest, srcLen);
				1364	}
				1365
				1366	/* Do an invariant conversion of char* -> UChar, with escape parsing /
				1367	U_CAPI int32_t U_EXPORT2
				1368	u_unescape(const char src, UChar dest, int32_t destCapacity) {
				1369	const char *segment = src;
				1370	int32_t i = 0;
				1371	char c;
				1372
				1373	while ((c=*src) != 0) {
				1374	/* '\\' intentionally written as compiler-specific
				1375	* character constant to correspond to compiler-specific
				1376	* char* constants. */
				1377	if (c == '\\') {
				1378	int32_t lenParsed = 0;
				1379	UChar32 c32;
				1380	if (src != segment) {
				1381	if (dest != NULL) {
				1382	_appendUChars(dest + i, destCapacity - i,
				1383	segment, (int32_t)(src - segment));
				1384	}
				1385	i += (int32_t)(src - segment);
				1386	}
				1387	++src; /* advance past '\\' */
				1388	c32 = (UChar32)u_unescapeAt(_charPtr_charAt, &lenParsed, (int32_t)uprv_strlen(src), (void*)src);
				1389	if (lenParsed == 0) {
				1390	goto err;
				1391	}
				1392	src += lenParsed; /* advance past escape seq. */
				1393	if (dest != NULL && U16_LENGTH(c32) <= (destCapacity - i)) {
				1394	U16_APPEND_UNSAFE(dest, i, c32);
				1395	} else {
				1396	i += U16_LENGTH(c32);
				1397	}
				1398	segment = src;
				1399	} else {
				1400	++src;
				1401	}
				1402	}
				1403	if (src != segment) {
				1404	if (dest != NULL) {
				1405	_appendUChars(dest + i, destCapacity - i,
				1406	segment, (int32_t)(src - segment));
				1407	}
				1408	i += (int32_t)(src - segment);
				1409	}
				1410	if (dest != NULL && i < destCapacity) {
				1411	dest[i] = 0;
				1412	}
				1413	return i;
				1414
				1415	err:
				1416	if (dest != NULL && destCapacity > 0) {
				1417	*dest = 0;
				1418	}
				1419	return 0;
				1420	}
				1421
				1422	/* NUL-termination of strings ----------------------------------------------- */
				1423
				1424	/**
				1425	* NUL-terminate a string no matter what its type.
				1426	* Set warning and error codes accordingly.
				1427	*/
				1428	#define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode) \
				1429	if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) { \
				1430	/* not a public function, so no complete argument checking */ \
				1431	\
				1432	if(length<0) { \
				1433	/* assume that the caller handles this */ \
				1434	} else if(length<destCapacity) { \
				1435	/* NUL-terminate the string, the NUL fits */ \
				1436	dest[length]=0; \
				1437	/* unset the not-terminated warning but leave all others */ \
				1438	if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { \
				1439	*pErrorCode=U_ZERO_ERROR; \
				1440	} \
				1441	} else if(length==destCapacity) { \
				1442	/* unable to NUL-terminate, but the string itself fit - set a warning code */ \
				1443	*pErrorCode=U_STRING_NOT_TERMINATED_WARNING; \
				1444	} else /* length>destCapacity */ { \
				1445	/* even the string itself did not fit - set an error code */ \
				1446	*pErrorCode=U_BUFFER_OVERFLOW_ERROR; \
				1447	} \
				1448	}
				1449
				1450	U_CAPI int32_t U_EXPORT2
				1451	u_terminateUChars(UChar dest, int32_t destCapacity, int32_t length, UErrorCode pErrorCode) {
				1452	__TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
				1453	return length;
				1454	}
				1455
				1456	U_CAPI int32_t U_EXPORT2
				1457	u_terminateChars(char dest, int32_t destCapacity, int32_t length, UErrorCode pErrorCode) {
				1458	__TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
				1459	return length;
				1460	}
				1461
				1462	U_CAPI int32_t U_EXPORT2
				1463	u_terminateUChar32s(UChar32 dest, int32_t destCapacity, int32_t length, UErrorCode pErrorCode) {
				1464	__TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
				1465	return length;
				1466	}
				1467
				1468	U_CAPI int32_t U_EXPORT2
				1469	u_terminateWChars(wchar_t dest, int32_t destCapacity, int32_t length, UErrorCode pErrorCode) {
				1470	__TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
				1471	return length;
				1472	}
				1473
				1474	// Compute the hash code for a string -------------------------------------- ***
				1475
				1476	// Moved here from uhash.c so that UnicodeString::hashCode() does not depend
				1477	// on UHashtable code.
				1478
				1479	/*
				1480	Compute the hash by iterating sparsely over about 32 (up to 63)
				1481	characters spaced evenly through the string. For each character,
				1482	multiply the previous hash value by a prime number and add the new
				1483	character in, like a linear congruential random number generator,
				1484	producing a pseudorandom deterministic value well distributed over
				1485	the output range. [LIU]
				1486	*/
				1487
				1488	#define STRING_HASH(TYPE, STR, STRLEN, DEREF) \
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame^]	1489	uint32_t hash = 0; \
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1490	const TYPE p = (const TYPE) STR; \
				1491	if (p != NULL) { \
				1492	int32_t len = (int32_t)(STRLEN); \
				1493	int32_t inc = ((len - 32) / 32) + 1; \
				1494	const TYPE *limit = p + len; \
				1495	while (p<limit) { \
				1496	hash = (hash * 37) + DEREF; \
				1497	p += inc; \
				1498	} \
				1499	} \
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame^]	1500	return static_cast<int32_t>(hash)
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1501
				1502	/* Used by UnicodeString to compute its hashcode - Not public API. */
				1503	U_CAPI int32_t U_EXPORT2
				1504	ustr_hashUCharsN(const UChar *str, int32_t length) {
				1505	STRING_HASH(UChar, str, length, *p);
				1506	}
				1507
				1508	U_CAPI int32_t U_EXPORT2
				1509	ustr_hashCharsN(const char *str, int32_t length) {
				1510	STRING_HASH(uint8_t, str, length, *p);
				1511	}
				1512
				1513	U_CAPI int32_t U_EXPORT2
				1514	ustr_hashICharsN(const char *str, int32_t length) {
				1515	STRING_HASH(char, str, length, (uint8_t)uprv_tolower(*p));
				1516	}