Blame - source/common/ucnvbocu.cpp - chromium.googlesource.com/chromium/deps/icu

blob: b97d6662c7dbee310b4e678662edb436f2af0029 [file] [log] [blame]

jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1	/*
				2	******************************************************************************
				3	*
				4	* Copyright (C) 2002-2011, International Business Machines
				5	* Corporation and others. All Rights Reserved.
				6	*
				7	******************************************************************************
				8	* file name: ucnvbocu.cpp
				9	* encoding: US-ASCII
				10	* tab size: 8 (not used)
				11	* indentation:4
				12	*
				13	* created on: 2002mar27
				14	* created by: Markus W. Scherer
				15	*
				16	* This is an implementation of the Binary Ordered Compression for Unicode,
				17	* in its MIME-friendly form as defined in http://www.unicode.org/notes/tn6/
				18	*/
				19
				20	#include "unicode/utypes.h"
				21
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	22	#if !UCONFIG_NO_CONVERSION
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	23
				24	#include "unicode/ucnv.h"
				25	#include "unicode/ucnv_cb.h"
				26	#include "unicode/utf16.h"
				27	#include "putilimp.h"
				28	#include "ucnv_bld.h"
				29	#include "ucnv_cnv.h"
				30	#include "uassert.h"
				31
				32	/* BOCU-1 constants and macros ---------------------------------------------- */
				33
				34	/*
				35	* BOCU-1 encodes the code points of a Unicode string as
				36	* a sequence of byte-encoded differences (slope detection),
				37	* preserving lexical order.
				38	*
				39	* Optimize the difference-taking for runs of Unicode text within
				40	* small scripts:
				41	*
				42	* Most small scripts are allocated within aligned 128-blocks of Unicode
				43	* code points. Lexical order is preserved if the "previous code point" state
				44	* is always moved into the middle of such a block.
				45	*
				46	* Additionally, "prev" is moved from anywhere in the Unihan and Hangul
				47	* areas into the middle of those areas.
				48	*
				49	* C0 control codes and space are encoded with their US-ASCII bytes.
				50	* "prev" is reset for C0 controls but not for space.
				51	*/
				52
				/* initial value for "prev": middle of the ASCII range */
				#define BOCU1_ASCII_PREV        0x40

				/* bounding byte values for differences */
				#define BOCU1_MIN               0x21
				#define BOCU1_MIDDLE            0x90
				#define BOCU1_MAX_LEAD          0xfe
				#define BOCU1_MAX_TRAIL         0xff
				#define BOCU1_RESET             0xff

				/* number of lead bytes */
				#define BOCU1_COUNT             (BOCU1_MAX_LEAD-BOCU1_MIN+1)

				/* adjust trail byte counts for the use of some C0 control byte values */
				#define BOCU1_TRAIL_CONTROLS_COUNT  20
				#define BOCU1_TRAIL_BYTE_OFFSET     (BOCU1_MIN-BOCU1_TRAIL_CONTROLS_COUNT)

				/* number of trail bytes (evaluates to 243) */
				#define BOCU1_TRAIL_COUNT       ((BOCU1_MAX_TRAIL-BOCU1_MIN+1)+BOCU1_TRAIL_CONTROLS_COUNT)

				/*
				 * number of positive and negative single-byte codes
				 * (counting 0==BOCU1_MIDDLE among the positive ones)
				 */
				#define BOCU1_SINGLE            64

				/* number of lead bytes for positive and negative 2/3/4-byte sequences */
				#define BOCU1_LEAD_2            43
				#define BOCU1_LEAD_3            3
				#define BOCU1_LEAD_4            1

				/* The difference value range for single-byters (-64..63). */
				#define BOCU1_REACH_POS_1       (BOCU1_SINGLE-1)
				#define BOCU1_REACH_NEG_1       (-BOCU1_SINGLE)

				/* The difference value range for double-byters (-10513..10512). */
				#define BOCU1_REACH_POS_2       (BOCU1_REACH_POS_1+BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)
				#define BOCU1_REACH_NEG_2       (BOCU1_REACH_NEG_1-BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)

				/*
				 * The difference value range for 3-byters (-187660..187659):
				 * each of the BOCU1_LEAD_3 lead bytes is followed by two trail bytes.
				 */
				#define BOCU1_REACH_POS_3 \
				    (BOCU1_REACH_POS_2+BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)

				#define BOCU1_REACH_NEG_3 \
				    (BOCU1_REACH_NEG_2-BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)

				/* The lead byte start values. */
				#define BOCU1_START_POS_2       (BOCU1_MIDDLE+BOCU1_REACH_POS_1+1)
				#define BOCU1_START_POS_3       (BOCU1_START_POS_2+BOCU1_LEAD_2)
				#define BOCU1_START_POS_4       (BOCU1_START_POS_3+BOCU1_LEAD_3)
				     /* ==BOCU1_MAX_LEAD */

				#define BOCU1_START_NEG_2       (BOCU1_MIDDLE+BOCU1_REACH_NEG_1)
				#define BOCU1_START_NEG_3       (BOCU1_START_NEG_2-BOCU1_LEAD_2)
				#define BOCU1_START_NEG_4       (BOCU1_START_NEG_3-BOCU1_LEAD_3)
				     /* ==BOCU1_MIN+1 */

				/* The length of a byte sequence, according to the lead byte (!=BOCU1_RESET). */
				#define BOCU1_LENGTH_FROM_LEAD(lead) \
				    ((BOCU1_START_NEG_2<=(lead) && (lead)<BOCU1_START_POS_2) ? 1 : \
				     (BOCU1_START_NEG_3<=(lead) && (lead)<BOCU1_START_POS_3) ? 2 : \
				     (BOCU1_START_NEG_4<=(lead) && (lead)<BOCU1_START_POS_4) ? 3 : 4)

				/*
				 * The length of a byte sequence, according to its packed form:
				 * packed values below 0x04000000 carry the length (1..3) in the top byte,
				 * anything else is a 4-byte sequence whose top byte is the lead byte.
				 */
				#define BOCU1_LENGTH_FROM_PACKED(packed) \
				    ((uint32_t)(packed)<0x04000000 ? (packed)>>24 : 4)
				118
				119	/*
				120	* 12 commonly used C0 control codes (and space) are only used to encode
				121	* themselves directly,
				122	* which makes BOCU-1 MIME-usable and reasonably safe for
				123	* ASCII-oriented software.
				124	*
				125	* These controls are
				126	* 0 NUL
				127	*
				128	* 7 BEL
				129	* 8 BS
				130	*
				131	* 9 TAB
				132	* a LF
				133	* b VT
				134	* c FF
				135	* d CR
				136	*
				137	* e SO
				138	* f SI
				139	*
				140	* 1a SUB
				141	* 1b ESC
				142	*
				143	* The other 20 C0 controls are also encoded directly (to preserve order)
				144	* but are also used as trail bytes in difference encoding
				145	* (for better compression).
				146	*/
				/*
				 * Map a trail value t from the difference calculation to its external byte:
				 * values below BOCU1_TRAIL_CONTROLS_COUNT go through the bocu1TrailToByte
				 * table, all others are shifted by BOCU1_TRAIL_BYTE_OFFSET.
				 */
				#define BOCU1_TRAIL_TO_BYTE(t) \
				    ((t)<BOCU1_TRAIL_CONTROLS_COUNT ? bocu1TrailToByte[t] : (t)+BOCU1_TRAIL_BYTE_OFFSET)
				148
				149	/*
				150	* Byte value map for control codes,
				151	* from external byte values 0x00..0x20
				152	* to trail byte values 0..19 (0..0x13) as used in the difference calculation.
				153	* External byte values that are illegal as trail bytes are mapped to -1.
				154	*/
				155	static const int8_t
				156	bocu1ByteToTrail[BOCU1_MIN]={
				157	/* 0 1 2 3 4 5 6 7 */
				158	-1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1,
				159
				160	/* 8 9 a b c d e f */
				161	-1, -1, -1, -1, -1, -1, -1, -1,
				162
				163	/* 10 11 12 13 14 15 16 17 */
				164	0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
				165
				166	/* 18 19 1a 1b 1c 1d 1e 1f */
				167	0x0e, 0x0f, -1, -1, 0x10, 0x11, 0x12, 0x13,
				168
				169	/* 20 */
				170	-1
				171	};
				172
				173	/*
				174	* Byte value map for control codes,
				175	* from trail byte values 0..19 (0..0x13) as used in the difference calculation
				176	* to external byte values 0x00..0x20.
				177	*/
				178	static const int8_t
				179	bocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT]={
				180	/* 0 1 2 3 4 5 6 7 */
				181	0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11,
				182
				183	/* 8 9 a b c d e f */
				184	0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
				185
				186	/* 10 11 12 13 */
				187	0x1c, 0x1d, 0x1e, 0x1f
				188	};
				189
				/**
				 * Integer division and modulo with negative numerators
				 * yields negative modulo results and quotients that are one more than
				 * what we need here.
				 * This macro adjusts the results so that the modulo-value m is always >=0.
				 *
				 * For positive n, the if() condition is always FALSE.
				 *
				 * The body is wrapped in do { } while(0) so that an invocation followed by
				 * a semicolon forms exactly one statement (safe inside unbraced if/else).
				 * Note that n, d and m are each evaluated more than once.
				 *
				 * @param n Number to be split into quotient and rest.
				 *          Will be modified to contain the quotient.
				 * @param d Divisor.
				 * @param m Output variable for the rest (modulo result).
				 */
				#define NEGDIVMOD(n, d, m) do { \
				    (m)=(n)%(d); \
				    (n)/=(d); \
				    if((m)<0) { \
				        --(n); \
				        (m)+=(d); \
				    } \
				} while(0)
				211
				/* Macro shortcuts for diff values that packDiff() does not need to handle. */

				/** TRUE if diff lies within the single-byte range BOCU1_REACH_NEG_1..BOCU1_REACH_POS_1. */
				#define DIFF_IS_SINGLE(diff) \
				    ((diff)>=BOCU1_REACH_NEG_1 && (diff)<=BOCU1_REACH_POS_1)

				/** The single byte for a diff value that satisfies DIFF_IS_SINGLE(). */
				#define PACK_SINGLE_DIFF(diff) \
				    (BOCU1_MIDDLE+(diff))

				/** TRUE if diff lies within BOCU1_REACH_NEG_2..BOCU1_REACH_POS_2 (at most two bytes). */
				#define DIFF_IS_DOUBLE(diff) \
				    ((diff)>=BOCU1_REACH_NEG_2 && (diff)<=BOCU1_REACH_POS_2)
				222
				223	/* BOCU-1 implementation functions ------------------------------------------ */
				224
				225	#define BOCU1_SIMPLE_PREV(c) (((c)&~0x7f)+BOCU1_ASCII_PREV)
				226
				227	/**
				228	* Compute the next "previous" value for differencing
				229	* from the current code point.
				230	*
				231	* @param c current code point, 0x3040..0xd7a3 (rest handled by macro below)
				232	* @return "previous code point" state value
				233	*/
				234	static inline int32_t
				235	bocu1Prev(int32_t c) {
				236	/* compute new prev */
				237	if(/* 0x3040<=c && */ c<=0x309f) {
				238	/* Hiragana is not 128-aligned */
				239	return 0x3070;
				240	} else if(0x4e00<=c && c<=0x9fa5) {
				241	/* CJK Unihan */
				242	return 0x4e00-BOCU1_REACH_NEG_2;
				243	} else if(0xac00<=c /* && c<=0xd7a3 */) {
				244	/* Korean Hangul */
				245	return (0xd7a3+0xac00)/2;
				246	} else {
				247	/* mostly small scripts */
				248	return BOCU1_SIMPLE_PREV(c);
				249	}
				250	}
				251
				/**
				 * Fast version of bocu1Prev() for most scripts:
				 * code points outside 0x3040..0xd7a3 use the simple 128-block rule inline,
				 * only the remaining range calls bocu1Prev().
				 * Note that c is evaluated more than once.
				 */
				#define BOCU1_PREV(c) ((c)<0x3040 || (c)>0xd7a3 ? BOCU1_SIMPLE_PREV(c) : bocu1Prev(c))
				254
				255	/*
				256	* The BOCU-1 converter uses the standard setup code in ucnv.c/ucnv_bld.c.
				257	* The UConverter fields are used as follows:
				258	*
				259	* fromUnicodeStatus encoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
				260	*
				261	* toUnicodeStatus decoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
				262	* mode decoder's incomplete (diff<<2)|count (ignored when toULength==0)
				263	*/
				264
				265	/* BOCU-1-from-Unicode conversion functions --------------------------------- */
				266
				267	/**
				268	* Encode a difference -0x10ffff..0x10ffff in 1..4 bytes
				269	* and return a packed integer with them.
				270	*
				271	* The encoding favors small absolute differences with short encodings
				272	* to compress runs of same-script characters.
				273	*
				274	* Optimized version with unrolled loops and fewer floating-point operations
				275	* than the standard packDiff().
				276	*
				277	* @param diff difference value -0x10ffff..0x10ffff
				278	* @return
				279	* 0x010000zz for 1-byte sequence zz
				280	* 0x0200yyzz for 2-byte sequence yy zz
				281	* 0x03xxyyzz for 3-byte sequence xx yy zz
				282	* 0xwwxxyyzz for 4-byte sequence ww xx yy zz (ww>0x03)
				283	*/
				284	static int32_t
				285	packDiff(int32_t diff) {
				286	int32_t result, m;
				287
				288	U_ASSERT(!DIFF_IS_SINGLE(diff)); /* assume we won't be called where diff==BOCU1_REACH_NEG_1=-64 */
				289	if(diff>=BOCU1_REACH_NEG_1) {
				290	/* mostly positive differences, and single-byte negative ones */
				291	#if 0 /* single-byte case handled in macros, see below */
				292	if(diff<=BOCU1_REACH_POS_1) {
				293	/* single byte */
				294	return 0x01000000\|(BOCU1_MIDDLE+diff);
				295	} else
				296	#endif
				297	if(diff<=BOCU1_REACH_POS_2) {
				298	/* two bytes */
				299	diff-=BOCU1_REACH_POS_1+1;
				300	result=0x02000000;
				301
				302	m=diff%BOCU1_TRAIL_COUNT;
				303	diff/=BOCU1_TRAIL_COUNT;
				304	result\|=BOCU1_TRAIL_TO_BYTE(m);
				305
				306	result\|=(BOCU1_START_POS_2+diff)<<8;
				307	} else if(diff<=BOCU1_REACH_POS_3) {
				308	/* three bytes */
				309	diff-=BOCU1_REACH_POS_2+1;
				310	result=0x03000000;
				311
				312	m=diff%BOCU1_TRAIL_COUNT;
				313	diff/=BOCU1_TRAIL_COUNT;
				314	result\|=BOCU1_TRAIL_TO_BYTE(m);
				315
				316	m=diff%BOCU1_TRAIL_COUNT;
				317	diff/=BOCU1_TRAIL_COUNT;
				318	result\|=BOCU1_TRAIL_TO_BYTE(m)<<8;
				319
				320	result\|=(BOCU1_START_POS_3+diff)<<16;
				321	} else {
				322	/* four bytes */
				323	diff-=BOCU1_REACH_POS_3+1;
				324
				325	m=diff%BOCU1_TRAIL_COUNT;
				326	diff/=BOCU1_TRAIL_COUNT;
				327	result=BOCU1_TRAIL_TO_BYTE(m);
				328
				329	m=diff%BOCU1_TRAIL_COUNT;
				330	diff/=BOCU1_TRAIL_COUNT;
				331	result\|=BOCU1_TRAIL_TO_BYTE(m)<<8;
				332
				333	/*
				334	* We know that / and % would deliver quotient 0 and rest=diff.
				335	* Avoid division and modulo for performance.
				336	*/
				337	result\|=BOCU1_TRAIL_TO_BYTE(diff)<<16;
				338
				339	result\|=((uint32_t)BOCU1_START_POS_4)<<24;
				340	}
				341	} else {
				342	/* two- to four-byte negative differences */
				343	if(diff>=BOCU1_REACH_NEG_2) {
				344	/* two bytes */
				345	diff-=BOCU1_REACH_NEG_1;
				346	result=0x02000000;
				347
				348	NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
				349	result\|=BOCU1_TRAIL_TO_BYTE(m);
				350
				351	result\|=(BOCU1_START_NEG_2+diff)<<8;
				352	} else if(diff>=BOCU1_REACH_NEG_3) {
				353	/* three bytes */
				354	diff-=BOCU1_REACH_NEG_2;
				355	result=0x03000000;
				356
				357	NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
				358	result\|=BOCU1_TRAIL_TO_BYTE(m);
				359
				360	NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
				361	result\|=BOCU1_TRAIL_TO_BYTE(m)<<8;
				362
				363	result\|=(BOCU1_START_NEG_3+diff)<<16;
				364	} else {
				365	/* four bytes */
				366	diff-=BOCU1_REACH_NEG_3;
				367
				368	NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
				369	result=BOCU1_TRAIL_TO_BYTE(m);
				370
				371	NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
				372	result\|=BOCU1_TRAIL_TO_BYTE(m)<<8;
				373
				374	/*
				375	* We know that NEGDIVMOD would deliver
				376	* quotient -1 and rest=diff+BOCU1_TRAIL_COUNT.
				377	* Avoid division and modulo for performance.
				378	*/
				379	m=diff+BOCU1_TRAIL_COUNT;
				380	result\|=BOCU1_TRAIL_TO_BYTE(m)<<16;
				381
				382	result\|=BOCU1_MIN<<24;
				383	}
				384	}
				385	return result;
				386	}
				387
				388
				389	static void
				390	_Bocu1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
				391	UErrorCode *pErrorCode) {
				392	UConverter *cnv;
				393	const UChar source, sourceLimit;
				394	uint8_t *target;
				395	int32_t targetCapacity;
				396	int32_t *offsets;
				397
				398	int32_t prev, c, diff;
				399
				400	int32_t sourceIndex, nextSourceIndex;
				401
				402	U_ALIGN_CODE(16)
				403
				404	/* set up the local pointers */
				405	cnv=pArgs->converter;
				406	source=pArgs->source;
				407	sourceLimit=pArgs->sourceLimit;
				408	target=(uint8_t *)pArgs->target;
				409	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
				410	offsets=pArgs->offsets;
				411
				412	/* get the converter state from UConverter */
				413	c=cnv->fromUChar32;
				414	prev=(int32_t)cnv->fromUnicodeStatus;
				415	if(prev==0) {
				416	prev=BOCU1_ASCII_PREV;
				417	}
				418
				419	/* sourceIndex=-1 if the current character began in the previous buffer */
				420	sourceIndex= c==0 ? 0 : -1;
				421	nextSourceIndex=0;
				422
				423	/* conversion loop */
				424	if(c!=0 && targetCapacity>0) {
				425	goto getTrail;
				426	}
				427
				428	fastSingle:
				429	/* fast loop for single-byte differences */
				430	/* use only one loop counter variable, targetCapacity, not also source */
				431	diff=(int32_t)(sourceLimit-source);
				432	if(targetCapacity>diff) {
				433	targetCapacity=diff;
				434	}
				435	while(targetCapacity>0 && (c=*source)<0x3000) {
				436	if(c<=0x20) {
				437	if(c!=0x20) {
				438	prev=BOCU1_ASCII_PREV;
				439	}
				440	*target++=(uint8_t)c;
				441	*offsets++=nextSourceIndex++;
				442	++source;
				443	--targetCapacity;
				444	} else {
				445	diff=c-prev;
				446	if(DIFF_IS_SINGLE(diff)) {
				447	prev=BOCU1_SIMPLE_PREV(c);
				448	*target++=(uint8_t)PACK_SINGLE_DIFF(diff);
				449	*offsets++=nextSourceIndex++;
				450	++source;
				451	--targetCapacity;
				452	} else {
				453	break;
				454	}
				455	}
				456	}
				457	/* restore real values */
				458	targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);
				459	sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
				460
				461	/* regular loop for all cases */
				462	while(source<sourceLimit) {
				463	if(targetCapacity>0) {
				464	c=*source++;
				465	++nextSourceIndex;
				466
				467	if(c<=0x20) {
				468	/*
				469	* ISO C0 control & space:
				470	* Encode directly for MIME compatibility,
				471	* and reset state except for space, to not disrupt compression.
				472	*/
				473	if(c!=0x20) {
				474	prev=BOCU1_ASCII_PREV;
				475	}
				476	*target++=(uint8_t)c;
				477	*offsets++=sourceIndex;
				478	--targetCapacity;
				479
				480	sourceIndex=nextSourceIndex;
				481	continue;
				482	}
				483
				484	if(U16_IS_LEAD(c)) {
				485	getTrail:
				486	if(source<sourceLimit) {
				487	/* test the following code unit */
				488	UChar trail=*source;
				489	if(U16_IS_TRAIL(trail)) {
				490	++source;
				491	++nextSourceIndex;
				492	c=U16_GET_SUPPLEMENTARY(c, trail);
				493	}
				494	} else {
				495	/* no more input */
				496	c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
				497	break;
				498	}
				499	}
				500
				501	/*
				502	* all other Unicode code points c==U+0021..U+10ffff
				503	* are encoded with the difference c-prev
				504	*
				505	* a new prev is computed from c,
				506	* placed in the middle of a 0x80-block (for most small scripts) or
				507	* in the middle of the Unihan and Hangul blocks
				508	* to statistically minimize the following difference
				509	*/
				510	diff=c-prev;
				511	prev=BOCU1_PREV(c);
				512	if(DIFF_IS_SINGLE(diff)) {
				513	*target++=(uint8_t)PACK_SINGLE_DIFF(diff);
				514	*offsets++=sourceIndex;
				515	--targetCapacity;
				516	sourceIndex=nextSourceIndex;
				517	if(c<0x3000) {
				518	goto fastSingle;
				519	}
				520	} else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) {
				521	/* optimize 2-byte case */
				522	int32_t m;
				523
				524	if(diff>=0) {
				525	diff-=BOCU1_REACH_POS_1+1;
				526	m=diff%BOCU1_TRAIL_COUNT;
				527	diff/=BOCU1_TRAIL_COUNT;
				528	diff+=BOCU1_START_POS_2;
				529	} else {
				530	diff-=BOCU1_REACH_NEG_1;
				531	NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
				532	diff+=BOCU1_START_NEG_2;
				533	}
				534	*target++=(uint8_t)diff;
				535	*target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m);
				536	*offsets++=sourceIndex;
				537	*offsets++=sourceIndex;
				538	targetCapacity-=2;
				539	sourceIndex=nextSourceIndex;
				540	} else {
				541	int32_t length; /* will be 2..4 */
				542
				543	diff=packDiff(diff);
				544	length=BOCU1_LENGTH_FROM_PACKED(diff);
				545
				546	/* write the output character bytes from diff and length */
				547	/* from the first if in the loop we know that targetCapacity>0 */
				548	if(length<=targetCapacity) {
				549	switch(length) {
				550	/* each branch falls through to the next one */
				551	case 4:
				552	*target++=(uint8_t)(diff>>24);
				553	*offsets++=sourceIndex;
				554	case 3: /fall through/
				555	*target++=(uint8_t)(diff>>16);
				556	*offsets++=sourceIndex;
				557	case 2: /fall through/
				558	*target++=(uint8_t)(diff>>8);
				559	*offsets++=sourceIndex;
				560	/* case 1: handled above */
				561	*target++=(uint8_t)diff;
				562	*offsets++=sourceIndex;
				563	default:
				564	/* will never occur */
				565	break;
				566	}
				567	targetCapacity-=length;
				568	sourceIndex=nextSourceIndex;
				569	} else {
				570	uint8_t *charErrorBuffer;
				571
				572	/*
				573	* We actually do this backwards here:
				574	* In order to save an intermediate variable, we output
				575	* first to the overflow buffer what does not fit into the
				576	* regular target.
				577	*/
				578	/* we know that 1<=targetCapacity<length<=4 */
				579	length-=targetCapacity;
				580	charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
				581	switch(length) {
				582	/* each branch falls through to the next one */
				583	case 3:
				584	*charErrorBuffer++=(uint8_t)(diff>>16);
				585	case 2: /fall through/
				586	*charErrorBuffer++=(uint8_t)(diff>>8);
				587	case 1: /fall through/
				588	*charErrorBuffer=(uint8_t)diff;
				589	default:
				590	/* will never occur */
				591	break;
				592	}
				593	cnv->charErrorBufferLength=(int8_t)length;
				594
				595	/* now output what fits into the regular target */
				596	diff>>=8length; / length was reduced by targetCapacity */
				597	switch(targetCapacity) {
				598	/* each branch falls through to the next one */
				599	case 3:
				600	*target++=(uint8_t)(diff>>16);
				601	*offsets++=sourceIndex;
				602	case 2: /fall through/
				603	*target++=(uint8_t)(diff>>8);
				604	*offsets++=sourceIndex;
				605	case 1: /fall through/
				606	*target++=(uint8_t)diff;
				607	*offsets++=sourceIndex;
				608	default:
				609	/* will never occur */
				610	break;
				611	}
				612
				613	/* target overflow */
				614	targetCapacity=0;
				615	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				616	break;
				617	}
				618	}
				619	} else {
				620	/* target is full */
				621	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				622	break;
				623	}
				624	}
				625
				626	/* set the converter state back into UConverter */
				627	cnv->fromUChar32= c<0 ? -c : 0;
				628	cnv->fromUnicodeStatus=(uint32_t)prev;
				629
				630	/* write back the updated pointers */
				631	pArgs->source=source;
				632	pArgs->target=(char *)target;
				633	pArgs->offsets=offsets;
				634	}
				635
				636	/*
				637	* Identical to _Bocu1FromUnicodeWithOffsets but without offset handling.
				638	* If a change is made in the original function, then either
				639	* change this function the same way or
				640	* re-copy the original function and remove the variables
				641	* offsets, sourceIndex, and nextSourceIndex.
				642	*/
				643	static void
				644	_Bocu1FromUnicode(UConverterFromUnicodeArgs *pArgs,
				645	UErrorCode *pErrorCode) {
				646	UConverter *cnv;
				647	const UChar source, sourceLimit;
				648	uint8_t *target;
				649	int32_t targetCapacity;
				650
				651	int32_t prev, c, diff;
				652
				653	/* set up the local pointers */
				654	cnv=pArgs->converter;
				655	source=pArgs->source;
				656	sourceLimit=pArgs->sourceLimit;
				657	target=(uint8_t *)pArgs->target;
				658	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
				659
				660	/* get the converter state from UConverter */
				661	c=cnv->fromUChar32;
				662	prev=(int32_t)cnv->fromUnicodeStatus;
				663	if(prev==0) {
				664	prev=BOCU1_ASCII_PREV;
				665	}
				666
				667	/* conversion loop */
				668	if(c!=0 && targetCapacity>0) {
				669	goto getTrail;
				670	}
				671
				672	fastSingle:
				673	/* fast loop for single-byte differences */
				674	/* use only one loop counter variable, targetCapacity, not also source */
				675	diff=(int32_t)(sourceLimit-source);
				676	if(targetCapacity>diff) {
				677	targetCapacity=diff;
				678	}
				679	while(targetCapacity>0 && (c=*source)<0x3000) {
				680	if(c<=0x20) {
				681	if(c!=0x20) {
				682	prev=BOCU1_ASCII_PREV;
				683	}
				684	*target++=(uint8_t)c;
				685	} else {
				686	diff=c-prev;
				687	if(DIFF_IS_SINGLE(diff)) {
				688	prev=BOCU1_SIMPLE_PREV(c);
				689	*target++=(uint8_t)PACK_SINGLE_DIFF(diff);
				690	} else {
				691	break;
				692	}
				693	}
				694	++source;
				695	--targetCapacity;
				696	}
				697	/* restore real values */
				698	targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);
				699
				700	/* regular loop for all cases */
				701	while(source<sourceLimit) {
				702	if(targetCapacity>0) {
				703	c=*source++;
				704
				705	if(c<=0x20) {
				706	/*
				707	* ISO C0 control & space:
				708	* Encode directly for MIME compatibility,
				709	* and reset state except for space, to not disrupt compression.
				710	*/
				711	if(c!=0x20) {
				712	prev=BOCU1_ASCII_PREV;
				713	}
				714	*target++=(uint8_t)c;
				715	--targetCapacity;
				716	continue;
				717	}
				718
				719	if(U16_IS_LEAD(c)) {
				720	getTrail:
				721	if(source<sourceLimit) {
				722	/* test the following code unit */
				723	UChar trail=*source;
				724	if(U16_IS_TRAIL(trail)) {
				725	++source;
				726	c=U16_GET_SUPPLEMENTARY(c, trail);
				727	}
				728	} else {
				729	/* no more input */
				730	c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
				731	break;
				732	}
				733	}
				734
				735	/*
				736	* all other Unicode code points c==U+0021..U+10ffff
				737	* are encoded with the difference c-prev
				738	*
				739	* a new prev is computed from c,
				740	* placed in the middle of a 0x80-block (for most small scripts) or
				741	* in the middle of the Unihan and Hangul blocks
				742	* to statistically minimize the following difference
				743	*/
				744	diff=c-prev;
				745	prev=BOCU1_PREV(c);
				746	if(DIFF_IS_SINGLE(diff)) {
				747	*target++=(uint8_t)PACK_SINGLE_DIFF(diff);
				748	--targetCapacity;
				749	if(c<0x3000) {
				750	goto fastSingle;
				751	}
				752	} else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) {
				753	/* optimize 2-byte case */
				754	int32_t m;
				755
				756	if(diff>=0) {
				757	diff-=BOCU1_REACH_POS_1+1;
				758	m=diff%BOCU1_TRAIL_COUNT;
				759	diff/=BOCU1_TRAIL_COUNT;
				760	diff+=BOCU1_START_POS_2;
				761	} else {
				762	diff-=BOCU1_REACH_NEG_1;
				763	NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
				764	diff+=BOCU1_START_NEG_2;
				765	}
				766	*target++=(uint8_t)diff;
				767	*target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m);
				768	targetCapacity-=2;
				769	} else {
				770	int32_t length; /* will be 2..4 */
				771
				772	diff=packDiff(diff);
				773	length=BOCU1_LENGTH_FROM_PACKED(diff);
				774
				775	/* write the output character bytes from diff and length */
				776	/* from the first if in the loop we know that targetCapacity>0 */
				777	if(length<=targetCapacity) {
				778	switch(length) {
				779	/* each branch falls through to the next one */
				780	case 4:
				781	*target++=(uint8_t)(diff>>24);
				782	case 3: /fall through/
				783	*target++=(uint8_t)(diff>>16);
				784	/* case 2: handled above */
				785	*target++=(uint8_t)(diff>>8);
				786	/* case 1: handled above */
				787	*target++=(uint8_t)diff;
				788	default:
				789	/* will never occur */
				790	break;
				791	}
				792	targetCapacity-=length;
				793	} else {
				794	uint8_t *charErrorBuffer;
				795
				796	/*
				797	* We actually do this backwards here:
				798	* In order to save an intermediate variable, we output
				799	* first to the overflow buffer what does not fit into the
				800	* regular target.
				801	*/
				802	/* we know that 1<=targetCapacity<length<=4 */
				803	length-=targetCapacity;
				804	charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
				805	switch(length) {
				806	/* each branch falls through to the next one */
				807	case 3:
				808	*charErrorBuffer++=(uint8_t)(diff>>16);
				809	case 2: /fall through/
				810	*charErrorBuffer++=(uint8_t)(diff>>8);
				811	case 1: /fall through/
				812	*charErrorBuffer=(uint8_t)diff;
				813	default:
				814	/* will never occur */
				815	break;
				816	}
				817	cnv->charErrorBufferLength=(int8_t)length;
				818
				819	/* now output what fits into the regular target */
				820	diff>>=8length; / length was reduced by targetCapacity */
				821	switch(targetCapacity) {
				822	/* each branch falls through to the next one */
				823	case 3:
				824	*target++=(uint8_t)(diff>>16);
				825	case 2: /fall through/
				826	*target++=(uint8_t)(diff>>8);
				827	case 1: /fall through/
				828	*target++=(uint8_t)diff;
				829	default:
				830	/* will never occur */
				831	break;
				832	}
				833
				834	/* target overflow */
				835	targetCapacity=0;
				836	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				837	break;
				838	}
				839	}
				840	} else {
				841	/* target is full */
				842	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				843	break;
				844	}
				845	}
				846
				847	/* set the converter state back into UConverter */
				848	cnv->fromUChar32= c<0 ? -c : 0;
				849	cnv->fromUnicodeStatus=(uint32_t)prev;
				850
				851	/* write back the updated pointers */
				852	pArgs->source=source;
				853	pArgs->target=(char *)target;
				854	}
				855
				856	/* BOCU-1-to-Unicode conversion functions ----------------------------------- */
				857
				858	/**
				859	* Function for BOCU-1 decoder; handles multi-byte lead bytes.
				860	*
				861	* @param b lead byte;
				862	* BOCU1_MIN<=b<BOCU1_START_NEG_2 or BOCU1_START_POS_2<=b<BOCU1_MAX_LEAD
				863	* @return (diff<<2)\|count
				864	*/
				865	static inline int32_t
				866	decodeBocu1LeadByte(int32_t b) {
				867	int32_t diff, count;
				868
				869	if(b>=BOCU1_START_NEG_2) {
				870	/* positive difference */
				871	if(b<BOCU1_START_POS_3) {
				872	/* two bytes */
				873	diff=((int32_t)b-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
				874	count=1;
				875	} else if(b<BOCU1_START_POS_4) {
				876	/* three bytes */
				877	diff=((int32_t)b-BOCU1_START_POS_3)BOCU1_TRAIL_COUNTBOCU1_TRAIL_COUNT+BOCU1_REACH_POS_2+1;
				878	count=2;
				879	} else {
				880	/* four bytes */
				881	diff=BOCU1_REACH_POS_3+1;
				882	count=3;
				883	}
				884	} else {
				885	/* negative difference */
				886	if(b>=BOCU1_START_NEG_3) {
				887	/* two bytes */
				888	diff=((int32_t)b-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
				889	count=1;
				890	} else if(b>BOCU1_MIN) {
				891	/* three bytes */
				892	diff=((int32_t)b-BOCU1_START_NEG_3)BOCU1_TRAIL_COUNTBOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_2;
				893	count=2;
				894	} else {
				895	/* four bytes */
				896	diff=-BOCU1_TRAIL_COUNTBOCU1_TRAIL_COUNTBOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_3;
				897	count=3;
				898	}
				899	}
				900
				901	/* return the state for decoding the trail byte(s) */
				902	return (diff<<2)\|count;
				903	}
				904
				905	/**
				906	* Function for BOCU-1 decoder; handles multi-byte trail bytes.
				907	*
				908	* @param count number of remaining trail bytes including this one
				909	* @param b trail byte
				910	* @return new delta for diff including b - <0 indicates an error
				911	*
				912	* @see decodeBocu1
				913	*/
				914	static inline int32_t
				915	decodeBocu1TrailByte(int32_t count, int32_t b) {
				916	if(b<=0x20) {
				917	/* skip some C0 controls and make the trail byte range contiguous */
				918	b=bocu1ByteToTrail[b];
				919	/* b<0 for an illegal trail byte value will result in return<0 below */
				920	#if BOCU1_MAX_TRAIL<0xff
				921	} else if(b>BOCU1_MAX_TRAIL) {
				922	return -99;
				923	#endif
				924	} else {
				925	b-=BOCU1_TRAIL_BYTE_OFFSET;
				926	}
				927
				928	/* add trail byte into difference and decrement count */
				929	if(count==1) {
				930	return b;
				931	} else if(count==2) {
				932	return b*BOCU1_TRAIL_COUNT;
				933	} else /* count==3 */ {
				934	return b(BOCU1_TRAIL_COUNTBOCU1_TRAIL_COUNT);
				935	}
				936	}
				937
				938	static void
				939	_Bocu1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
				940	UErrorCode *pErrorCode) {
				941	UConverter *cnv;
				942	const uint8_t source, sourceLimit;
				943	UChar *target;
				944	const UChar *targetLimit;
				945	int32_t *offsets;
				946
				947	int32_t prev, count, diff, c;
				948
				949	int8_t byteIndex;
				950	uint8_t *bytes;
				951
				952	int32_t sourceIndex, nextSourceIndex;
				953
				954	/* set up the local pointers */
				955	cnv=pArgs->converter;
				956	source=(const uint8_t *)pArgs->source;
				957	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
				958	target=pArgs->target;
				959	targetLimit=pArgs->targetLimit;
				960	offsets=pArgs->offsets;
				961
				962	/* get the converter state from UConverter */
				963	prev=(int32_t)cnv->toUnicodeStatus;
				964	if(prev==0) {
				965	prev=BOCU1_ASCII_PREV;
				966	}
				967	diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */
				968	count=diff&3;
				969	diff>>=2;
				970
				971	byteIndex=cnv->toULength;
				972	bytes=cnv->toUBytes;
				973
				974	/* sourceIndex=-1 if the current character began in the previous buffer */
				975	sourceIndex=byteIndex==0 ? 0 : -1;
				976	nextSourceIndex=0;
				977
				978	/* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
				979	if(count>0 && byteIndex>0 && target<targetLimit) {
				980	goto getTrail;
				981	}
				982
				983	fastSingle:
				984	/* fast loop for single-byte differences */
				985	/* use count as the only loop counter variable */
				986	diff=(int32_t)(sourceLimit-source);
				987	count=(int32_t)(pArgs->targetLimit-target);
				988	if(count>diff) {
				989	count=diff;
				990	}
				991	while(count>0) {
				992	if(BOCU1_START_NEG_2<=(c=*source) && c<BOCU1_START_POS_2) {
				993	c=prev+(c-BOCU1_MIDDLE);
				994	if(c<0x3000) {
				995	*target++=(UChar)c;
				996	*offsets++=nextSourceIndex++;
				997	prev=BOCU1_SIMPLE_PREV(c);
				998	} else {
				999	break;
				1000	}
				1001	} else if(c<=0x20) {
				1002	if(c!=0x20) {
				1003	prev=BOCU1_ASCII_PREV;
				1004	}
				1005	*target++=(UChar)c;
				1006	*offsets++=nextSourceIndex++;
				1007	} else {
				1008	break;
				1009	}
				1010	++source;
				1011	--count;
				1012	}
				1013	sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
				1014
				1015	/* decode a sequence of single and lead bytes */
				1016	while(source<sourceLimit) {
				1017	if(target>=targetLimit) {
				1018	/* target is full */
				1019	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				1020	break;
				1021	}
				1022
				1023	++nextSourceIndex;
				1024	c=*source++;
				1025	if(BOCU1_START_NEG_2<=c && c<BOCU1_START_POS_2) {
				1026	/* Write a code point directly from a single-byte difference. */
				1027	c=prev+(c-BOCU1_MIDDLE);
				1028	if(c<0x3000) {
				1029	*target++=(UChar)c;
				1030	*offsets++=sourceIndex;
				1031	prev=BOCU1_SIMPLE_PREV(c);
				1032	sourceIndex=nextSourceIndex;
				1033	goto fastSingle;
				1034	}
				1035	} else if(c<=0x20) {
				1036	/*
				1037	* Direct-encoded C0 control code or space.
				1038	* Reset prev for C0 control codes but not for space.
				1039	*/
				1040	if(c!=0x20) {
				1041	prev=BOCU1_ASCII_PREV;
				1042	}
				1043	*target++=(UChar)c;
				1044	*offsets++=sourceIndex;
				1045	sourceIndex=nextSourceIndex;
				1046	continue;
				1047	} else if(BOCU1_START_NEG_3<=c && c<BOCU1_START_POS_3 && source<sourceLimit) {
				1048	/* Optimize two-byte case. */
				1049	if(c>=BOCU1_MIDDLE) {
				1050	diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
				1051	} else {
				1052	diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
				1053	}
				1054
				1055	/* trail byte */
				1056	++nextSourceIndex;
				1057	c=decodeBocu1TrailByte(1, *source++);
				1058	if(c<0 \|\| (uint32_t)(c=prev+diff+c)>0x10ffff) {
				1059	bytes[0]=source[-2];
				1060	bytes[1]=source[-1];
				1061	byteIndex=2;
				1062	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				1063	break;
				1064	}
				1065	} else if(c==BOCU1_RESET) {
				1066	/* only reset the state, no code point */
				1067	prev=BOCU1_ASCII_PREV;
				1068	sourceIndex=nextSourceIndex;
				1069	continue;
				1070	} else {
				1071	/*
				1072	* For multi-byte difference lead bytes, set the decoder state
				1073	* with the partial difference value from the lead byte and
				1074	* with the number of trail bytes.
				1075	*/
				1076	bytes[0]=(uint8_t)c;
				1077	byteIndex=1;
				1078
				1079	diff=decodeBocu1LeadByte(c);
				1080	count=diff&3;
				1081	diff>>=2;
				1082	getTrail:
				1083	for(;;) {
				1084	if(source>=sourceLimit) {
				1085	goto endloop;
				1086	}
				1087	++nextSourceIndex;
				1088	c=bytes[byteIndex++]=*source++;
				1089
				1090	/* trail byte in any position */
				1091	c=decodeBocu1TrailByte(count, c);
				1092	if(c<0) {
				1093	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				1094	goto endloop;
				1095	}
				1096
				1097	diff+=c;
				1098	if(--count==0) {
				1099	/* final trail byte, deliver a code point */
				1100	byteIndex=0;
				1101	c=prev+diff;
				1102	if((uint32_t)c>0x10ffff) {
				1103	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				1104	goto endloop;
				1105	}
				1106	break;
				1107	}
				1108	}
				1109	}
				1110
				1111	/* calculate the next prev and output c */
				1112	prev=BOCU1_PREV(c);
				1113	if(c<=0xffff) {
				1114	*target++=(UChar)c;
				1115	*offsets++=sourceIndex;
				1116	} else {
				1117	/* output surrogate pair */
				1118	*target++=U16_LEAD(c);
				1119	if(target<targetLimit) {
				1120	*target++=U16_TRAIL(c);
				1121	*offsets++=sourceIndex;
				1122	*offsets++=sourceIndex;
				1123	} else {
				1124	/* target overflow */
				1125	*offsets++=sourceIndex;
				1126	cnv->UCharErrorBuffer[0]=U16_TRAIL(c);
				1127	cnv->UCharErrorBufferLength=1;
				1128	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				1129	break;
				1130	}
				1131	}
				1132	sourceIndex=nextSourceIndex;
				1133	}
				1134	endloop:
				1135
				1136	if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) {
				1137	/* set the converter state in UConverter to deal with the next character */
				1138	cnv->toUnicodeStatus=BOCU1_ASCII_PREV;
				1139	cnv->mode=0;
				1140	} else {
				1141	/* set the converter state back into UConverter */
				1142	cnv->toUnicodeStatus=(uint32_t)prev;
				1143	cnv->mode=(diff<<2)\|count;
				1144	}
				1145	cnv->toULength=byteIndex;
				1146
				1147	/* write back the updated pointers */
				1148	pArgs->source=(const char *)source;
				1149	pArgs->target=target;
				1150	pArgs->offsets=offsets;
				1151	return;
				1152	}
				1153
				1154	/*
				1155	* Identical to _Bocu1ToUnicodeWithOffsets but without offset handling.
				1156	* If a change is made in the original function, then either
				1157	* change this function the same way or
				1158	* re-copy the original function and remove the variables
				1159	* offsets, sourceIndex, and nextSourceIndex.
				1160	*/
				1161	static void
				1162	_Bocu1ToUnicode(UConverterToUnicodeArgs *pArgs,
				1163	UErrorCode *pErrorCode) {
				1164	UConverter *cnv;
				1165	const uint8_t source, sourceLimit;
				1166	UChar *target;
				1167	const UChar *targetLimit;
				1168
				1169	int32_t prev, count, diff, c;
				1170
				1171	int8_t byteIndex;
				1172	uint8_t *bytes;
				1173
				1174	U_ALIGN_CODE(16)
				1175
				1176	/* set up the local pointers */
				1177	cnv=pArgs->converter;
				1178	source=(const uint8_t *)pArgs->source;
				1179	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
				1180	target=pArgs->target;
				1181	targetLimit=pArgs->targetLimit;
				1182
				1183	/* get the converter state from UConverter */
				1184	prev=(int32_t)cnv->toUnicodeStatus;
				1185	if(prev==0) {
				1186	prev=BOCU1_ASCII_PREV;
				1187	}
				1188	diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */
				1189	count=diff&3;
				1190	diff>>=2;
				1191
				1192	byteIndex=cnv->toULength;
				1193	bytes=cnv->toUBytes;
				1194
				1195	/* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
				1196	if(count>0 && byteIndex>0 && target<targetLimit) {
				1197	goto getTrail;
				1198	}
				1199
				1200	fastSingle:
				1201	/* fast loop for single-byte differences */
				1202	/* use count as the only loop counter variable */
				1203	diff=(int32_t)(sourceLimit-source);
				1204	count=(int32_t)(pArgs->targetLimit-target);
				1205	if(count>diff) {
				1206	count=diff;
				1207	}
				1208	while(count>0) {
				1209	if(BOCU1_START_NEG_2<=(c=*source) && c<BOCU1_START_POS_2) {
				1210	c=prev+(c-BOCU1_MIDDLE);
				1211	if(c<0x3000) {
				1212	*target++=(UChar)c;
				1213	prev=BOCU1_SIMPLE_PREV(c);
				1214	} else {
				1215	break;
				1216	}
				1217	} else if(c<=0x20) {
				1218	if(c!=0x20) {
				1219	prev=BOCU1_ASCII_PREV;
				1220	}
				1221	*target++=(UChar)c;
				1222	} else {
				1223	break;
				1224	}
				1225	++source;
				1226	--count;
				1227	}
				1228
				1229	/* decode a sequence of single and lead bytes */
				1230	while(source<sourceLimit) {
				1231	if(target>=targetLimit) {
				1232	/* target is full */
				1233	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				1234	break;
				1235	}
				1236
				1237	c=*source++;
				1238	if(BOCU1_START_NEG_2<=c && c<BOCU1_START_POS_2) {
				1239	/* Write a code point directly from a single-byte difference. */
				1240	c=prev+(c-BOCU1_MIDDLE);
				1241	if(c<0x3000) {
				1242	*target++=(UChar)c;
				1243	prev=BOCU1_SIMPLE_PREV(c);
				1244	goto fastSingle;
				1245	}
				1246	} else if(c<=0x20) {
				1247	/*
				1248	* Direct-encoded C0 control code or space.
				1249	* Reset prev for C0 control codes but not for space.
				1250	*/
				1251	if(c!=0x20) {
				1252	prev=BOCU1_ASCII_PREV;
				1253	}
				1254	*target++=(UChar)c;
				1255	continue;
				1256	} else if(BOCU1_START_NEG_3<=c && c<BOCU1_START_POS_3 && source<sourceLimit) {
				1257	/* Optimize two-byte case. */
				1258	if(c>=BOCU1_MIDDLE) {
				1259	diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
				1260	} else {
				1261	diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
				1262	}
				1263
				1264	/* trail byte */
				1265	c=decodeBocu1TrailByte(1, *source++);
				1266	if(c<0 \|\| (uint32_t)(c=prev+diff+c)>0x10ffff) {
				1267	bytes[0]=source[-2];
				1268	bytes[1]=source[-1];
				1269	byteIndex=2;
				1270	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				1271	break;
				1272	}
				1273	} else if(c==BOCU1_RESET) {
				1274	/* only reset the state, no code point */
				1275	prev=BOCU1_ASCII_PREV;
				1276	continue;
				1277	} else {
				1278	/*
				1279	* For multi-byte difference lead bytes, set the decoder state
				1280	* with the partial difference value from the lead byte and
				1281	* with the number of trail bytes.
				1282	*/
				1283	bytes[0]=(uint8_t)c;
				1284	byteIndex=1;
				1285
				1286	diff=decodeBocu1LeadByte(c);
				1287	count=diff&3;
				1288	diff>>=2;
				1289	getTrail:
				1290	for(;;) {
				1291	if(source>=sourceLimit) {
				1292	goto endloop;
				1293	}
				1294	c=bytes[byteIndex++]=*source++;
				1295
				1296	/* trail byte in any position */
				1297	c=decodeBocu1TrailByte(count, c);
				1298	if(c<0) {
				1299	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				1300	goto endloop;
				1301	}
				1302
				1303	diff+=c;
				1304	if(--count==0) {
				1305	/* final trail byte, deliver a code point */
				1306	byteIndex=0;
				1307	c=prev+diff;
				1308	if((uint32_t)c>0x10ffff) {
				1309	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				1310	goto endloop;
				1311	}
				1312	break;
				1313	}
				1314	}
				1315	}
				1316
				1317	/* calculate the next prev and output c */
				1318	prev=BOCU1_PREV(c);
				1319	if(c<=0xffff) {
				1320	*target++=(UChar)c;
				1321	} else {
				1322	/* output surrogate pair */
				1323	*target++=U16_LEAD(c);
				1324	if(target<targetLimit) {
				1325	*target++=U16_TRAIL(c);
				1326	} else {
				1327	/* target overflow */
				1328	cnv->UCharErrorBuffer[0]=U16_TRAIL(c);
				1329	cnv->UCharErrorBufferLength=1;
				1330	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				1331	break;
				1332	}
				1333	}
				1334	}
				1335	endloop:
				1336
				1337	if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) {
				1338	/* set the converter state in UConverter to deal with the next character */
				1339	cnv->toUnicodeStatus=BOCU1_ASCII_PREV;
				1340	cnv->mode=0;
				1341	} else {
				1342	/* set the converter state back into UConverter */
				1343	cnv->toUnicodeStatus=(uint32_t)prev;
				1344	cnv->mode=(diff<<2)\|count;
				1345	}
				1346	cnv->toULength=byteIndex;
				1347
				1348	/* write back the updated pointers */
				1349	pArgs->source=(const char *)source;
				1350	pArgs->target=target;
				1351	return;
				1352	}
				1353
				1354	/* miscellaneous ------------------------------------------------------------ */
				1355
				1356	static const UConverterImpl _Bocu1Impl={
				1357	UCNV_BOCU1,
				1358
				1359	NULL,
				1360	NULL,
				1361
				1362	NULL,
				1363	NULL,
				1364	NULL,
				1365
				1366	_Bocu1ToUnicode,
				1367	_Bocu1ToUnicodeWithOffsets,
				1368	_Bocu1FromUnicode,
				1369	_Bocu1FromUnicodeWithOffsets,
				1370	NULL,
				1371
				1372	NULL,
				1373	NULL,
				1374	NULL,
				1375	NULL,
				1376	ucnv_getCompleteUnicodeSet,
				1377
				1378	NULL,
				1379	NULL
				1380	};
				1381
				1382	static const UConverterStaticData _Bocu1StaticData={
				1383	sizeof(UConverterStaticData),
				1384	"BOCU-1",
				1385	1214, /* CCSID for BOCU-1 */
				1386	UCNV_IBM, UCNV_BOCU1,
				1387	1, 4, /* one UChar generates at least 1 byte and at most 4 bytes */
				1388	{ 0x1a, 0, 0, 0 }, 1, /* BOCU-1 never needs to write a subchar */
				1389	FALSE, FALSE,
				1390	0,
				1391	0,
				1392	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
				1393	};
				1394
				1395	const UConverterSharedData _Bocu1Data={
				1396	sizeof(UConverterSharedData), ~((uint32_t)0),
				1397	NULL, NULL, &_Bocu1StaticData, FALSE, &_Bocu1Impl,
				1398	0,
				1399	UCNV_MBCS_TABLE_INITIALIZER
				1400	};
				1401
				1402	#endif