Blame - source/common/ucnvlat1.cpp - chromium.googlesource.com/chromium/deps/icu

blob: 05aad6a0e039b8b34034405c9ce01f3efdcccc37 [file] [log] [blame]

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 2000-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   file name:  ucnvlat1.cpp
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2000feb07
*   created by: Markus W. Scherer
*/
				16
				17	#include "unicode/utypes.h"
				18
				19	#if !UCONFIG_NO_CONVERSION
				20
				21	#include "unicode/ucnv.h"
				22	#include "unicode/uset.h"
				23	#include "unicode/utf8.h"
				24	#include "ucnv_bld.h"
				25	#include "ucnv_cnv.h"
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame]	26	#include "ustr_imp.h"
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	27
				28	/* control optimizations according to the platform */
				29	#define LATIN1_UNROLL_FROM_UNICODE 1
				30
				31	/* ISO 8859-1 --------------------------------------------------------------- */
				32
				33	/* This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	34	U_CDECL_BEGIN
				35	static void U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	36	_Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
				37	UErrorCode *pErrorCode) {
				38	const uint8_t *source;
				39	UChar *target;
				40	int32_t targetCapacity, length;
				41	int32_t *offsets;
				42
				43	int32_t sourceIndex;
				44
				45	/* set up the local pointers */
				46	source=(const uint8_t *)pArgs->source;
				47	target=pArgs->target;
				48	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
				49	offsets=pArgs->offsets;
				50
				51	sourceIndex=0;
				52
				53	/*
				54	* since the conversion here is 1:1 UChar:uint8_t, we need only one counter
				55	* for the minimum of the sourceLength and targetCapacity
				56	*/
				57	length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
				58	if(length<=targetCapacity) {
				59	targetCapacity=length;
				60	} else {
				61	/* target will be full */
				62	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				63	length=targetCapacity;
				64	}
				65
				66	if(targetCapacity>=8) {
				67	/* This loop is unrolled for speed and improved pipelining. */
				68	int32_t count, loops;
				69
				70	loops=count=targetCapacity>>3;
				71	length=targetCapacity&=0x7;
				72	do {
				73	target[0]=source[0];
				74	target[1]=source[1];
				75	target[2]=source[2];
				76	target[3]=source[3];
				77	target[4]=source[4];
				78	target[5]=source[5];
				79	target[6]=source[6];
				80	target[7]=source[7];
				81	target+=8;
				82	source+=8;
				83	} while(--count>0);
				84
				85	if(offsets!=NULL) {
				86	do {
				87	offsets[0]=sourceIndex++;
				88	offsets[1]=sourceIndex++;
				89	offsets[2]=sourceIndex++;
				90	offsets[3]=sourceIndex++;
				91	offsets[4]=sourceIndex++;
				92	offsets[5]=sourceIndex++;
				93	offsets[6]=sourceIndex++;
				94	offsets[7]=sourceIndex++;
				95	offsets+=8;
				96	} while(--loops>0);
				97	}
				98	}
				99
				100	/* conversion loop */
				101	while(targetCapacity>0) {
				102	target++=source++;
				103	--targetCapacity;
				104	}
				105
				106	/* write back the updated pointers */
				107	pArgs->source=(const char *)source;
				108	pArgs->target=target;
				109
				110	/* set offsets */
				111	if(offsets!=NULL) {
				112	while(length>0) {
				113	*offsets++=sourceIndex++;
				114	--length;
				115	}
				116	pArgs->offsets=offsets;
				117	}
				118	}
				119
				120	/* This is a table-less and callback-less version of ucnv_MBCSSingleGetNextUChar(). */
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	121	static UChar32 U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	122	_Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,
				123	UErrorCode *pErrorCode) {
				124	const uint8_t source=(const uint8_t )pArgs->source;
				125	if(source<(const uint8_t *)pArgs->sourceLimit) {
				126	pArgs->source=(const char *)(source+1);
				127	return *source;
				128	}
				129
				130	/* no output because of empty input */
				131	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
				132	return 0xffff;
				133	}
				134
				135	/* This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets(). */
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	136	static void U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	137	_Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
				138	UErrorCode *pErrorCode) {
				139	UConverter *cnv;
				140	const UChar source, sourceLimit;
				141	uint8_t target, oldTarget;
				142	int32_t targetCapacity, length;
				143	int32_t *offsets;
				144
				145	UChar32 cp;
				146	UChar c, max;
				147
				148	int32_t sourceIndex;
				149
				150	/* set up the local pointers */
				151	cnv=pArgs->converter;
				152	source=pArgs->source;
				153	sourceLimit=pArgs->sourceLimit;
				154	target=oldTarget=(uint8_t *)pArgs->target;
				155	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
				156	offsets=pArgs->offsets;
				157
				158	if(cnv->sharedData==&_Latin1Data) {
				159	max=0xff; /* Latin-1 */
				160	} else {
				161	max=0x7f; /* US-ASCII */
				162	}
				163
				164	/* get the converter state from UConverter */
				165	cp=cnv->fromUChar32;
				166
				167	/* sourceIndex=-1 if the current character began in the previous buffer */
				168	sourceIndex= cp==0 ? 0 : -1;
				169
				170	/*
				171	* since the conversion here is 1:1 UChar:uint8_t, we need only one counter
				172	* for the minimum of the sourceLength and targetCapacity
				173	*/
				174	length=(int32_t)(sourceLimit-source);
				175	if(length<targetCapacity) {
				176	targetCapacity=length;
				177	}
				178
				179	/* conversion loop */
				180	if(cp!=0 && targetCapacity>0) {
				181	goto getTrail;
				182	}
				183
				184	#if LATIN1_UNROLL_FROM_UNICODE
				185	/* unroll the loop with the most common case */
				186	if(targetCapacity>=16) {
				187	int32_t count, loops;
				188	UChar u, oredChars;
				189
				190	loops=count=targetCapacity>>4;
				191	do {
				192	oredChars=u=*source++;
				193	*target++=(uint8_t)u;
				194	oredChars\|=u=*source++;
				195	*target++=(uint8_t)u;
				196	oredChars\|=u=*source++;
				197	*target++=(uint8_t)u;
				198	oredChars\|=u=*source++;
				199	*target++=(uint8_t)u;
				200	oredChars\|=u=*source++;
				201	*target++=(uint8_t)u;
				202	oredChars\|=u=*source++;
				203	*target++=(uint8_t)u;
				204	oredChars\|=u=*source++;
				205	*target++=(uint8_t)u;
				206	oredChars\|=u=*source++;
				207	*target++=(uint8_t)u;
				208	oredChars\|=u=*source++;
				209	*target++=(uint8_t)u;
				210	oredChars\|=u=*source++;
				211	*target++=(uint8_t)u;
				212	oredChars\|=u=*source++;
				213	*target++=(uint8_t)u;
				214	oredChars\|=u=*source++;
				215	*target++=(uint8_t)u;
				216	oredChars\|=u=*source++;
				217	*target++=(uint8_t)u;
				218	oredChars\|=u=*source++;
				219	*target++=(uint8_t)u;
				220	oredChars\|=u=*source++;
				221	*target++=(uint8_t)u;
				222	oredChars\|=u=*source++;
				223	*target++=(uint8_t)u;
				224
				225	/* were all 16 entries really valid? */
				226	if(oredChars>max) {
				227	/* no, return to the first of these 16 */
				228	source-=16;
				229	target-=16;
				230	break;
				231	}
				232	} while(--count>0);
				233	count=loops-count;
				234	targetCapacity-=16*count;
				235
				236	if(offsets!=NULL) {
				237	oldTarget+=16*count;
				238	while(count>0) {
				239	*offsets++=sourceIndex++;
				240	*offsets++=sourceIndex++;
				241	*offsets++=sourceIndex++;
				242	*offsets++=sourceIndex++;
				243	*offsets++=sourceIndex++;
				244	*offsets++=sourceIndex++;
				245	*offsets++=sourceIndex++;
				246	*offsets++=sourceIndex++;
				247	*offsets++=sourceIndex++;
				248	*offsets++=sourceIndex++;
				249	*offsets++=sourceIndex++;
				250	*offsets++=sourceIndex++;
				251	*offsets++=sourceIndex++;
				252	*offsets++=sourceIndex++;
				253	*offsets++=sourceIndex++;
				254	*offsets++=sourceIndex++;
				255	--count;
				256	}
				257	}
				258	}
				259	#endif
				260
				261	/* conversion loop */
				262	c=0;
				263	while(targetCapacity>0 && (c=*source++)<=max) {
				264	/* convert the Unicode code point */
				265	*target++=(uint8_t)c;
				266	--targetCapacity;
				267	}
				268
				269	if(c>max) {
				270	cp=c;
				271	if(!U_IS_SURROGATE(cp)) {
				272	/* callback(unassigned) */
				273	} else if(U_IS_SURROGATE_LEAD(cp)) {
				274	getTrail:
				275	if(source<sourceLimit) {
				276	/* test the following code unit */
				277	UChar trail=*source;
				278	if(U16_IS_TRAIL(trail)) {
				279	++source;
				280	cp=U16_GET_SUPPLEMENTARY(cp, trail);
				281	/* this codepage does not map supplementary code points */
				282	/* callback(unassigned) */
				283	} else {
				284	/* this is an unmatched lead code unit (1st surrogate) */
				285	/* callback(illegal) */
				286	}
				287	} else {
				288	/* no more input */
				289	cnv->fromUChar32=cp;
				290	goto noMoreInput;
				291	}
				292	} else {
				293	/* this is an unmatched trail code unit (2nd surrogate) */
				294	/* callback(illegal) */
				295	}
				296
				297	*pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
				298	cnv->fromUChar32=cp;
				299	}
				300	noMoreInput:
				301
				302	/* set offsets since the start */
				303	if(offsets!=NULL) {
				304	size_t count=target-oldTarget;
				305	while(count>0) {
				306	*offsets++=sourceIndex++;
				307	--count;
				308	}
				309	}
				310
				311	if(U_SUCCESS(pErrorCode) && source<sourceLimit && target>=(uint8_t )pArgs->targetLimit) {
				312	/* target is full */
				313	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				314	}
				315
				316	/* write back the updated pointers */
				317	pArgs->source=source;
				318	pArgs->target=(char *)target;
				319	pArgs->offsets=offsets;
				320	}
				321
				322	/* Convert UTF-8 to Latin-1. Adapted from ucnv_SBCSFromUTF8(). */
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	323	static void U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	324	ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
				325	UConverterToUnicodeArgs *pToUArgs,
				326	UErrorCode *pErrorCode) {
				327	UConverter *utf8;
				328	const uint8_t source, sourceLimit;
				329	uint8_t *target;
				330	int32_t targetCapacity;
				331
				332	UChar32 c;
				333	uint8_t b, t1;
				334
				335	/* set up the local pointers */
				336	utf8=pToUArgs->converter;
				337	source=(uint8_t *)pToUArgs->source;
				338	sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
				339	target=(uint8_t *)pFromUArgs->target;
				340	targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
				341
				342	/* get the converter state from the UTF-8 UConverter */
Jungshik Shin	f61e46d	2018-05-04 13:00:45 -0700	[diff] [blame]	343	if (utf8->toULength > 0) {
				344	c=(UChar32)utf8->toUnicodeStatus;
				345	} else {
				346	c = 0;
				347	}
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	348	if(c!=0 && source<sourceLimit) {
				349	if(targetCapacity==0) {
				350	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				351	return;
				352	} else if(c>=0xc2 && c<=0xc3 && (t1=(uint8_t)(*source-0x80)) <= 0x3f) {
				353	++source;
				354	*target++=(uint8_t)(((c&3)<<6)\|t1);
				355	--targetCapacity;
				356
				357	utf8->toUnicodeStatus=0;
				358	utf8->toULength=0;
				359	} else {
				360	/* complicated, illegal or unmappable input: fall back to the pivoting implementation */
				361	*pErrorCode=U_USING_DEFAULT_WARNING;
				362	return;
				363	}
				364	}
				365
				366	/*
				367	* Make sure that the last byte sequence before sourceLimit is complete
				368	* or runs into a lead byte.
				369	* In the conversion loop compare source with sourceLimit only once
				370	* per multi-byte character.
				371	* For Latin-1, adjust sourceLimit only for 1 trail byte because
				372	* the conversion loop handles at most 2-byte sequences.
				373	*/
				374	if(source<sourceLimit && U8_IS_LEAD(*(sourceLimit-1))) {
				375	--sourceLimit;
				376	}
				377
				378	/* conversion loop */
				379	while(source<sourceLimit) {
				380	if(targetCapacity>0) {
				381	b=*source++;
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame]	382	if(U8_IS_SINGLE(b)) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	383	/* convert ASCII */
				384	*target++=(uint8_t)b;
				385	--targetCapacity;
				386	} else if( /* handle U+0080..U+00FF inline */
				387	b>=0xc2 && b<=0xc3 &&
				388	(t1=(uint8_t)(*source-0x80)) <= 0x3f
				389	) {
				390	++source;
				391	*target++=(uint8_t)(((b&3)<<6)\|t1);
				392	--targetCapacity;
				393	} else {
				394	/* complicated, illegal or unmappable input: fall back to the pivoting implementation */
				395	pToUArgs->source=(char *)(source-1);
				396	pFromUArgs->target=(char *)target;
				397	*pErrorCode=U_USING_DEFAULT_WARNING;
				398	return;
				399	}
				400	} else {
				401	/* target is full */
				402	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				403	break;
				404	}
				405	}
				406
				407	/*
				408	* The sourceLimit may have been adjusted before the conversion loop
				409	* to stop before a truncated sequence.
				410	* If so, then collect the truncated sequence now.
				411	* For Latin-1, there is at most exactly one lead byte because of the
				412	* smaller sourceLimit adjustment logic.
				413	*/
				414	if(U_SUCCESS(pErrorCode) && source<(sourceLimit=(uint8_t )pToUArgs->sourceLimit)) {
				415	utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++;
				416	utf8->toULength=1;
Jungshik Shin	b318966	2017-11-07 11:18:34 -0800	[diff] [blame]	417	utf8->mode=U8_COUNT_BYTES(b);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	418	}
				419
				420	/* write back the updated pointers */
				421	pToUArgs->source=(char *)source;
				422	pFromUArgs->target=(char *)target;
				423	}
				424
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	425	static void U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	426	_Latin1GetUnicodeSet(const UConverter *cnv,
				427	const USetAdder *sa,
				428	UConverterUnicodeSet which,
				429	UErrorCode *pErrorCode) {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	430	(void)cnv;
				431	(void)which;
				432	(void)pErrorCode;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	433	sa->addRange(sa->set, 0, 0xff);
				434	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	435	U_CDECL_END
				436
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	437
				438	static const UConverterImpl _Latin1Impl={
				439	UCNV_LATIN_1,
				440
				441	NULL,
				442	NULL,
				443
				444	NULL,
				445	NULL,
				446	NULL,
				447
				448	_Latin1ToUnicodeWithOffsets,
				449	_Latin1ToUnicodeWithOffsets,
				450	_Latin1FromUnicodeWithOffsets,
				451	_Latin1FromUnicodeWithOffsets,
				452	_Latin1GetNextUChar,
				453
				454	NULL,
				455	NULL,
				456	NULL,
				457	NULL,
				458	_Latin1GetUnicodeSet,
				459
				460	NULL,
				461	ucnv_Latin1FromUTF8
				462	};
				463
				464	static const UConverterStaticData _Latin1StaticData={
				465	sizeof(UConverterStaticData),
				466	"ISO-8859-1",
				467	819, UCNV_IBM, UCNV_LATIN_1, 1, 1,
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	468	{ 0x1a, 0, 0, 0 }, 1, false, false,
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	469	0,
				470	0,
				471	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
				472	};
				473
Jungshik Shin	a05f412	2015-06-09 15:33:54 -0700	[diff] [blame]	474	const UConverterSharedData _Latin1Data=
				475	UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Latin1StaticData, &_Latin1Impl);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	476
				477	/* US-ASCII ----------------------------------------------------------------- */
				478
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	479	U_CDECL_BEGIN
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	480	/* This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	481	static void U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	482	_ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
				483	UErrorCode *pErrorCode) {
				484	const uint8_t source, sourceLimit;
				485	UChar target, oldTarget;
				486	int32_t targetCapacity, length;
				487	int32_t *offsets;
				488
				489	int32_t sourceIndex;
				490
				491	uint8_t c;
				492
				493	/* set up the local pointers */
				494	source=(const uint8_t *)pArgs->source;
				495	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
				496	target=oldTarget=pArgs->target;
				497	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
				498	offsets=pArgs->offsets;
				499
				500	/* sourceIndex=-1 if the current character began in the previous buffer */
				501	sourceIndex=0;
				502
				503	/*
				504	* since the conversion here is 1:1 UChar:uint8_t, we need only one counter
				505	* for the minimum of the sourceLength and targetCapacity
				506	*/
				507	length=(int32_t)(sourceLimit-source);
				508	if(length<targetCapacity) {
				509	targetCapacity=length;
				510	}
				511
				512	if(targetCapacity>=8) {
				513	/* This loop is unrolled for speed and improved pipelining. */
				514	int32_t count, loops;
				515	UChar oredChars;
				516
				517	loops=count=targetCapacity>>3;
				518	do {
				519	oredChars=target[0]=source[0];
				520	oredChars\|=target[1]=source[1];
				521	oredChars\|=target[2]=source[2];
				522	oredChars\|=target[3]=source[3];
				523	oredChars\|=target[4]=source[4];
				524	oredChars\|=target[5]=source[5];
				525	oredChars\|=target[6]=source[6];
				526	oredChars\|=target[7]=source[7];
				527
				528	/* were all 16 entries really valid? */
				529	if(oredChars>0x7f) {
				530	/* no, return to the first of these 16 */
				531	break;
				532	}
				533	source+=8;
				534	target+=8;
				535	} while(--count>0);
				536	count=loops-count;
				537	targetCapacity-=count*8;
				538
				539	if(offsets!=NULL) {
				540	oldTarget+=count*8;
				541	while(count>0) {
				542	offsets[0]=sourceIndex++;
				543	offsets[1]=sourceIndex++;
				544	offsets[2]=sourceIndex++;
				545	offsets[3]=sourceIndex++;
				546	offsets[4]=sourceIndex++;
				547	offsets[5]=sourceIndex++;
				548	offsets[6]=sourceIndex++;
				549	offsets[7]=sourceIndex++;
				550	offsets+=8;
				551	--count;
				552	}
				553	}
				554	}
				555
				556	/* conversion loop */
				557	c=0;
				558	while(targetCapacity>0 && (c=*source++)<=0x7f) {
				559	*target++=c;
				560	--targetCapacity;
				561	}
				562
				563	if(c>0x7f) {
				564	/* callback(illegal); copy the current bytes to toUBytes[] */
				565	UConverter *cnv=pArgs->converter;
				566	cnv->toUBytes[0]=c;
				567	cnv->toULength=1;
				568	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				569	} else if(source<sourceLimit && target>=pArgs->targetLimit) {
				570	/* target is full */
				571	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				572	}
				573
				574	/* set offsets since the start */
				575	if(offsets!=NULL) {
				576	size_t count=target-oldTarget;
				577	while(count>0) {
				578	*offsets++=sourceIndex++;
				579	--count;
				580	}
				581	}
				582
				583	/* write back the updated pointers */
				584	pArgs->source=(const char *)source;
				585	pArgs->target=target;
				586	pArgs->offsets=offsets;
				587	}
				588
				589	/* This is a table-less version of ucnv_MBCSSingleGetNextUChar(). */
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	590	static UChar32 U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	591	_ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
				592	UErrorCode *pErrorCode) {
				593	const uint8_t *source;
				594	uint8_t b;
				595
				596	source=(const uint8_t *)pArgs->source;
				597	if(source<(const uint8_t *)pArgs->sourceLimit) {
				598	b=*source++;
				599	pArgs->source=(const char *)source;
				600	if(b<=0x7f) {
				601	return b;
				602	} else {
				603	UConverter *cnv=pArgs->converter;
				604	cnv->toUBytes[0]=b;
				605	cnv->toULength=1;
				606	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				607	return 0xffff;
				608	}
				609	}
				610
				611	/* no output because of empty input */
				612	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
				613	return 0xffff;
				614	}
				615
				616	/* "Convert" UTF-8 to US-ASCII: Validate and copy. */
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	617	static void U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	618	ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
				619	UConverterToUnicodeArgs *pToUArgs,
				620	UErrorCode *pErrorCode) {
				621	const uint8_t source, sourceLimit;
				622	uint8_t *target;
				623	int32_t targetCapacity, length;
				624
				625	uint8_t c;
				626
Jungshik Shin	f61e46d	2018-05-04 13:00:45 -0700	[diff] [blame]	627	if(pToUArgs->converter->toULength > 0) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	628	/* no handling of partial UTF-8 characters here, fall back to pivoting */
				629	*pErrorCode=U_USING_DEFAULT_WARNING;
				630	return;
				631	}
				632
				633	/* set up the local pointers */
				634	source=(const uint8_t *)pToUArgs->source;
				635	sourceLimit=(const uint8_t *)pToUArgs->sourceLimit;
				636	target=(uint8_t *)pFromUArgs->target;
				637	targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
				638
				639	/*
				640	* since the conversion here is 1:1 uint8_t:uint8_t, we need only one counter
				641	* for the minimum of the sourceLength and targetCapacity
				642	*/
				643	length=(int32_t)(sourceLimit-source);
				644	if(length<targetCapacity) {
				645	targetCapacity=length;
				646	}
				647
				648	/* unroll the loop with the most common case */
				649	if(targetCapacity>=16) {
				650	int32_t count, loops;
				651	uint8_t oredChars;
				652
				653	loops=count=targetCapacity>>4;
				654	do {
				655	oredChars=target++=source++;
				656	oredChars\|=target++=source++;
				657	oredChars\|=target++=source++;
				658	oredChars\|=target++=source++;
				659	oredChars\|=target++=source++;
				660	oredChars\|=target++=source++;
				661	oredChars\|=target++=source++;
				662	oredChars\|=target++=source++;
				663	oredChars\|=target++=source++;
				664	oredChars\|=target++=source++;
				665	oredChars\|=target++=source++;
				666	oredChars\|=target++=source++;
				667	oredChars\|=target++=source++;
				668	oredChars\|=target++=source++;
				669	oredChars\|=target++=source++;
				670	oredChars\|=target++=source++;
				671
				672	/* were all 16 entries really valid? */
				673	if(oredChars>0x7f) {
				674	/* no, return to the first of these 16 */
				675	source-=16;
				676	target-=16;
				677	break;
				678	}
				679	} while(--count>0);
				680	count=loops-count;
				681	targetCapacity-=16*count;
				682	}
				683
				684	/* conversion loop */
				685	c=0;
				686	while(targetCapacity>0 && (c=*source)<=0x7f) {
				687	++source;
				688	*target++=c;
				689	--targetCapacity;
				690	}
				691
				692	if(c>0x7f) {
				693	/* non-ASCII character, handle in standard converter */
				694	*pErrorCode=U_USING_DEFAULT_WARNING;
				695	} else if(source<sourceLimit && target>=(const uint8_t *)pFromUArgs->targetLimit) {
				696	/* target is full */
				697	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				698	}
				699
				700	/* write back the updated pointers */
				701	pToUArgs->source=(const char *)source;
				702	pFromUArgs->target=(char *)target;
				703	}
				704
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	705	static void U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	706	_ASCIIGetUnicodeSet(const UConverter *cnv,
				707	const USetAdder *sa,
				708	UConverterUnicodeSet which,
				709	UErrorCode *pErrorCode) {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	710	(void)cnv;
				711	(void)which;
				712	(void)pErrorCode;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	713	sa->addRange(sa->set, 0, 0x7f);
				714	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	715	U_CDECL_END
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	716
				717	static const UConverterImpl _ASCIIImpl={
				718	UCNV_US_ASCII,
				719
				720	NULL,
				721	NULL,
				722
				723	NULL,
				724	NULL,
				725	NULL,
				726
				727	_ASCIIToUnicodeWithOffsets,
				728	_ASCIIToUnicodeWithOffsets,
				729	_Latin1FromUnicodeWithOffsets,
				730	_Latin1FromUnicodeWithOffsets,
				731	_ASCIIGetNextUChar,
				732
				733	NULL,
				734	NULL,
				735	NULL,
				736	NULL,
				737	_ASCIIGetUnicodeSet,
				738
				739	NULL,
				740	ucnv_ASCIIFromUTF8
				741	};
				742
				743	static const UConverterStaticData _ASCIIStaticData={
				744	sizeof(UConverterStaticData),
				745	"US-ASCII",
				746	367, UCNV_IBM, UCNV_US_ASCII, 1, 1,
Frank Tang	1f164ee	2022-11-08 12:31:27 -0800	[diff] [blame^]	747	{ 0x1a, 0, 0, 0 }, 1, false, false,
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	748	0,
				749	0,
				750	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
				751	};
				752
Jungshik Shin	a05f412	2015-06-09 15:33:54 -0700	[diff] [blame]	753	const UConverterSharedData _ASCIIData=
				754	UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ASCIIStaticData, &_ASCIIImpl);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	755
				756	#endif