Blame - source/common/ucnv_u32.cpp - chromium.googlesource.com/chromium/deps/icu

blob: bf6bd11dbacb7346230a64ba373cf636f3e7f7fc [file] [log] [blame]

Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1	// © 2016 and later: Unicode, Inc. and others.
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	2	// License & terms of use: http://www.unicode.org/copyright.html
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	3	/*
				4	**********************************************************************
Jungshik Shin	70f8250	2016-01-29 00:32:36 -0800	[diff] [blame]	5	* Copyright (C) 2002-2015, International Business Machines
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	6	* Corporation and others. All Rights Reserved.
				7	**********************************************************************
				8	* file name: ucnv_u32.c
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	9	* encoding: UTF-8
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	10	* tab size: 8 (not used)
				11	* indentation:4
				12	*
				13	* created on: 2002jul01
				14	* created by: Markus W. Scherer
				15	*
				16	* UTF-32 converter implementation. Used to be in ucnv_utf.c.
				17	*/
				18
				19	#include "unicode/utypes.h"
				20
Jungshik Shin	be894db	2017-07-19 15:34:14 -0700	[diff] [blame]	21	#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	22
				23	#include "unicode/ucnv.h"
				24	#include "unicode/utf.h"
				25	#include "ucnv_bld.h"
				26	#include "ucnv_cnv.h"
				27	#include "cmemory.h"
				28
				29	#define MAXIMUM_UCS2 0x0000FFFF
				30	#define MAXIMUM_UTF 0x0010FFFF
				31	#define HALF_SHIFT 10
				32	#define HALF_BASE 0x0010000
				33	#define HALF_MASK 0x3FF
				34	#define SURROGATE_HIGH_START 0xD800
				35	#define SURROGATE_LOW_START 0xDC00
				36
				37	/* -SURROGATE_LOW_START + HALF_BASE */
				38	#define SURROGATE_LOW_BASE 9216
				39
				40	enum {
				41	UCNV_NEED_TO_WRITE_BOM=1
				42	};
				43
				44	/* UTF-32BE ----------------------------------------------------------------- */
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	45	U_CDECL_BEGIN
				46	static void U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	47	T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args,
				48	UErrorCode * err)
				49	{
				50	const unsigned char mySource = (unsigned char ) args->source;
				51	UChar *myTarget = args->target;
				52	const unsigned char sourceLimit = (unsigned char ) args->sourceLimit;
				53	const UChar *targetLimit = args->targetLimit;
				54	unsigned char *toUBytes = args->converter->toUBytes;
				55	uint32_t ch, i;
				56
				57	/* Restore state of current sequence */
Jungshik Shin	f61e46d	2018-05-04 13:00:45 -0700	[diff] [blame]	58	if (args->converter->toULength > 0 && myTarget < targetLimit) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	59	i = args->converter->toULength; /* restore # of bytes consumed */
				60	args->converter->toULength = 0;
				61
				62	ch = args->converter->toUnicodeStatus - 1;/Stores the previously calculated ch from a previous call/
				63	args->converter->toUnicodeStatus = 0;
				64	goto morebytes;
				65	}
				66
				67	while (mySource < sourceLimit && myTarget < targetLimit) {
				68	i = 0;
				69	ch = 0;
				70	morebytes:
				71	while (i < sizeof(uint32_t)) {
				72	if (mySource < sourceLimit) {
				73	ch = (ch << 8) \| (uint8_t)(*mySource);
				74	toUBytes[i++] = (char) *(mySource++);
				75	}
				76	else {
				77	/* stores a partially calculated target*/
				78	/* + 1 to make 0 a valid character */
				79	args->converter->toUnicodeStatus = ch + 1;
				80	args->converter->toULength = (int8_t) i;
				81	goto donefornow;
				82	}
				83	}
				84
				85	if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) {
				86	/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
				87	if (ch <= MAXIMUM_UCS2)
				88	{
				89	/* fits in 16 bits */
				90	*(myTarget++) = (UChar) ch;
				91	}
				92	else {
				93	/* write out the surrogates */
				94	*(myTarget++) = U16_LEAD(ch);
				95	ch = U16_TRAIL(ch);
				96	if (myTarget < targetLimit) {
				97	*(myTarget++) = (UChar)ch;
				98	}
				99	else {
				100	/* Put in overflow buffer (not handled here) */
				101	args->converter->UCharErrorBuffer[0] = (UChar) ch;
				102	args->converter->UCharErrorBufferLength = 1;
				103	*err = U_BUFFER_OVERFLOW_ERROR;
				104	break;
				105	}
				106	}
				107	}
				108	else {
				109	args->converter->toULength = (int8_t)i;
				110	*err = U_ILLEGAL_CHAR_FOUND;
				111	break;
				112	}
				113	}
				114
				115	donefornow:
				116	if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) {
				117	/* End of target buffer */
				118	*err = U_BUFFER_OVERFLOW_ERROR;
				119	}
				120
				121	args->target = myTarget;
				122	args->source = (const char *) mySource;
				123	}
				124
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	125	static void U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	126	T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
				127	UErrorCode * err)
				128	{
				129	const unsigned char mySource = (unsigned char ) args->source;
				130	UChar *myTarget = args->target;
				131	int32_t *myOffsets = args->offsets;
				132	const unsigned char sourceLimit = (unsigned char ) args->sourceLimit;
				133	const UChar *targetLimit = args->targetLimit;
				134	unsigned char *toUBytes = args->converter->toUBytes;
				135	uint32_t ch, i;
				136	int32_t offsetNum = 0;
				137
				138	/* Restore state of current sequence */
Jungshik Shin	f61e46d	2018-05-04 13:00:45 -0700	[diff] [blame]	139	if (args->converter->toULength > 0 && myTarget < targetLimit) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	140	i = args->converter->toULength; /* restore # of bytes consumed */
				141	args->converter->toULength = 0;
				142
				143	ch = args->converter->toUnicodeStatus - 1;/Stores the previously calculated ch from a previous call/
				144	args->converter->toUnicodeStatus = 0;
				145	goto morebytes;
				146	}
				147
				148	while (mySource < sourceLimit && myTarget < targetLimit) {
				149	i = 0;
				150	ch = 0;
				151	morebytes:
				152	while (i < sizeof(uint32_t)) {
				153	if (mySource < sourceLimit) {
				154	ch = (ch << 8) \| (uint8_t)(*mySource);
				155	toUBytes[i++] = (char) *(mySource++);
				156	}
				157	else {
				158	/* stores a partially calculated target*/
				159	/* + 1 to make 0 a valid character */
				160	args->converter->toUnicodeStatus = ch + 1;
				161	args->converter->toULength = (int8_t) i;
				162	goto donefornow;
				163	}
				164	}
				165
				166	if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) {
				167	/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
				168	if (ch <= MAXIMUM_UCS2) {
				169	/* fits in 16 bits */
				170	*(myTarget++) = (UChar) ch;
				171	*(myOffsets++) = offsetNum;
				172	}
				173	else {
				174	/* write out the surrogates */
				175	*(myTarget++) = U16_LEAD(ch);
				176	*myOffsets++ = offsetNum;
				177	ch = U16_TRAIL(ch);
				178	if (myTarget < targetLimit)
				179	{
				180	*(myTarget++) = (UChar)ch;
				181	*(myOffsets++) = offsetNum;
				182	}
				183	else {
				184	/* Put in overflow buffer (not handled here) */
				185	args->converter->UCharErrorBuffer[0] = (UChar) ch;
				186	args->converter->UCharErrorBufferLength = 1;
				187	*err = U_BUFFER_OVERFLOW_ERROR;
				188	break;
				189	}
				190	}
				191	}
				192	else {
				193	args->converter->toULength = (int8_t)i;
				194	*err = U_ILLEGAL_CHAR_FOUND;
				195	break;
				196	}
				197	offsetNum += i;
				198	}
				199
				200	donefornow:
				201	if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
				202	{
				203	/* End of target buffer */
				204	*err = U_BUFFER_OVERFLOW_ERROR;
				205	}
				206
				207	args->target = myTarget;
				208	args->source = (const char *) mySource;
				209	args->offsets = myOffsets;
				210	}
				211
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	212	static void U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	213	T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args,
				214	UErrorCode * err)
				215	{
				216	const UChar *mySource = args->source;
				217	unsigned char *myTarget;
				218	const UChar *sourceLimit = args->sourceLimit;
				219	const unsigned char targetLimit = (unsigned char ) args->targetLimit;
				220	UChar32 ch, ch2;
				221	unsigned int indexToWrite;
				222	unsigned char temp[sizeof(uint32_t)];
				223
				224	if(mySource >= sourceLimit) {
				225	/* no input, nothing to do */
				226	return;
				227	}
				228
				229	/* write the BOM if necessary */
				230	if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame]	231	static const char bom[]={ 0, 0, (char)0xfeu, (char)0xffu };
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	232	ucnv_fromUWriteBytes(args->converter,
				233	bom, 4,
				234	&args->target, args->targetLimit,
				235	&args->offsets, -1,
				236	err);
				237	args->converter->fromUnicodeStatus=0;
				238	}
				239
				240	myTarget = (unsigned char *) args->target;
				241	temp[0] = 0;
				242
				243	if (args->converter->fromUChar32) {
				244	ch = args->converter->fromUChar32;
				245	args->converter->fromUChar32 = 0;
				246	goto lowsurogate;
				247	}
				248
				249	while (mySource < sourceLimit && myTarget < targetLimit) {
				250	ch = *(mySource++);
				251
				252	if (U_IS_SURROGATE(ch)) {
				253	if (U_IS_LEAD(ch)) {
				254	lowsurogate:
				255	if (mySource < sourceLimit) {
				256	ch2 = *mySource;
				257	if (U_IS_TRAIL(ch2)) {
				258	ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
				259	mySource++;
				260	}
				261	else {
				262	/* this is an unmatched trail code unit (2nd surrogate) */
				263	/* callback(illegal) */
				264	args->converter->fromUChar32 = ch;
				265	*err = U_ILLEGAL_CHAR_FOUND;
				266	break;
				267	}
				268	}
				269	else {
				270	/* ran out of source */
				271	args->converter->fromUChar32 = ch;
				272	if (args->flush) {
				273	/* this is an unmatched trail code unit (2nd surrogate) */
				274	/* callback(illegal) */
				275	*err = U_ILLEGAL_CHAR_FOUND;
				276	}
				277	break;
				278	}
				279	}
				280	else {
				281	/* this is an unmatched trail code unit (2nd surrogate) */
				282	/* callback(illegal) */
				283	args->converter->fromUChar32 = ch;
				284	*err = U_ILLEGAL_CHAR_FOUND;
				285	break;
				286	}
				287	}
				288
				289	/* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
				290	temp[1] = (uint8_t) (ch >> 16 & 0x1F);
				291	temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */
				292	temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */
				293
				294	for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) {
				295	if (myTarget < targetLimit) {
				296	*(myTarget++) = temp[indexToWrite];
				297	}
				298	else {
				299	args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
				300	*err = U_BUFFER_OVERFLOW_ERROR;
				301	}
				302	}
				303	}
				304
				305	if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) {
				306	*err = U_BUFFER_OVERFLOW_ERROR;
				307	}
				308
				309	args->target = (char *) myTarget;
				310	args->source = mySource;
				311	}
				312
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	313	static void U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	314	T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
				315	UErrorCode * err)
				316	{
				317	const UChar *mySource = args->source;
				318	unsigned char *myTarget;
				319	int32_t *myOffsets;
				320	const UChar *sourceLimit = args->sourceLimit;
				321	const unsigned char targetLimit = (unsigned char ) args->targetLimit;
				322	UChar32 ch, ch2;
				323	int32_t offsetNum = 0;
				324	unsigned int indexToWrite;
				325	unsigned char temp[sizeof(uint32_t)];
				326
				327	if(mySource >= sourceLimit) {
				328	/* no input, nothing to do */
				329	return;
				330	}
				331
				332	/* write the BOM if necessary */
				333	if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame]	334	static const char bom[]={ 0, 0, (char)0xfeu, (char)0xffu };
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	335	ucnv_fromUWriteBytes(args->converter,
				336	bom, 4,
				337	&args->target, args->targetLimit,
				338	&args->offsets, -1,
				339	err);
				340	args->converter->fromUnicodeStatus=0;
				341	}
				342
				343	myTarget = (unsigned char *) args->target;
				344	myOffsets = args->offsets;
				345	temp[0] = 0;
				346
				347	if (args->converter->fromUChar32) {
				348	ch = args->converter->fromUChar32;
				349	args->converter->fromUChar32 = 0;
				350	goto lowsurogate;
				351	}
				352
				353	while (mySource < sourceLimit && myTarget < targetLimit) {
				354	ch = *(mySource++);
				355
				356	if (U_IS_SURROGATE(ch)) {
				357	if (U_IS_LEAD(ch)) {
				358	lowsurogate:
				359	if (mySource < sourceLimit) {
				360	ch2 = *mySource;
				361	if (U_IS_TRAIL(ch2)) {
				362	ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
				363	mySource++;
				364	}
				365	else {
				366	/* this is an unmatched trail code unit (2nd surrogate) */
				367	/* callback(illegal) */
				368	args->converter->fromUChar32 = ch;
				369	*err = U_ILLEGAL_CHAR_FOUND;
				370	break;
				371	}
				372	}
				373	else {
				374	/* ran out of source */
				375	args->converter->fromUChar32 = ch;
				376	if (args->flush) {
				377	/* this is an unmatched trail code unit (2nd surrogate) */
				378	/* callback(illegal) */
				379	*err = U_ILLEGAL_CHAR_FOUND;
				380	}
				381	break;
				382	}
				383	}
				384	else {
				385	/* this is an unmatched trail code unit (2nd surrogate) */
				386	/* callback(illegal) */
				387	args->converter->fromUChar32 = ch;
				388	*err = U_ILLEGAL_CHAR_FOUND;
				389	break;
				390	}
				391	}
				392
				393	/* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
				394	temp[1] = (uint8_t) (ch >> 16 & 0x1F);
				395	temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */
				396	temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */
				397
				398	for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) {
				399	if (myTarget < targetLimit) {
				400	*(myTarget++) = temp[indexToWrite];
				401	*(myOffsets++) = offsetNum;
				402	}
				403	else {
				404	args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
				405	*err = U_BUFFER_OVERFLOW_ERROR;
				406	}
				407	}
				408	offsetNum = offsetNum + 1 + (temp[1] != 0);
				409	}
				410
				411	if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) {
				412	*err = U_BUFFER_OVERFLOW_ERROR;
				413	}
				414
				415	args->target = (char *) myTarget;
				416	args->source = mySource;
				417	args->offsets = myOffsets;
				418	}
				419
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	420	static UChar32 U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	421	T_UConverter_getNextUChar_UTF32_BE(UConverterToUnicodeArgs* args,
				422	UErrorCode* err)
				423	{
				424	const uint8_t *mySource;
				425	UChar32 myUChar;
				426	int32_t length;
				427
				428	mySource = (const uint8_t *)args->source;
				429	if (mySource >= (const uint8_t *)args->sourceLimit)
				430	{
				431	/* no input */
				432	*err = U_INDEX_OUTOFBOUNDS_ERROR;
				433	return 0xffff;
				434	}
				435
				436	length = (int32_t)((const uint8_t *)args->sourceLimit - mySource);
				437	if (length < 4)
				438	{
				439	/* got a partial character */
				440	uprv_memcpy(args->converter->toUBytes, mySource, length);
				441	args->converter->toULength = (int8_t)length;
				442	args->source = (const char *)(mySource + length);
				443	*err = U_TRUNCATED_CHAR_FOUND;
				444	return 0xffff;
				445	}
				446
				447	/* Don't even try to do a direct cast because the value may be on an odd address. */
				448	myUChar = ((UChar32)mySource[0] << 24)
				449	\| ((UChar32)mySource[1] << 16)
				450	\| ((UChar32)mySource[2] << 8)
				451	\| ((UChar32)mySource[3]);
				452
				453	args->source = (const char *)(mySource + 4);
				454	if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) {
				455	return myUChar;
				456	}
				457
				458	uprv_memcpy(args->converter->toUBytes, mySource, 4);
				459	args->converter->toULength = 4;
				460
				461	*err = U_ILLEGAL_CHAR_FOUND;
				462	return 0xffff;
				463	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	464	U_CDECL_END
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	465	static const UConverterImpl _UTF32BEImpl = {
				466	UCNV_UTF32_BigEndian,
				467
				468	NULL,
				469	NULL,
				470
				471	NULL,
				472	NULL,
				473	NULL,
				474
				475	T_UConverter_toUnicode_UTF32_BE,
				476	T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC,
				477	T_UConverter_fromUnicode_UTF32_BE,
				478	T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC,
				479	T_UConverter_getNextUChar_UTF32_BE,
				480
				481	NULL,
				482	NULL,
				483	NULL,
				484	NULL,
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	485	ucnv_getNonSurrogateUnicodeSet,
				486
				487	NULL,
				488	NULL
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	489	};
				490
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame^]	491	/* The 1232 CCSID refers to any version of Unicode with any endianness of UTF-32 */
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	492	static const UConverterStaticData _UTF32BEStaticData = {
				493	sizeof(UConverterStaticData),
				494	"UTF-32BE",
				495	1232,
				496	UCNV_IBM, UCNV_UTF32_BigEndian, 4, 4,
				497	{ 0, 0, 0xff, 0xfd }, 4, FALSE, FALSE,
				498	0,
				499	0,
				500	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
				501	};
				502
Jungshik Shin	a05f412	2015-06-09 15:33:54 -0700	[diff] [blame]	503	const UConverterSharedData _UTF32BEData =
				504	UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32BEStaticData, &_UTF32BEImpl);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	505
				506	/* UTF-32LE ---------------------------------------------------------- */
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	507	U_CDECL_BEGIN
				508	static void U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	509	T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args,
				510	UErrorCode * err)
				511	{
				512	const unsigned char mySource = (unsigned char ) args->source;
				513	UChar *myTarget = args->target;
				514	const unsigned char sourceLimit = (unsigned char ) args->sourceLimit;
				515	const UChar *targetLimit = args->targetLimit;
				516	unsigned char *toUBytes = args->converter->toUBytes;
				517	uint32_t ch, i;
				518
				519	/* Restore state of current sequence */
Jungshik Shin	f61e46d	2018-05-04 13:00:45 -0700	[diff] [blame]	520	if (args->converter->toULength > 0 && myTarget < targetLimit)
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	521	{
				522	i = args->converter->toULength; /* restore # of bytes consumed */
				523	args->converter->toULength = 0;
				524
				525	/* Stores the previously calculated ch from a previous call*/
				526	ch = args->converter->toUnicodeStatus - 1;
				527	args->converter->toUnicodeStatus = 0;
				528	goto morebytes;
				529	}
				530
				531	while (mySource < sourceLimit && myTarget < targetLimit)
				532	{
				533	i = 0;
				534	ch = 0;
				535	morebytes:
				536	while (i < sizeof(uint32_t))
				537	{
				538	if (mySource < sourceLimit)
				539	{
				540	ch \|= ((uint8_t)(mySource)) << (i 8);
				541	toUBytes[i++] = (char) *(mySource++);
				542	}
				543	else
				544	{
				545	/* stores a partially calculated target*/
				546	/* + 1 to make 0 a valid character */
				547	args->converter->toUnicodeStatus = ch + 1;
				548	args->converter->toULength = (int8_t) i;
				549	goto donefornow;
				550	}
				551	}
				552
				553	if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) {
				554	/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
				555	if (ch <= MAXIMUM_UCS2) {
				556	/* fits in 16 bits */
				557	*(myTarget++) = (UChar) ch;
				558	}
				559	else {
				560	/* write out the surrogates */
				561	*(myTarget++) = U16_LEAD(ch);
				562	ch = U16_TRAIL(ch);
				563	if (myTarget < targetLimit) {
				564	*(myTarget++) = (UChar)ch;
				565	}
				566	else {
				567	/* Put in overflow buffer (not handled here) */
				568	args->converter->UCharErrorBuffer[0] = (UChar) ch;
				569	args->converter->UCharErrorBufferLength = 1;
				570	*err = U_BUFFER_OVERFLOW_ERROR;
				571	break;
				572	}
				573	}
				574	}
				575	else {
				576	args->converter->toULength = (int8_t)i;
				577	*err = U_ILLEGAL_CHAR_FOUND;
				578	break;
				579	}
				580	}
				581
				582	donefornow:
				583	if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
				584	{
				585	/* End of target buffer */
				586	*err = U_BUFFER_OVERFLOW_ERROR;
				587	}
				588
				589	args->target = myTarget;
				590	args->source = (const char *) mySource;
				591	}
				592
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	593	static void U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	594	T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
				595	UErrorCode * err)
				596	{
				597	const unsigned char mySource = (unsigned char ) args->source;
				598	UChar *myTarget = args->target;
				599	int32_t *myOffsets = args->offsets;
				600	const unsigned char sourceLimit = (unsigned char ) args->sourceLimit;
				601	const UChar *targetLimit = args->targetLimit;
				602	unsigned char *toUBytes = args->converter->toUBytes;
				603	uint32_t ch, i;
				604	int32_t offsetNum = 0;
				605
				606	/* Restore state of current sequence */
Jungshik Shin	f61e46d	2018-05-04 13:00:45 -0700	[diff] [blame]	607	if (args->converter->toULength > 0 && myTarget < targetLimit)
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	608	{
				609	i = args->converter->toULength; /* restore # of bytes consumed */
				610	args->converter->toULength = 0;
				611
				612	/* Stores the previously calculated ch from a previous call*/
				613	ch = args->converter->toUnicodeStatus - 1;
				614	args->converter->toUnicodeStatus = 0;
				615	goto morebytes;
				616	}
				617
				618	while (mySource < sourceLimit && myTarget < targetLimit)
				619	{
				620	i = 0;
				621	ch = 0;
				622	morebytes:
				623	while (i < sizeof(uint32_t))
				624	{
				625	if (mySource < sourceLimit)
				626	{
				627	ch \|= ((uint8_t)(mySource)) << (i 8);
				628	toUBytes[i++] = (char) *(mySource++);
				629	}
				630	else
				631	{
				632	/* stores a partially calculated target*/
				633	/* + 1 to make 0 a valid character */
				634	args->converter->toUnicodeStatus = ch + 1;
				635	args->converter->toULength = (int8_t) i;
				636	goto donefornow;
				637	}
				638	}
				639
				640	if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch))
				641	{
				642	/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
				643	if (ch <= MAXIMUM_UCS2)
				644	{
				645	/* fits in 16 bits */
				646	*(myTarget++) = (UChar) ch;
				647	*(myOffsets++) = offsetNum;
				648	}
				649	else {
				650	/* write out the surrogates */
				651	*(myTarget++) = U16_LEAD(ch);
				652	*(myOffsets++) = offsetNum;
				653	ch = U16_TRAIL(ch);
				654	if (myTarget < targetLimit)
				655	{
				656	*(myTarget++) = (UChar)ch;
				657	*(myOffsets++) = offsetNum;
				658	}
				659	else
				660	{
				661	/* Put in overflow buffer (not handled here) */
				662	args->converter->UCharErrorBuffer[0] = (UChar) ch;
				663	args->converter->UCharErrorBufferLength = 1;
				664	*err = U_BUFFER_OVERFLOW_ERROR;
				665	break;
				666	}
				667	}
				668	}
				669	else
				670	{
				671	args->converter->toULength = (int8_t)i;
				672	*err = U_ILLEGAL_CHAR_FOUND;
				673	break;
				674	}
				675	offsetNum += i;
				676	}
				677
				678	donefornow:
				679	if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
				680	{
				681	/* End of target buffer */
				682	*err = U_BUFFER_OVERFLOW_ERROR;
				683	}
				684
				685	args->target = myTarget;
				686	args->source = (const char *) mySource;
				687	args->offsets = myOffsets;
				688	}
				689
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	690	static void U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	691	T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args,
				692	UErrorCode * err)
				693	{
				694	const UChar *mySource = args->source;
				695	unsigned char *myTarget;
				696	const UChar *sourceLimit = args->sourceLimit;
				697	const unsigned char targetLimit = (unsigned char ) args->targetLimit;
				698	UChar32 ch, ch2;
				699	unsigned int indexToWrite;
				700	unsigned char temp[sizeof(uint32_t)];
				701
				702	if(mySource >= sourceLimit) {
				703	/* no input, nothing to do */
				704	return;
				705	}
				706
				707	/* write the BOM if necessary */
				708	if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame]	709	static const char bom[]={ (char)0xffu, (char)0xfeu, 0, 0 };
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	710	ucnv_fromUWriteBytes(args->converter,
				711	bom, 4,
				712	&args->target, args->targetLimit,
				713	&args->offsets, -1,
				714	err);
				715	args->converter->fromUnicodeStatus=0;
				716	}
				717
				718	myTarget = (unsigned char *) args->target;
				719	temp[3] = 0;
				720
				721	if (args->converter->fromUChar32)
				722	{
				723	ch = args->converter->fromUChar32;
				724	args->converter->fromUChar32 = 0;
				725	goto lowsurogate;
				726	}
				727
				728	while (mySource < sourceLimit && myTarget < targetLimit)
				729	{
				730	ch = *(mySource++);
				731
				732	if (U16_IS_SURROGATE(ch)) {
				733	if (U16_IS_LEAD(ch))
				734	{
				735	lowsurogate:
				736	if (mySource < sourceLimit)
				737	{
				738	ch2 = *mySource;
				739	if (U16_IS_TRAIL(ch2)) {
				740	ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
				741	mySource++;
				742	}
				743	else {
				744	/* this is an unmatched trail code unit (2nd surrogate) */
				745	/* callback(illegal) */
				746	args->converter->fromUChar32 = ch;
				747	*err = U_ILLEGAL_CHAR_FOUND;
				748	break;
				749	}
				750	}
				751	else {
				752	/* ran out of source */
				753	args->converter->fromUChar32 = ch;
				754	if (args->flush) {
				755	/* this is an unmatched trail code unit (2nd surrogate) */
				756	/* callback(illegal) */
				757	*err = U_ILLEGAL_CHAR_FOUND;
				758	}
				759	break;
				760	}
				761	}
				762	else {
				763	/* this is an unmatched trail code unit (2nd surrogate) */
				764	/* callback(illegal) */
				765	args->converter->fromUChar32 = ch;
				766	*err = U_ILLEGAL_CHAR_FOUND;
				767	break;
				768	}
				769	}
				770
				771	/* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
				772	temp[2] = (uint8_t) (ch >> 16 & 0x1F);
				773	temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */
				774	temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */
				775
				776	for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++)
				777	{
				778	if (myTarget < targetLimit)
				779	{
				780	*(myTarget++) = temp[indexToWrite];
				781	}
				782	else
				783	{
				784	args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
				785	*err = U_BUFFER_OVERFLOW_ERROR;
				786	}
				787	}
				788	}
				789
				790	if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
				791	{
				792	*err = U_BUFFER_OVERFLOW_ERROR;
				793	}
				794
				795	args->target = (char *) myTarget;
				796	args->source = mySource;
				797	}
				798
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	799	static void U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	800	T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
				801	UErrorCode * err)
				802	{
				803	const UChar *mySource = args->source;
				804	unsigned char *myTarget;
				805	int32_t *myOffsets;
				806	const UChar *sourceLimit = args->sourceLimit;
				807	const unsigned char targetLimit = (unsigned char ) args->targetLimit;
				808	UChar32 ch, ch2;
				809	unsigned int indexToWrite;
				810	unsigned char temp[sizeof(uint32_t)];
				811	int32_t offsetNum = 0;
				812
				813	if(mySource >= sourceLimit) {
				814	/* no input, nothing to do */
				815	return;
				816	}
				817
				818	/* write the BOM if necessary */
				819	if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame]	820	static const char bom[]={ (char)0xffu, (char)0xfeu, 0, 0 };
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	821	ucnv_fromUWriteBytes(args->converter,
				822	bom, 4,
				823	&args->target, args->targetLimit,
				824	&args->offsets, -1,
				825	err);
				826	args->converter->fromUnicodeStatus=0;
				827	}
				828
				829	myTarget = (unsigned char *) args->target;
				830	myOffsets = args->offsets;
				831	temp[3] = 0;
				832
				833	if (args->converter->fromUChar32)
				834	{
				835	ch = args->converter->fromUChar32;
				836	args->converter->fromUChar32 = 0;
				837	goto lowsurogate;
				838	}
				839
				840	while (mySource < sourceLimit && myTarget < targetLimit)
				841	{
				842	ch = *(mySource++);
				843
				844	if (U16_IS_SURROGATE(ch)) {
				845	if (U16_IS_LEAD(ch))
				846	{
				847	lowsurogate:
				848	if (mySource < sourceLimit)
				849	{
				850	ch2 = *mySource;
				851	if (U16_IS_TRAIL(ch2))
				852	{
				853	ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
				854	mySource++;
				855	}
				856	else {
				857	/* this is an unmatched trail code unit (2nd surrogate) */
				858	/* callback(illegal) */
				859	args->converter->fromUChar32 = ch;
				860	*err = U_ILLEGAL_CHAR_FOUND;
				861	break;
				862	}
				863	}
				864	else {
				865	/* ran out of source */
				866	args->converter->fromUChar32 = ch;
				867	if (args->flush) {
				868	/* this is an unmatched trail code unit (2nd surrogate) */
				869	/* callback(illegal) */
				870	*err = U_ILLEGAL_CHAR_FOUND;
				871	}
				872	break;
				873	}
				874	}
				875	else {
				876	/* this is an unmatched trail code unit (2nd surrogate) */
				877	/* callback(illegal) */
				878	args->converter->fromUChar32 = ch;
				879	*err = U_ILLEGAL_CHAR_FOUND;
				880	break;
				881	}
				882	}
				883
				884	/* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
				885	temp[2] = (uint8_t) (ch >> 16 & 0x1F);
				886	temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */
				887	temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */
				888
				889	for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++)
				890	{
				891	if (myTarget < targetLimit)
				892	{
				893	*(myTarget++) = temp[indexToWrite];
				894	*(myOffsets++) = offsetNum;
				895	}
				896	else
				897	{
				898	args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
				899	*err = U_BUFFER_OVERFLOW_ERROR;
				900	}
				901	}
				902	offsetNum = offsetNum + 1 + (temp[2] != 0);
				903	}
				904
				905	if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
				906	{
				907	*err = U_BUFFER_OVERFLOW_ERROR;
				908	}
				909
				910	args->target = (char *) myTarget;
				911	args->source = mySource;
				912	args->offsets = myOffsets;
				913	}
				914
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	915	static UChar32 U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	916	T_UConverter_getNextUChar_UTF32_LE(UConverterToUnicodeArgs* args,
				917	UErrorCode* err)
				918	{
				919	const uint8_t *mySource;
				920	UChar32 myUChar;
				921	int32_t length;
				922
				923	mySource = (const uint8_t *)args->source;
				924	if (mySource >= (const uint8_t *)args->sourceLimit)
				925	{
				926	/* no input */
				927	*err = U_INDEX_OUTOFBOUNDS_ERROR;
				928	return 0xffff;
				929	}
				930
				931	length = (int32_t)((const uint8_t *)args->sourceLimit - mySource);
				932	if (length < 4)
				933	{
				934	/* got a partial character */
				935	uprv_memcpy(args->converter->toUBytes, mySource, length);
				936	args->converter->toULength = (int8_t)length;
				937	args->source = (const char *)(mySource + length);
				938	*err = U_TRUNCATED_CHAR_FOUND;
				939	return 0xffff;
				940	}
				941
				942	/* Don't even try to do a direct cast because the value may be on an odd address. */
				943	myUChar = ((UChar32)mySource[3] << 24)
				944	\| ((UChar32)mySource[2] << 16)
				945	\| ((UChar32)mySource[1] << 8)
				946	\| ((UChar32)mySource[0]);
				947
				948	args->source = (const char *)(mySource + 4);
				949	if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) {
				950	return myUChar;
				951	}
				952
				953	uprv_memcpy(args->converter->toUBytes, mySource, 4);
				954	args->converter->toULength = 4;
				955
				956	*err = U_ILLEGAL_CHAR_FOUND;
				957	return 0xffff;
				958	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	959	U_CDECL_END
/*
 * Function table for the fixed-endianness UTF-32LE converter.
 * The initializer is positional; slot meanings noted below are taken from how
 * _UTF32Impl (the BOM-detecting variant later in this file) fills the same
 * positions. Slots left NULL and not annotated are not identifiable from this
 * file alone - NOTE(review): confirm against the UConverterImpl declaration.
 */
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	960	static const UConverterImpl _UTF32LEImpl = {
				961	UCNV_UTF32_LittleEndian,
				962
				963	NULL,
				964	NULL,
				965
				966	NULL, /* _UTF32Impl supplies _UTF32Open in this position */
				967	NULL,
				968	NULL, /* _UTF32Impl supplies _UTF32Reset in this position */
				969
				970	T_UConverter_toUnicode_UTF32_LE,
				971	T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC,
				972	T_UConverter_fromUnicode_UTF32_LE,
				973	T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC,
				974	T_UConverter_getNextUChar_UTF32_LE,
				975
				976	NULL, /* the TODO in _UTF32Impl names this position getStarters */
				977	NULL,
				978	NULL,
				979	NULL,
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	980	ucnv_getNonSurrogateUnicodeSet, /* same callback as in _UTF32Impl */
				981
				982	NULL,
				983	NULL
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	984	};
				985
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame^]	986	/* The 1234 CCSID refers to any version of Unicode with a little endian encoding form of UTF-32 */
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	987	static const UConverterStaticData _UTF32LEStaticData = {
				988	sizeof(UConverterStaticData),
				989	"UTF-32LE", /* converter name */
				990	1234, /* CCSID, see the comment above */
				991	UCNV_IBM, UCNV_UTF32_LittleEndian, 4, 4, /* the two 4s are presumably min/max bytes per character - confirm against UConverterStaticData */
				992	{ 0xfd, 0xff, 0, 0 }, 4, FALSE, FALSE, /* FD FF 00 00 is U+FFFD in little-endian byte order; presumably the substitution bytes and their length - confirm */
				993	0,
				994	0,
				995	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
				996	};
				997
				998
/* Shared-data record for "UTF-32LE": pairs _UTF32LEStaticData with the _UTF32LEImpl function table. */
Jungshik Shin	a05f412	2015-06-09 15:33:54 -0700	[diff] [blame]	999	const UConverterSharedData _UTF32LEData =
				1000	UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32LEStaticData, &_UTF32LEImpl);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1001
				1002	/* UTF-32 (Detect BOM) ------------------------------------------------------ */
				1003
				1004	/*
				1005	* Detect a BOM at the beginning of the stream and select UTF-32BE or UTF-32LE
				1006	* accordingly.
				1007	*
				1008	* State values:
				1009	* 0 initial state
				1010	* 1 saw 00
				1011	* 2 saw 00 00
				1012	* 3 saw 00 00 FE
				1013	* 4 -
				1014	* 5 saw FF
				1015	* 6 saw FF FE
				1016	* 7 saw FF FE 00
				1017	* 8 UTF-32BE mode
				1018	* 9 UTF-32LE mode
				1019	*
				1020	* During detection: state&3==number of matching bytes so far.
				1021	*
				1022	* On output, emit U+FEFF as the first code point.
				1023	*/
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1024	U_CDECL_BEGIN
				1025	static void U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1026	_UTF32Reset(UConverter *cnv, UConverterResetChoice choice) {
				1027	if(choice<=UCNV_RESET_TO_UNICODE) {
				1028	/* reset toUnicode: state=0 */
				1029	cnv->mode=0;
				1030	}
				1031	if(choice!=UCNV_RESET_TO_UNICODE) {
				1032	/* reset fromUnicode: prepare to output the UTF-32PE BOM */
				1033	cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM;
				1034	}
				1035	}
				1036
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1037	static void U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1038	_UTF32Open(UConverter *cnv,
				1039	UConverterLoadArgs *pArgs,
				1040	UErrorCode *pErrorCode) {
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1041	(void)pArgs;
				1042	(void)pErrorCode;
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1043	_UTF32Reset(cnv, UCNV_RESET_BOTH);
				1044	}
				1045
/*
 * Both byte order marks back to back: bytes 0..3 are 00 00 FE FF (UTF-32BE),
 * bytes 4..7 are FF FE 00 00 (UTF-32LE). _UTF32ToUnicodeWithOffsets indexes
 * this table directly with its detection state (1..3 and 5..7) and uses
 * utf32BOM+(state&4) as the start of the BOM it is currently matching.
 */
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame]	1046	static const char utf32BOM[8]={ 0, 0, (char)0xfeu, (char)0xffu, (char)0xffu, (char)0xfeu, 0, 0 };
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1047
/*
 * toUnicode implementation of the BOM-detecting "UTF-32" converter; _UTF32Impl
 * registers it for both the plain and the with-offsets slot.
 *
 * The detection state is kept in cnv->mode across calls, so a BOM may be split
 * over several input buffers:
 *  - states 0..7 consume bytes for as long as they match one of the two BOMs
 *    in utf32BOM;
 *  - on a mismatch the input is treated as UTF-32BE and the bytes matched so
 *    far are converted after all: bytes from the current buffer by rewinding
 *    source, bytes from earlier buffers by feeding the matching prefix of
 *    utf32BOM to the BE converter;
 *  - states 8 and 9 hand the rest of the buffer to the UTF-32BE / UTF-32LE
 *    converter, using the OFFSET_LOGIC variant when pArgs->offsets is set.
 *
 * After the loop, offsets produced during this call are increased by the
 * number of BOM bytes found in the current buffer, and when flushing at the
 * end of input a partially matched BOM is passed to the BE converter as
 * too-short input.
 */
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1048	static void U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1049	_UTF32ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
				1050	UErrorCode *pErrorCode) {
				1051	UConverter *cnv=pArgs->converter;
				1052	const char *source=pArgs->source;
				1053	const char *sourceLimit=pArgs->sourceLimit;
				1054	int32_t *offsets=pArgs->offsets;
				1055
				1056	int32_t state, offsetDelta;
				1057	char b;
				1058
				1059	state=cnv->mode;
				1060
				1061	/*
				1062	* If we detect a BOM in this buffer, then we must add the BOM size to the
				1063	* offsets because the actual converter function will not see and count the BOM.
				1064	* offsetDelta will have the number of the BOM bytes that are in the current buffer.
				1065	*/
				1066	offsetDelta=0;
				1067
				1068	while(source<sourceLimit && U_SUCCESS(*pErrorCode)) {
				1069	switch(state) {
				1070	case 0:
				1071	b=*source;
				1072	if(b==0) {
				1073	state=1; /* could be 00 00 FE FF */
Jungshik Shin	42d5027	2018-10-24 01:22:09 -0700	[diff] [blame]	1074	} else if(b==(char)0xffu) {
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1075	state=5; /* could be FF FE 00 00 */
				1076	} else {
				1077	state=8; /* default to UTF-32BE */
				1078	continue; /* re-dispatch in state 8 without consuming this byte */
				1079	}
				1080	++source;
				1081	break;
				1082	case 1:
				1083	case 2:
				1084	case 3:
				1085	case 5:
				1086	case 6:
				1087	case 7:
				1088	if(*source==utf32BOM[state]) {
				1089	++state;
				1090	++source;
				1091	if(state==4) {
				1092	state=8; /* detect UTF-32BE */
				1093	offsetDelta=(int32_t)(source-pArgs->source);
				1094	} else if(state==8) {
				1095	state=9; /* detect UTF-32LE */
				1096	offsetDelta=(int32_t)(source-pArgs->source);
				1097	}
				1098	} else {
				1099	/* switch to UTF-32BE and pass the previous bytes */
				1100	int32_t count=(int32_t)(source-pArgs->source); /* number of bytes from this buffer */
				1101
				1102	/* reset the source */
				1103	source=pArgs->source;
				1104
				1105	if(count==(state&3)) {
				1106	/* simple: all in the same buffer, just reset source */
				1107	} else {
				1108	UBool oldFlush=pArgs->flush;
				1109
				1110	/* some of the bytes are from a previous buffer, replay those first */
				1111	pArgs->source=utf32BOM+(state&4); /* select the correct BOM */
				1112	pArgs->sourceLimit=pArgs->source+((state&3)-count); /* replay previous bytes */
				1113	pArgs->flush=FALSE; /* this sourceLimit is not the real source stream limit */
				1114
				1115	/* no offsets: bytes from previous buffer, and not enough for output */
				1116	T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
				1117
				1118	/* restore real pointers; pArgs->source will be set in case 8/9 */
				1119	pArgs->sourceLimit=sourceLimit;
				1120	pArgs->flush=oldFlush;
				1121	}
				1122	state=8;
				1123	continue; /* re-dispatch in state 8 from the rewound source */
				1124	}
				1125	break;
				1126	case 8:
				1127	/* call UTF-32BE */
				1128	pArgs->source=source;
				1129	if(offsets==NULL) {
				1130	T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
				1131	} else {
				1132	T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(pArgs, pErrorCode);
				1133	}
				1134	source=pArgs->source;
				1135	break;
				1136	case 9:
				1137	/* call UTF-32LE */
				1138	pArgs->source=source;
				1139	if(offsets==NULL) {
				1140	T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode);
				1141	} else {
				1142	T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(pArgs, pErrorCode);
				1143	}
				1144	source=pArgs->source;
				1145	break;
				1146	default:
				1147	break; /* does not occur */
				1148	}
				1149	}
				1150
				1151	/* add BOM size to offsets - see comment at offsetDelta declaration */
				1152	if(offsets!=NULL && offsetDelta!=0) {
				1153	int32_t *offsetsLimit=pArgs->offsets; /* presumably advanced by the delegated converter past the last offset it wrote - confirm */
				1154	while(offsets<offsetsLimit) {
				1155	*offsets++ += offsetDelta;
				1156	}
				1157	}
				1158
				1159	pArgs->source=source;
				1160
				1161	if(source==sourceLimit && pArgs->flush) {
				1162	/* handle truncated input */
				1163	switch(state) {
				1164	case 0:
				1165	break; /* no input at all, nothing to do */
				1166	case 8:
				1167	T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
				1168	break;
				1169	case 9:
				1170	T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode);
				1171	break;
				1172	default:
				1173	/* handle 0<state<8: call UTF-32BE with too-short input */
				1174	pArgs->source=utf32BOM+(state&4); /* select the correct BOM */
				1175	pArgs->sourceLimit=pArgs->source+(state&3); /* replay bytes */
				1176
				1177	/* no offsets: not enough for output */
				1178	T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
				1179	pArgs->source=source; /* put the caller's real buffer pointers back */
				1180	pArgs->sourceLimit=sourceLimit;
				1181	state=8;
				1182	break;
				1183	}
				1184	}
				1185
				1186	cnv->mode=state; /* keep the detection state for the next call */
				1187	}
				1188
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1189	static UChar32 U_CALLCONV
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1190	_UTF32GetNextUChar(UConverterToUnicodeArgs *pArgs,
				1191	UErrorCode *pErrorCode) {
				1192	switch(pArgs->converter->mode) {
				1193	case 8:
				1194	return T_UConverter_getNextUChar_UTF32_BE(pArgs, pErrorCode);
				1195	case 9:
				1196	return T_UConverter_getNextUChar_UTF32_LE(pArgs, pErrorCode);
				1197	default:
				1198	return UCNV_GET_NEXT_UCHAR_USE_TO_U;
				1199	}
				1200	}
Jungshik Shin	87232d8	2017-05-13 21:10:13 -0700	[diff] [blame]	1201	U_CDECL_END
/*
 * Function table for the BOM-detecting "UTF-32" converter.
 * Both toUnicode positions use _UTF32ToUnicodeWithOffsets, which itself checks
 * whether offsets were requested. The fromUnicode positions are chosen at
 * compile time by U_IS_BIG_ENDIAN, so output uses the platform byte order.
 * The initializer is positional; NULL slots without a note are not
 * identifiable from this file alone - NOTE(review): confirm against the
 * UConverterImpl declaration.
 */
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1202	static const UConverterImpl _UTF32Impl = {
				1203	UCNV_UTF32,
				1204
				1205	NULL,
				1206	NULL,
				1207
				1208	_UTF32Open,
				1209	NULL,
				1210	_UTF32Reset,
				1211
				1212	_UTF32ToUnicodeWithOffsets, /* used when no offsets are requested ... */
				1213	_UTF32ToUnicodeWithOffsets, /* ... and when they are */
				1214	#if U_IS_BIG_ENDIAN
				1215	T_UConverter_fromUnicode_UTF32_BE,
				1216	T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC,
				1217	#else
				1218	T_UConverter_fromUnicode_UTF32_LE,
				1219	T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC,
				1220	#endif
				1221	_UTF32GetNextUChar,
				1222
				1223	NULL, /* ### TODO implement getStarters for all Unicode encodings?! */
				1224	NULL,
				1225	NULL,
				1226	NULL,
Jungshik Shin	5feb9ad	2016-10-21 12:52:48 -0700	[diff] [blame]	1227	ucnv_getNonSurrogateUnicodeSet,
				1228
				1229	NULL,
				1230	NULL
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1231	};
				1232
Frank Tang	3e05d9d	2021-11-08 14:04:04 -0800	[diff] [blame^]	1233	/* The 1236 CCSID refers to any version of Unicode with a BOM sensitive endianness of UTF-32 */
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1234	static const UConverterStaticData _UTF32StaticData = {
				1235	sizeof(UConverterStaticData),
				1236	"UTF-32", /* converter name */
				1237	1236, /* CCSID, see the comment above */
				1238	UCNV_IBM, UCNV_UTF32, 4, 4, /* the two 4s are presumably min/max bytes per character - confirm against UConverterStaticData */
				1239	#if U_IS_BIG_ENDIAN
				1240	{ 0, 0, 0xff, 0xfd }, 4, /* U+FFFD in big-endian byte order */
				1241	#else
				1242	{ 0xfd, 0xff, 0, 0 }, 4, /* U+FFFD in little-endian byte order */
				1243	#endif
				1244	FALSE, FALSE,
				1245	0,
				1246	0,
				1247	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
				1248	};
				1249
/* Shared-data record for "UTF-32": pairs _UTF32StaticData with the _UTF32Impl function table. */
Jungshik Shin	a05f412	2015-06-09 15:33:54 -0700	[diff] [blame]	1250	const UConverterSharedData _UTF32Data =
				1251	UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32StaticData, &_UTF32Impl);
jshin@chromium.org	6f31ac3	2014-03-26 22:15:14 +0000	[diff] [blame]	1252
				1253	#endif