Blame - source/common/ucnvmbcs.cpp - chromium.googlesource.com/chromium/deps/icu

blob: 4e4de8d2e9f124d33f4f3362261d810fc14b0d09 [file] [log] [blame]

Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	1	/*
				2	******************************************************************************
				3	*
Jungshik Shin	a05f412	2015-06-09 15:33:54 -0700	[diff] [blame^]	4	* Copyright (C) 2000-2015, International Business Machines
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	5	* Corporation and others. All Rights Reserved.
				6	*
				7	******************************************************************************
				8	* file name: ucnvmbcs.cpp
				9	* encoding: US-ASCII
				10	* tab size: 8 (not used)
				11	* indentation:4
				12	*
				13	* created on: 2000jul03
				14	* created by: Markus W. Scherer
				15	*
				16	* The current code in this file replaces the previous implementation
				17	* of conversion code from multi-byte codepages to Unicode and back.
				18	* This implementation supports the following:
				19	* - legacy variable-length codepages with up to 4 bytes per character
				20	* - all Unicode code points (up to 0x10ffff)
				21	* - efficient distinction of unassigned vs. illegal byte sequences
				22	* - it is possible in fromUnicode() to directly deal with simple
				23	* stateful encodings (used for EBCDIC_STATEFUL)
				24	* - it is possible to convert Unicode code points
				25	* to a single zero byte (but not as a fallback except for SBCS)
				26	*
				27	* Remaining limitations in fromUnicode:
				28	* - byte sequences must not have leading zero bytes
				29	* - except for SBCS codepages: no fallback mapping from Unicode to a zero byte
				30	* - limitation to up to 4 bytes per character
				31	*
				32	* ICU 2.8 (late 2003) adds a secondary data structure which lifts some of these
				33	* limitations and adds m:n character mappings and other features.
				34	* See ucnv_ext.h for details.
				35	*
				36	* Change history:
				37	*
				38	* 5/6/2001 Ram Moved MBCS_SINGLE_RESULT_FROM_U,MBCS_STAGE_2_FROM_U,
				39	* MBCS_VALUE_2_FROM_STAGE_2, MBCS_VALUE_4_FROM_STAGE_2
				40	* macros to ucnvmbcs.h file
				41	*/
				42
				43	#include "unicode/utypes.h"
				44
				45	#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
				46
				47	#include "unicode/ucnv.h"
				48	#include "unicode/ucnv_cb.h"
				49	#include "unicode/udata.h"
				50	#include "unicode/uset.h"
				51	#include "unicode/utf8.h"
				52	#include "unicode/utf16.h"
				53	#include "ucnv_bld.h"
				54	#include "ucnvmbcs.h"
				55	#include "ucnv_ext.h"
				56	#include "ucnv_cnv.h"
				57	#include "cmemory.h"
				58	#include "cstring.h"
				59	#include "umutex.h"
				60
				61	/* control optimizations according to the platform */
				62	#define MBCS_UNROLL_SINGLE_TO_BMP 1
				63	#define MBCS_UNROLL_SINGLE_FROM_BMP 0
				64
				65	/*
				66	* _MBCSHeader versions 5.3 & 4.3
				67	* (Note that the _MBCSHeader version is in addition to the converter formatVersion.)
				68	*
				69	* This version is optional. Version 5 is used for incompatible data format changes.
				70	* makeconv will continue to generate version 4 files if possible.
				71	*
				72	* Changes from version 4:
				73	*
				74	* The main difference is an additional _MBCSHeader field with
				75	* - the length (number of uint32_t) of the _MBCSHeader
				76	* - flags for further incompatible data format changes
				77	* - flags for further, backward compatible data format changes
				78	*
				79	* The MBCS_OPT_FROM_U flag indicates that most of the fromUnicode data is omitted from
				80	* the file and needs to be reconstituted at load time.
				81	* This requires a utf8Friendly format with an additional mbcsIndex table for fast
				82	* (and UTF-8-friendly) fromUnicode conversion for Unicode code points up to maxFastUChar.
				83	* (For details about these structures see below, and see ucnvmbcs.h.)
				84	*
				85	* utf8Friendly also implies that the fromUnicode mappings are stored in ascending order
				86	* of the Unicode code points. (This requires that the .ucm file has the \|0 etc.
				87	* precision markers for all mappings.)
				88	*
				89	* All fallbacks have been moved to the extension table, leaving only roundtrips in the
				90	* omitted data that can be reconstituted from the toUnicode data.
				91	*
				92	* Of the stage 2 table, the part corresponding to maxFastUChar and below is omitted.
				93	* With only roundtrip mappings in the base fromUnicode data, this part is fully
				94	* redundant with the mbcsIndex and will be reconstituted from that (also using the
				95	* stage 1 table which contains the information about how stage 2 was compacted).
				96	*
				97	* The rest of the stage 2 table, the part for code points above maxFastUChar,
				98	* is stored in the file and will be appended to the reconstituted part.
				99	*
				100	* The entire fromUBytes array is omitted from the file and will be reconstituted.
				101	* This is done by enumerating all toUnicode roundtrip mappings, performing
				102	* each mapping (using the stage 1 and reconstituted stage 2 tables) and
				103	* writing instead of reading the byte values.
				104	*
				105	* _MBCSHeader version 4.3
				106	*
				107	* Change from version 4.2:
				108	* - Optional utf8Friendly data structures, with 64-entry stage 3 block
				109	* allocation for parts of the BMP, and an additional mbcsIndex in non-SBCS
				110	* files which can be used instead of stages 1 & 2.
				111	* Faster lookups for roundtrips from most commonly used characters,
				112	* and lookups from UTF-8 byte sequences with a natural bit distribution.
				113	* See ucnvmbcs.h for more details.
				114	*
				115	* Change from version 4.1:
				116	* - Added an optional extension table structure at the end of the .cnv file.
				117	* It is present if the upper bits of the header flags field contains a non-zero
				118	* byte offset to it.
				119	* Files that contain only a conversion table and no base table
				120	* use the special outputType MBCS_OUTPUT_EXT_ONLY.
				121	* These contain the base table name between the MBCS header and the extension
				122	* data.
				123	*
				124	* Change from version 4.0:
				125	* - Replace header.reserved with header.fromUBytesLength so that all
				126	* fields in the data have length.
				127	*
				128	* Changes from version 3 (for performance improvements):
				129	* - new bit distribution for state table entries
				130	* - reordered action codes
				131	* - new data structure for single-byte fromUnicode
				132	* + stage 2 only contains indexes
				133	* + stage 3 stores 16 bits per character with classification bits 15..8
				134	* - no multiplier for stage 1 entries
				135	* - stage 2 for non-single-byte codepages contains the index and the flags in
				136	* one 32-bit value
				137	* - 2-byte and 4-byte fromUnicode results are stored directly as 16/32-bit integers
				138	*
				139	* For more details about old versions of the MBCS data structure, see
				140	* the corresponding versions of this file.
				141	*
				142	* Converting stateless codepage data ---------------------------------------***
				143	* (or codepage data with simple states) to Unicode.
				144	*
				145	* Data structure and algorithm for converting from complex legacy codepages
				146	* to Unicode. (Designed before 2000-may-22.)
				147	*
				148	* The basic idea is that the structure of legacy codepages can be described
				149	* with state tables.
				150	* When reading a byte stream, each input byte causes a state transition.
				151	* Some transitions result in the output of a code point, some result in
				152	* "unassigned" or "illegal" output.
				153	* This is used here for character conversion.
				154	*
				155	* The data structure begins with a state table consisting of a row
				156	* per state, with 256 entries (columns) per row for each possible input
				157	* byte value.
				158	* Each entry is 32 bits wide, with two formats distinguished by
				159	* the sign bit (bit 31):
				160	*
				161	* One format for transitional entries (bit 31 not set) for non-final bytes, and
				162	* one format for final entries (bit 31 set).
				163	* Both formats contain the number of the next state in the same bit
				164	* positions.
				165	* State 0 is the initial state.
				166	*
				167	* Most of the time, the offset values of subsequent states are added
				168	* up to a scalar value. This value will eventually be the index of
				169	* the Unicode code point in a table that follows the state table.
				170	* The effect is that the code points for final state table rows
				171	* are contiguous. The code points of final state rows follow each other
				172	* in the order of the references to those final states by previous
				173	* states, etc.
				174	*
				175	* For some terminal states, the offset is itself the output Unicode
				176	* code point (16 bits for a BMP code point or 20 bits for a supplementary
				177	* code point (stored as code point minus 0x10000 so that 20 bits are enough).
				178	* For others, the code point in the Unicode table is stored with either
				179	* one or two code units: one for BMP code points, two for a pair of
				180	* surrogates.
				181	* All code points for a final state entry take up the same number of code
				182	* units, regardless of whether they all actually _use_ the same number
				183	* of code units. This is necessary for simple array access.
				184	*
				185	* An additional feature comes in with what in ICU is called "fallback"
				186	* mappings:
				187	*
				188	* In addition to round-trippable, precise, 1:1 mappings, there are often
				189	* mappings defined between similar, though not the same, characters.
				190	* Typically, such mappings occur only in fromUnicode mapping tables because
				191	* Unicode has a superset repertoire of most other codepages. However, it
				192	* is possible to provide such mappings in the toUnicode tables, too.
				193	* In this case, the fallback mappings are partly integrated into the
				194	* general state tables because the structure of the encoding includes their
				195	* byte sequences.
				196	* For final entries in an initial state, fallback mappings are stored in
				197	* the entry itself like with roundtrip mappings.
				198	* For other final entries, they are stored in the code units table if
				199	* the entry is for a pair of code units.
				200	* For single-unit results in the code units table, there is no space to
				201	* alternatively hold a fallback mapping; in this case, the code unit
				202	* is stored as U+fffe (unassigned), and the fallback mapping needs to
				203	* be looked up by the scalar offset value in a separate table.
				204	*
				205	* "Unassigned" state entries really mean "structurally unassigned",
				206	* i.e., such a byte sequence will never have a mapping result.
				207	*
				208	* The interpretation of the bits in each entry is as follows:
				209	*
				210	* Bit 31 not set, not a terminal entry ("transitional"):
				211	* 30..24 next state
				212	* 23..0 offset delta, to be added up
				213	*
				214	* Bit 31 set, terminal ("final") entry:
				215	* 30..24 next state (regardless of action code)
				216	* 23..20 action code:
				217	* action codes 0 and 1 result in precise-mapping Unicode code points
				218	* 0 valid byte sequence
				219	* 19..16 not used, 0
				220	* 15..0 16-bit Unicode BMP code point
				221	* never U+fffe or U+ffff
				222	* 1 valid byte sequence
				223	* 19..0 20-bit Unicode supplementary code point
				224	* never U+fffe or U+ffff
				225	*
				226	* action codes 2 and 3 result in fallback (unidirectional-mapping) Unicode code points
				227	* 2 valid byte sequence (fallback)
				228	* 19..16 not used, 0
				229	* 15..0 16-bit Unicode BMP code point as fallback result
				230	* 3 valid byte sequence (fallback)
				231	* 19..0 20-bit Unicode supplementary code point as fallback result
				232	*
				233	* action codes 4 and 5 may result in roundtrip/fallback/unassigned/illegal results
				234	* depending on the code units they result in
				235	* 4 valid byte sequence
				236	* 19..9 not used, 0
				237	* 8..0 final offset delta
				238	* pointing to one 16-bit code unit which may be
				239	* fffe unassigned -- look for a fallback for this offset
				240	* ffff illegal
				241	* 5 valid byte sequence
				242	* 19..9 not used, 0
				243	* 8..0 final offset delta
				244	* pointing to two 16-bit code units
				245	* (typically UTF-16 surrogates)
				246	* the result depends on the first code unit as follows:
				247	* 0000..d7ff roundtrip BMP code point (1st alone)
				248	* d800..dbff roundtrip surrogate pair (1st, 2nd)
				249	* dc00..dfff fallback surrogate pair (1st-400, 2nd)
				250	* e000 roundtrip BMP code point (2nd alone)
				251	* e001 fallback BMP code point (2nd alone)
				252	* fffe unassigned
				253	* ffff illegal
				254	* (the final offset deltas are at most 255 * 2,
				255	* times 2 because of storing code unit pairs)
				256	*
				257	* 6 unassigned byte sequence
				258	* 19..16 not used, 0
				259	* 15..0 16-bit Unicode BMP code point U+fffe (new with version 2)
				260	* this does not contain a final offset delta because the main
				261	* purpose of this action code is to save scalar offset values;
				262	* therefore, fallback values cannot be assigned to byte
				263	* sequences that result in this action code
				264	* 7 illegal byte sequence
				265	* 19..16 not used, 0
				266	* 15..0 16-bit Unicode BMP code point U+ffff (new with version 2)
				267	* 8 state change only
				268	* 19..0 not used, 0
				269	* useful for state changes in simple stateful encodings,
				270	* at Shift-In/Shift-Out codes
				271	*
				272	*
				273	* 9..15 reserved for future use
				274	* current implementations will only perform a state change
				275	* and ignore bits 19..0
				276	*
				277	* An encoding with contiguous ranges of unassigned byte sequences, like
				278	* Shift-JIS and especially EUC-TW, can be stored efficiently by having
				279	* at least two states for the trail bytes:
				280	* One trail byte state that results in code points, and one that only
				281	* has "unassigned" and "illegal" terminal states.
				282	*
				283	* Note: partly by accident, this data structure supports simple stateful
				284	* encodings without any additional logic.
				285	* Currently, only simple Shift-In/Shift-Out schemes are handled with
				286	* appropriate state tables (especially EBCDIC_STATEFUL!).
				287	*
				288	* MBCS version 2 added:
				289	* unassigned and illegal action codes have U+fffe and U+ffff
				290	* instead of unused bits; this is useful for _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP()
				291	*
				292	* Converting from Unicode to codepage bytes --------------------------------***
				293	*
				294	* The conversion data structure for fromUnicode is designed for the known
				295	* structure of Unicode. It maps from 21-bit code points (0..0x10ffff) to
				296	* a sequence of 1..4 bytes, in addition to a flag that indicates if there is
				297	* a roundtrip mapping.
				298	*
				299	* The lookup is done with a 3-stage trie, using 11/6/4 bits for stage 1/2/3
				300	* like in the character properties table.
				301	* The beginning of the trie is at offsetFromUTable, the beginning of stage 3
				302	* with the resulting bytes is at offsetFromUBytes.
				303	*
				304	* Beginning with version 4, single-byte codepages have a significantly different
				305	* trie compared to other codepages.
				306	* In all cases, the entry in stage 1 is directly the index of the block of
				307	* 64 entries in stage 2.
				308	*
				309	* Single-byte lookup:
				310	*
				311	* Stage 2 only contains 16-bit indexes directly to the 16-blocks in stage 3.
				312	* Stage 3 contains one 16-bit word per result:
				313	* Bits 15..8 indicate the kind of result:
				314	* f roundtrip result
				315	* c fallback result from private-use code point
				316	* 8 fallback result from other code points
				317	* 0 unassigned
				318	* Bits 7..0 contain the codepage byte. A zero byte is always possible.
				319	*
				320	* In version 4.3, the runtime code can build an sbcsIndex for a utf8Friendly
				321	* file. For 2-byte UTF-8 byte sequences and some 3-byte sequences the lookup
				322	* becomes a 2-stage (single-index) trie lookup with 6 bits for stage 3.
				323	* ASCII code points can be looked up with a linear array access into stage 3.
				324	* See maxFastUChar and other details in ucnvmbcs.h.
				325	*
				326	* Multi-byte lookup:
				327	*
				328	* Stage 2 contains a 32-bit word for each 16-block in stage 3:
				329	* Bits 31..16 contain flags for which stage 3 entries contain roundtrip results
				330	* test: MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)
				331	* If this test is false, then a non-zero result will be interpreted as
				332	* a fallback mapping.
				333	* Bits 15..0 contain the index to stage 3, which must be multiplied by 16*(bytes per char)
				334	*
				335	* Stage 3 contains 2, 3, or 4 bytes per result.
				336	* 2 or 4 bytes are stored as uint16_t/uint32_t in platform endianness,
				337	* while 3 bytes are stored as bytes in big-endian order.
				338	* Leading zero bytes are ignored, and the number of bytes is counted.
				339	* A zero byte mapping result is possible as a roundtrip result.
				340	* For some output types, the actual result is processed from this;
				341	* see ucnv_MBCSFromUnicodeWithOffsets().
				342	*
				343	* Note that stage 1 always contains 0x440=1088 entries (0x440==0x110000>>10),
				344	* or (version 3 and up) for BMP-only codepages, it contains 64 entries.
				345	*
				346	* In version 4.3, a utf8Friendly file contains an mbcsIndex table.
				347	* For 2-byte UTF-8 byte sequences and most 3-byte sequences the lookup
				348	* becomes a 2-stage (single-index) trie lookup with 6 bits for stage 3.
				349	* ASCII code points can be looked up with a linear array access into stage 3.
				350	* See maxFastUChar, mbcsIndex and other details in ucnvmbcs.h.
				351	*
				352	* In version 3, stage 2 blocks may overlap by multiples of the multiplier
				353	* for compaction.
				354	* In version 4, stage 2 blocks (and for single-byte codepages, stage 3 blocks)
				355	* may overlap by any number of entries.
				356	*
				357	* MBCS version 2 added:
				358	* the converter checks for known output types, which allows
				359	* adding new ones without crashing an unaware converter
				360	*/
				361
				362	/**
				363	* Callback from ucnv_MBCSEnumToUnicode(), takes 32 mappings from
				364	* consecutive sequences of bytes, starting from the one encoded in value,
				365	* to Unicode code points. (Multiple mappings to reduce per-function call overhead.)
				366	* Does not currently support m:n mappings or reverse fallbacks.
				367	* This function will not be called for sequences of bytes with leading zeros.
				368	*
				369	* @param context an opaque pointer, as passed into ucnv_MBCSEnumToUnicode()
				370	* @param value contains 1..4 bytes of the first byte sequence, right-aligned
				371	* @param codePoints resulting Unicode code points, or negative if a byte sequence does
				372	* not map to anything
				373	* @return TRUE to continue enumeration, FALSE to stop
				374	*/
				375	typedef UBool U_CALLCONV
				376	UConverterEnumToUCallback(const void *context, uint32_t value, UChar32 codePoints[32]);
				377
				378	static void
				379	ucnv_MBCSLoad(UConverterSharedData *sharedData,
				380	UConverterLoadArgs *pArgs,
				381	const uint8_t *raw,
				382	UErrorCode *pErrorCode);
				383
				384	static void
				385	ucnv_MBCSUnload(UConverterSharedData *sharedData);
				386
				387	static void
				388	ucnv_MBCSOpen(UConverter *cnv,
				389	UConverterLoadArgs *pArgs,
				390	UErrorCode *pErrorCode);
				391
				392	static UChar32
				393	ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
				394	UErrorCode *pErrorCode);
				395
				396	static void
				397	ucnv_MBCSGetStarters(const UConverter* cnv,
				398	UBool starters[256],
				399	UErrorCode *pErrorCode);
				400
				401	static const char *
				402	ucnv_MBCSGetName(const UConverter *cnv);
				403
				404	static void
				405	ucnv_MBCSWriteSub(UConverterFromUnicodeArgs *pArgs,
				406	int32_t offsetIndex,
				407	UErrorCode *pErrorCode);
				408
				409	static UChar32
				410	ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
				411	UErrorCode *pErrorCode);
				412
				413	static void
				414	ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
				415	UConverterToUnicodeArgs *pToUArgs,
				416	UErrorCode *pErrorCode);
				417
				418	static void
				419	ucnv_MBCSGetUnicodeSet(const UConverter *cnv,
				420	const USetAdder *sa,
				421	UConverterUnicodeSet which,
				422	UErrorCode *pErrorCode);
				423
				424	static void
				425	ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
				426	UConverterToUnicodeArgs *pToUArgs,
				427	UErrorCode *pErrorCode);
				428
				429	static const UConverterImpl _SBCSUTF8Impl={
				430	UCNV_MBCS,
				431
				432	ucnv_MBCSLoad,
				433	ucnv_MBCSUnload,
				434
				435	ucnv_MBCSOpen,
				436	NULL,
				437	NULL,
				438
				439	ucnv_MBCSToUnicodeWithOffsets,
				440	ucnv_MBCSToUnicodeWithOffsets,
				441	ucnv_MBCSFromUnicodeWithOffsets,
				442	ucnv_MBCSFromUnicodeWithOffsets,
				443	ucnv_MBCSGetNextUChar,
				444
				445	ucnv_MBCSGetStarters,
				446	ucnv_MBCSGetName,
				447	ucnv_MBCSWriteSub,
				448	NULL,
				449	ucnv_MBCSGetUnicodeSet,
				450
				451	NULL,
				452	ucnv_SBCSFromUTF8
				453	};
				454
				455	static const UConverterImpl _DBCSUTF8Impl={
				456	UCNV_MBCS,
				457
				458	ucnv_MBCSLoad,
				459	ucnv_MBCSUnload,
				460
				461	ucnv_MBCSOpen,
				462	NULL,
				463	NULL,
				464
				465	ucnv_MBCSToUnicodeWithOffsets,
				466	ucnv_MBCSToUnicodeWithOffsets,
				467	ucnv_MBCSFromUnicodeWithOffsets,
				468	ucnv_MBCSFromUnicodeWithOffsets,
				469	ucnv_MBCSGetNextUChar,
				470
				471	ucnv_MBCSGetStarters,
				472	ucnv_MBCSGetName,
				473	ucnv_MBCSWriteSub,
				474	NULL,
				475	ucnv_MBCSGetUnicodeSet,
				476
				477	NULL,
				478	ucnv_DBCSFromUTF8
				479	};
				480
				481	static const UConverterImpl _MBCSImpl={
				482	UCNV_MBCS,
				483
				484	ucnv_MBCSLoad,
				485	ucnv_MBCSUnload,
				486
				487	ucnv_MBCSOpen,
				488	NULL,
				489	NULL,
				490
				491	ucnv_MBCSToUnicodeWithOffsets,
				492	ucnv_MBCSToUnicodeWithOffsets,
				493	ucnv_MBCSFromUnicodeWithOffsets,
				494	ucnv_MBCSFromUnicodeWithOffsets,
				495	ucnv_MBCSGetNextUChar,
				496
				497	ucnv_MBCSGetStarters,
				498	ucnv_MBCSGetName,
				499	ucnv_MBCSWriteSub,
				500	NULL,
				501	ucnv_MBCSGetUnicodeSet,
				502	NULL,
				503	NULL
				504	};
				505
				506
				507	/* Static data is in tools/makeconv/ucnvstat.c for data-based
				508	* converters. Be sure to update it as well.
				509	*/
				510
				511	const UConverterSharedData _MBCSData={
				512	sizeof(UConverterSharedData), 1,
Jungshik Shin	a05f412	2015-06-09 15:33:54 -0700	[diff] [blame^]	513	NULL, NULL, FALSE, TRUE, &_MBCSImpl,
Jungshik Shin (jungshik at google)	0f8746a	2015-01-08 15:46:45 -0800	[diff] [blame]	514	0, UCNV_MBCS_TABLE_INITIALIZER
				515	};
				516
				517
				518	/* GB 18030 data ------------------------------------------------------------ */
				519
				520	/* helper macros for linear values for GB 18030 four-byte sequences */
				521	#define LINEAR_18030(a, b, c, d) ((((a)10+(b))126L+(c))*10L+(d))
				522
				523	#define LINEAR_18030_BASE LINEAR_18030(0x81, 0x30, 0x81, 0x30)
				524
				525	#define LINEAR(x) LINEAR_18030(x>>24, (x>>16)&0xff, (x>>8)&0xff, x&0xff)
				526
				527	/*
				528	* Some ranges of GB 18030 where both the Unicode code points and the
				529	* GB four-byte sequences are contiguous and are handled algorithmically by
				530	* the special callback functions below.
				531	* The values are start & end of Unicode & GB codes.
				532	*
				533	* Note that single surrogates are not mapped by GB 18030
				534	* as of the re-released mapping tables from 2000-nov-30.
				535	*/
				536	static const uint32_t
				537	gb18030Ranges[14][4]={
				538	{0x10000, 0x10FFFF, LINEAR(0x90308130), LINEAR(0xE3329A35)},
				539	{0x9FA6, 0xD7FF, LINEAR(0x82358F33), LINEAR(0x8336C738)},
				540	{0x0452, 0x1E3E, LINEAR(0x8130D330), LINEAR(0x8135F436)},
				541	{0x1E40, 0x200F, LINEAR(0x8135F438), LINEAR(0x8136A531)},
				542	{0xE865, 0xF92B, LINEAR(0x8336D030), LINEAR(0x84308534)},
				543	{0x2643, 0x2E80, LINEAR(0x8137A839), LINEAR(0x8138FD38)},
				544	{0xFA2A, 0xFE2F, LINEAR(0x84309C38), LINEAR(0x84318537)},
				545	{0x3CE1, 0x4055, LINEAR(0x8231D438), LINEAR(0x8232AF32)},
				546	{0x361B, 0x3917, LINEAR(0x8230A633), LINEAR(0x8230F237)},
				547	{0x49B8, 0x4C76, LINEAR(0x8234A131), LINEAR(0x8234E733)},
				548	{0x4160, 0x4336, LINEAR(0x8232C937), LINEAR(0x8232F837)},
				549	{0x478E, 0x4946, LINEAR(0x8233E838), LINEAR(0x82349638)},
				550	{0x44D7, 0x464B, LINEAR(0x8233A339), LINEAR(0x8233C931)},
				551	{0xFFE6, 0xFFFF, LINEAR(0x8431A234), LINEAR(0x8431A439)}
				552	};
				553
				554	/* bit flag for UConverter.options indicating GB 18030 special handling */
				555	#define _MBCS_OPTION_GB18030 0x8000
				556
				557	/* bit flag for UConverter.options indicating KEIS,JEF,JIF special handling */
				558	#define _MBCS_OPTION_KEIS 0x01000
				559	#define _MBCS_OPTION_JEF 0x02000
				560	#define _MBCS_OPTION_JIPS 0x04000
				561
				562	#define KEIS_SO_CHAR_1 0x0A
				563	#define KEIS_SO_CHAR_2 0x42
				564	#define KEIS_SI_CHAR_1 0x0A
				565	#define KEIS_SI_CHAR_2 0x41
				566
				567	#define JEF_SO_CHAR 0x28
				568	#define JEF_SI_CHAR 0x29
				569
				570	#define JIPS_SO_CHAR_1 0x1A
				571	#define JIPS_SO_CHAR_2 0x70
				572	#define JIPS_SI_CHAR_1 0x1A
				573	#define JIPS_SI_CHAR_2 0x71
				574
				575	enum SISO_Option {
				576	SI,
				577	SO
				578	};
				579	typedef enum SISO_Option SISO_Option;
				580
				581	static int32_t getSISOBytes(SISO_Option option, uint32_t cnvOption, uint8_t *value) {
				582	int32_t SISOLength = 0;
				583
				584	switch (option) {
				585	case SI:
				586	if ((cnvOption&_MBCS_OPTION_KEIS)!=0) {
				587	value[0] = KEIS_SI_CHAR_1;
				588	value[1] = KEIS_SI_CHAR_2;
				589	SISOLength = 2;
				590	} else if ((cnvOption&_MBCS_OPTION_JEF)!=0) {
				591	value[0] = JEF_SI_CHAR;
				592	SISOLength = 1;
				593	} else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) {
				594	value[0] = JIPS_SI_CHAR_1;
				595	value[1] = JIPS_SI_CHAR_2;
				596	SISOLength = 2;
				597	} else {
				598	value[0] = UCNV_SI;
				599	SISOLength = 1;
				600	}
				601	break;
				602	case SO:
				603	if ((cnvOption&_MBCS_OPTION_KEIS)!=0) {
				604	value[0] = KEIS_SO_CHAR_1;
				605	value[1] = KEIS_SO_CHAR_2;
				606	SISOLength = 2;
				607	} else if ((cnvOption&_MBCS_OPTION_JEF)!=0) {
				608	value[0] = JEF_SO_CHAR;
				609	SISOLength = 1;
				610	} else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) {
				611	value[0] = JIPS_SO_CHAR_1;
				612	value[1] = JIPS_SO_CHAR_2;
				613	SISOLength = 2;
				614	} else {
				615	value[0] = UCNV_SO;
				616	SISOLength = 1;
				617	}
				618	break;
				619	default:
				620	/* Should never happen. */
				621	break;
				622	}
				623
				624	return SISOLength;
				625	}
				626
				627	/* Miscellaneous ------------------------------------------------------------ */
				628
				629	/* similar to ucnv_MBCSGetNextUChar() but recursive */
				630	static UBool
				631	enumToU(UConverterMBCSTable *mbcsTable, int8_t stateProps[],
				632	int32_t state, uint32_t offset,
				633	uint32_t value,
				634	UConverterEnumToUCallback callback, const void context,
				635	UErrorCode *pErrorCode) {
				636	UChar32 codePoints[32];
				637	const int32_t *row;
				638	const uint16_t *unicodeCodeUnits;
				639	UChar32 anyCodePoints;
				640	int32_t b, limit;
				641
				642	row=mbcsTable->stateTable[state];
				643	unicodeCodeUnits=mbcsTable->unicodeCodeUnits;
				644
				645	value<<=8;
				646	anyCodePoints=-1; /* becomes non-negative if there is a mapping */
				647
				648	b=(stateProps[state]&0x38)<<2;
				649	if(b==0 && stateProps[state]>=0x40) {
				650	/* skip byte sequences with leading zeros because they are not stored in the fromUnicode table */
				651	codePoints[0]=U_SENTINEL;
				652	b=1;
				653	}
				654	limit=((stateProps[state]&7)+1)<<5;
				655	while(b<limit) {
				656	int32_t entry=row[b];
				657	if(MBCS_ENTRY_IS_TRANSITION(entry)) {
				658	int32_t nextState=MBCS_ENTRY_TRANSITION_STATE(entry);
				659	if(stateProps[nextState]>=0) {
				660	/* recurse to a state with non-ignorable actions */
				661	if(!enumToU(
				662	mbcsTable, stateProps, nextState,
				663	offset+MBCS_ENTRY_TRANSITION_OFFSET(entry),
				664	value\|(uint32_t)b,
				665	callback, context,
				666	pErrorCode)) {
				667	return FALSE;
				668	}
				669	}
				670	codePoints[b&0x1f]=U_SENTINEL;
				671	} else {
				672	UChar32 c;
				673	int32_t action;
				674
				675	/*
				676	* An if-else-if chain provides more reliable performance for
				677	* the most common cases compared to a switch.
				678	*/
				679	action=MBCS_ENTRY_FINAL_ACTION(entry);
				680	if(action==MBCS_STATE_VALID_DIRECT_16) {
				681	/* output BMP code point */
				682	c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				683	} else if(action==MBCS_STATE_VALID_16) {
				684	int32_t finalOffset=offset+MBCS_ENTRY_FINAL_VALUE_16(entry);
				685	c=unicodeCodeUnits[finalOffset];
				686	if(c<0xfffe) {
				687	/* output BMP code point */
				688	} else {
				689	c=U_SENTINEL;
				690	}
				691	} else if(action==MBCS_STATE_VALID_16_PAIR) {
				692	int32_t finalOffset=offset+MBCS_ENTRY_FINAL_VALUE_16(entry);
				693	c=unicodeCodeUnits[finalOffset++];
				694	if(c<0xd800) {
				695	/* output BMP code point below 0xd800 */
				696	} else if(c<=0xdbff) {
				697	/* output roundtrip or fallback supplementary code point */
				698	c=((c&0x3ff)<<10)+unicodeCodeUnits[finalOffset]+(0x10000-0xdc00);
				699	} else if(c==0xe000) {
				700	/* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
				701	c=unicodeCodeUnits[finalOffset];
				702	} else {
				703	c=U_SENTINEL;
				704	}
				705	} else if(action==MBCS_STATE_VALID_DIRECT_20) {
				706	/* output supplementary code point */
				707	c=(UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000);
				708	} else {
				709	c=U_SENTINEL;
				710	}
				711
				712	codePoints[b&0x1f]=c;
				713	anyCodePoints&=c;
				714	}
				715	if(((++b)&0x1f)==0) {
				716	if(anyCodePoints>=0) {
				717	if(!callback(context, value\|(uint32_t)(b-0x20), codePoints)) {
				718	return FALSE;
				719	}
				720	anyCodePoints=-1;
				721	}
				722	}
				723	}
				724	return TRUE;
				725	}
				726
				727	/*
				728	* Only called if stateProps[state]==-1.
				729	* A recursive call may do stateProps[state]\|=0x40 if this state is the target of an
				730	* MBCS_STATE_CHANGE_ONLY.
				731	*/
				732	static int8_t
				733	getStateProp(const int32_t (*stateTable)[256], int8_t stateProps[], int state) {
				734	const int32_t *row;
				735	int32_t min, max, entry, nextState;
				736
				737	row=stateTable[state];
				738	stateProps[state]=0;
				739
				740	/* find first non-ignorable state */
				741	for(min=0;; ++min) {
				742	entry=row[min];
				743	nextState=MBCS_ENTRY_STATE(entry);
				744	if(stateProps[nextState]==-1) {
				745	getStateProp(stateTable, stateProps, nextState);
				746	}
				747	if(MBCS_ENTRY_IS_TRANSITION(entry)) {
				748	if(stateProps[nextState]>=0) {
				749	break;
				750	}
				751	} else if(MBCS_ENTRY_FINAL_ACTION(entry)<MBCS_STATE_UNASSIGNED) {
				752	break;
				753	}
				754	if(min==0xff) {
				755	stateProps[state]=-0x40; /* (int8_t)0xc0 */
				756	return stateProps[state];
				757	}
				758	}
				759	stateProps[state]\|=(int8_t)((min>>5)<<3);
				760
				761	/* find last non-ignorable state */
				762	for(max=0xff; min<max; --max) {
				763	entry=row[max];
				764	nextState=MBCS_ENTRY_STATE(entry);
				765	if(stateProps[nextState]==-1) {
				766	getStateProp(stateTable, stateProps, nextState);
				767	}
				768	if(MBCS_ENTRY_IS_TRANSITION(entry)) {
				769	if(stateProps[nextState]>=0) {
				770	break;
				771	}
				772	} else if(MBCS_ENTRY_FINAL_ACTION(entry)<MBCS_STATE_UNASSIGNED) {
				773	break;
				774	}
				775	}
				776	stateProps[state]\|=(int8_t)(max>>5);
				777
				778	/* recurse further and collect direct-state information */
				779	while(min<=max) {
				780	entry=row[min];
				781	nextState=MBCS_ENTRY_STATE(entry);
				782	if(stateProps[nextState]==-1) {
				783	getStateProp(stateTable, stateProps, nextState);
				784	}
				785	if(MBCS_ENTRY_IS_FINAL(entry)) {
				786	stateProps[nextState]\|=0x40;
				787	if(MBCS_ENTRY_FINAL_ACTION(entry)<=MBCS_STATE_FALLBACK_DIRECT_20) {
				788	stateProps[state]\|=0x40;
				789	}
				790	}
				791	++min;
				792	}
				793	return stateProps[state];
				794	}
				795
				796	/*
				797	* Internal function enumerating the toUnicode data of an MBCS converter.
				798	* Currently only used for reconstituting data for a MBCS_OPT_NO_FROM_U
				799	* table, but could also be used for a future ucnv_getUnicodeSet() option
				800	* that includes reverse fallbacks (after updating this function's implementation).
				801	* Currently only handles roundtrip mappings.
				802	* Does not currently handle extensions.
				803	*/
				804	static void
				805	ucnv_MBCSEnumToUnicode(UConverterMBCSTable *mbcsTable,
				806	UConverterEnumToUCallback callback, const void context,
				807	UErrorCode *pErrorCode) {
				808	/*
				809	* Properties for each state, to speed up the enumeration.
				810	* Ignorable actions are unassigned/illegal/state-change-only:
				811	* They do not lead to mappings.
				812	*
				813	* Bits 7..6:
				814	* 1 direct/initial state (stateful converters have multiple)
				815	* 0 non-initial state with transitions or with non-ignorable result actions
				816	* -1 final state with only ignorable actions
				817	*
				818	* Bits 5..3:
				819	* The lowest byte value with non-ignorable actions is
				820	* value<<5 (rounded down).
				821	*
				822	* Bits 2..0:
				823	* The highest byte value with non-ignorable actions is
				824	* (value<<5)&0x1f (rounded up).
				825	*/
				826	int8_t stateProps[MBCS_MAX_STATE_COUNT];
				827	int32_t state;
				828
				829	uprv_memset(stateProps, -1, sizeof(stateProps));
				830
				831	/* recurse from state 0 and set all stateProps */
				832	getStateProp(mbcsTable->stateTable, stateProps, 0);
				833
				834	for(state=0; state<mbcsTable->countStates; ++state) {
				835	/*if(stateProps[state]==-1) {
				836	printf("unused/unreachable <icu:state> %d\n", state);
				837	}*/
				838	if(stateProps[state]>=0x40) {
				839	/* start from each direct state */
				840	enumToU(
				841	mbcsTable, stateProps, state, 0, 0,
				842	callback, context,
				843	pErrorCode);
				844	}
				845	}
				846	}
				847
				848	U_CFUNC void
				849	ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData,
				850	const USetAdder *sa,
				851	UConverterUnicodeSet which,
				852	UConverterSetFilter filter,
				853	UErrorCode *pErrorCode) {
				854	const UConverterMBCSTable *mbcsTable;
				855	const uint16_t *table;
				856
				857	uint32_t st3;
				858	uint16_t st1, maxStage1, st2;
				859
				860	UChar32 c;
				861
				862	/* enumerate the from-Unicode trie table */
				863	mbcsTable=&sharedData->mbcs;
				864	table=mbcsTable->fromUnicodeTable;
				865	if(mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
				866	maxStage1=0x440;
				867	} else {
				868	maxStage1=0x40;
				869	}
				870
				871	c=0; /* keep track of the current code point while enumerating */
				872
				873	if(mbcsTable->outputType==MBCS_OUTPUT_1) {
				874	const uint16_t stage2, stage3, *results;
				875	uint16_t minValue;
				876
				877	results=(const uint16_t *)mbcsTable->fromUnicodeBytes;
				878
				879	/*
				880	* Set a threshold variable for selecting which mappings to use.
				881	* See ucnv_MBCSSingleFromBMPWithOffsets() and
				882	* MBCS_SINGLE_RESULT_FROM_U() for details.
				883	*/
				884	if(which==UCNV_ROUNDTRIP_SET) {
				885	/* use only roundtrips */
				886	minValue=0xf00;
				887	} else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ {
				888	/* use all roundtrip and fallback results */
				889	minValue=0x800;
				890	}
				891
				892	for(st1=0; st1<maxStage1; ++st1) {
				893	st2=table[st1];
				894	if(st2>maxStage1) {
				895	stage2=table+st2;
				896	for(st2=0; st2<64; ++st2) {
				897	if((st3=stage2[st2])!=0) {
				898	/* read the stage 3 block */
				899	stage3=results+st3;
				900
				901	do {
				902	if(*stage3++>=minValue) {
				903	sa->add(sa->set, c);
				904	}
				905	} while((++c&0xf)!=0);
				906	} else {
				907	c+=16; /* empty stage 3 block */
				908	}
				909	}
				910	} else {
				911	c+=1024; /* empty stage 2 block */
				912	}
				913	}
				914	} else {
				915	const uint32_t *stage2;
				916	const uint8_t stage3, bytes;
				917	uint32_t st3Multiplier;
				918	uint32_t value;
				919	UBool useFallback;
				920
				921	bytes=mbcsTable->fromUnicodeBytes;
				922
				923	useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET);
				924
				925	switch(mbcsTable->outputType) {
				926	case MBCS_OUTPUT_3:
				927	case MBCS_OUTPUT_4_EUC:
				928	st3Multiplier=3;
				929	break;
				930	case MBCS_OUTPUT_4:
				931	st3Multiplier=4;
				932	break;
				933	default:
				934	st3Multiplier=2;
				935	break;
				936	}
				937
				938	for(st1=0; st1<maxStage1; ++st1) {
				939	st2=table[st1];
				940	if(st2>(maxStage1>>1)) {
				941	stage2=(const uint32_t *)table+st2;
				942	for(st2=0; st2<64; ++st2) {
				943	if((st3=stage2[st2])!=0) {
				944	/* read the stage 3 block */
				945	stage3=bytes+st3Multiplier16(uint32_t)(uint16_t)st3;
				946
				947	/* get the roundtrip flags for the stage 3 block */
				948	st3>>=16;
				949
				950	/*
				951	* Add code points for which the roundtrip flag is set,
				952	* or which map to non-zero bytes if we use fallbacks.
				953	* See ucnv_MBCSFromUnicodeWithOffsets() for details.
				954	*/
				955	switch(filter) {
				956	case UCNV_SET_FILTER_NONE:
				957	do {
				958	if(st3&1) {
				959	sa->add(sa->set, c);
				960	stage3+=st3Multiplier;
				961	} else if(useFallback) {
				962	uint8_t b=0;
				963	switch(st3Multiplier) {
				964	case 4:
				965	b\|=*stage3++;
				966	case 3: /fall through/
				967	b\|=*stage3++;
				968	case 2: /fall through/
				969	b\|=stage3[0]\|stage3[1];
				970	stage3+=2;
				971	default:
				972	break;
				973	}
				974	if(b!=0) {
				975	sa->add(sa->set, c);
				976	}
				977	}
				978	st3>>=1;
				979	} while((++c&0xf)!=0);
				980	break;
				981	case UCNV_SET_FILTER_DBCS_ONLY:
				982	/* Ignore single-byte results (<0x100). */
				983	do {
				984	if(((st3&1)!=0 \|\| useFallback) && ((const uint16_t )stage3)>=0x100) {
				985	sa->add(sa->set, c);
				986	}
				987	st3>>=1;
				988	stage3+=2; /* +=st3Multiplier */
				989	} while((++c&0xf)!=0);
				990	break;
				991	case UCNV_SET_FILTER_2022_CN:
				992	/* Only add code points that map to CNS 11643 planes 1 & 2 for non-EXT ISO-2022-CN. */
				993	do {
				994	if(((st3&1)!=0 \|\| useFallback) && ((value=*stage3)==0x81 \|\| value==0x82)) {
				995	sa->add(sa->set, c);
				996	}
				997	st3>>=1;
				998	stage3+=3; /* +=st3Multiplier */
				999	} while((++c&0xf)!=0);
				1000	break;
				1001	case UCNV_SET_FILTER_SJIS:
				1002	/* Only add code points that map to Shift-JIS codes corresponding to JIS X 0208. */
				1003	do {
				1004	if(((st3&1)!=0 \|\| useFallback) && (value=((const uint16_t )stage3))>=0x8140 && value<=0xeffc) {
				1005	sa->add(sa->set, c);
				1006	}
				1007	st3>>=1;
				1008	stage3+=2; /* +=st3Multiplier */
				1009	} while((++c&0xf)!=0);
				1010	break;
				1011	case UCNV_SET_FILTER_GR94DBCS:
				1012	/* Only add code points that map to ISO 2022 GR 94 DBCS codes (each byte A1..FE). */
				1013	do {
				1014	if( ((st3&1)!=0 \|\| useFallback) &&
				1015	(uint16_t)((value=((const uint16_t )stage3)) - 0xa1a1)<=(0xfefe - 0xa1a1) &&
				1016	(uint8_t)(value-0xa1)<=(0xfe - 0xa1)
				1017	) {
				1018	sa->add(sa->set, c);
				1019	}
				1020	st3>>=1;
				1021	stage3+=2; /* +=st3Multiplier */
				1022	} while((++c&0xf)!=0);
				1023	break;
				1024	case UCNV_SET_FILTER_HZ:
				1025	/* Only add code points that are suitable for HZ DBCS (lead byte A1..FD). */
				1026	do {
				1027	if( ((st3&1)!=0 \|\| useFallback) &&
				1028	(uint16_t)((value=((const uint16_t )stage3))-0xa1a1)<=(0xfdfe - 0xa1a1) &&
				1029	(uint8_t)(value-0xa1)<=(0xfe - 0xa1)
				1030	) {
				1031	sa->add(sa->set, c);
				1032	}
				1033	st3>>=1;
				1034	stage3+=2; /* +=st3Multiplier */
				1035	} while((++c&0xf)!=0);
				1036	break;
				1037	default:
				1038	*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
				1039	return;
				1040	}
				1041	} else {
				1042	c+=16; /* empty stage 3 block */
				1043	}
				1044	}
				1045	} else {
				1046	c+=1024; /* empty stage 2 block */
				1047	}
				1048	}
				1049	}
				1050
				1051	ucnv_extGetUnicodeSet(sharedData, sa, which, filter, pErrorCode);
				1052	}
				1053
				1054	U_CFUNC void
				1055	ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
				1056	const USetAdder *sa,
				1057	UConverterUnicodeSet which,
				1058	UErrorCode *pErrorCode) {
				1059	ucnv_MBCSGetFilteredUnicodeSetForUnicode(
				1060	sharedData, sa, which,
				1061	sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ?
				1062	UCNV_SET_FILTER_DBCS_ONLY :
				1063	UCNV_SET_FILTER_NONE,
				1064	pErrorCode);
				1065	}
				1066
				1067	static void
				1068	ucnv_MBCSGetUnicodeSet(const UConverter *cnv,
				1069	const USetAdder *sa,
				1070	UConverterUnicodeSet which,
				1071	UErrorCode *pErrorCode) {
				1072	if(cnv->options&_MBCS_OPTION_GB18030) {
				1073	sa->addRange(sa->set, 0, 0xd7ff);
				1074	sa->addRange(sa->set, 0xe000, 0x10ffff);
				1075	} else {
				1076	ucnv_MBCSGetUnicodeSetForUnicode(cnv->sharedData, sa, which, pErrorCode);
				1077	}
				1078	}
				1079
				1080	/* conversion extensions for input not in the main table -------------------- */
				1081
				1082	/*
				1083	* Hardcoded extension handling for GB 18030.
				1084	* Definition of LINEAR macros and gb18030Ranges see near the beginning of the file.
				1085	*
				1086	* In the future, conversion extensions may handle m:n mappings and delta tables,
				1087	* see http://source.icu-project.org/repos/icu/icuhtml/trunk/design/conversion/conversion_extensions.html
				1088	*
				1089	* If an input character cannot be mapped, then these functions set an error
				1090	* code. The framework will then call the callback function.
				1091	*/
				1092
				1093	/*
				1094	* @return if(U_FAILURE) return the code point for cnv->fromUChar32
				1095	* else return 0 after output has been written to the target
				1096	*/
				1097	static UChar32
				1098	_extFromU(UConverter cnv, const UConverterSharedData sharedData,
				1099	UChar32 cp,
				1100	const UChar *source, const UChar sourceLimit,
				1101	uint8_t *target, const uint8_t targetLimit,
				1102	int32_t **offsets, int32_t sourceIndex,
				1103	UBool flush,
				1104	UErrorCode *pErrorCode) {
				1105	const int32_t *cx;
				1106
				1107	cnv->useSubChar1=FALSE;
				1108
				1109	if( (cx=sharedData->mbcs.extIndexes)!=NULL &&
				1110	ucnv_extInitialMatchFromU(
				1111	cnv, cx,
				1112	cp, source, sourceLimit,
				1113	(char *)target, (char )targetLimit,
				1114	offsets, sourceIndex,
				1115	flush,
				1116	pErrorCode)
				1117	) {
				1118	return 0; /* an extension mapping handled the input */
				1119	}
				1120
				1121	/* GB 18030 */
				1122	if((cnv->options&_MBCS_OPTION_GB18030)!=0) {
				1123	const uint32_t *range;
				1124	int32_t i;
				1125
				1126	range=gb18030Ranges[0];
				1127	for(i=0; i<UPRV_LENGTHOF(gb18030Ranges); range+=4, ++i) {
				1128	if(range[0]<=(uint32_t)cp && (uint32_t)cp<=range[1]) {
				1129	/* found the Unicode code point, output the four-byte sequence for it */
				1130	uint32_t linear;
				1131	char bytes[4];
				1132
				1133	/* get the linear value of the first GB 18030 code in this range */
				1134	linear=range[2]-LINEAR_18030_BASE;
				1135
				1136	/* add the offset from the beginning of the range */
				1137	linear+=((uint32_t)cp-range[0]);
				1138
				1139	/* turn this into a four-byte sequence */
				1140	bytes[3]=(char)(0x30+linear%10); linear/=10;
				1141	bytes[2]=(char)(0x81+linear%126); linear/=126;
				1142	bytes[1]=(char)(0x30+linear%10); linear/=10;
				1143	bytes[0]=(char)(0x81+linear);
				1144
				1145	/* output this sequence */
				1146	ucnv_fromUWriteBytes(cnv,
				1147	bytes, 4, (char *)target, (char )targetLimit,
				1148	offsets, sourceIndex, pErrorCode);
				1149	return 0;
				1150	}
				1151	}
				1152	}
				1153
				1154	/* no mapping */
				1155	*pErrorCode=U_INVALID_CHAR_FOUND;
				1156	return cp;
				1157	}
				1158
				1159	/*
				1160	* Input sequence: cnv->toUBytes[0..length[
				1161	* @return if(U_FAILURE) return the length (toULength, byteIndex) for the input
				1162	* else return 0 after output has been written to the target
				1163	*/
				1164	static int8_t
				1165	_extToU(UConverter cnv, const UConverterSharedData sharedData,
				1166	int8_t length,
				1167	const uint8_t *source, const uint8_t sourceLimit,
				1168	UChar *target, const UChar targetLimit,
				1169	int32_t **offsets, int32_t sourceIndex,
				1170	UBool flush,
				1171	UErrorCode *pErrorCode) {
				1172	const int32_t *cx;
				1173
				1174	if( (cx=sharedData->mbcs.extIndexes)!=NULL &&
				1175	ucnv_extInitialMatchToU(
				1176	cnv, cx,
				1177	length, (const char *)source, (const char )sourceLimit,
				1178	target, targetLimit,
				1179	offsets, sourceIndex,
				1180	flush,
				1181	pErrorCode)
				1182	) {
				1183	return 0; /* an extension mapping handled the input */
				1184	}
				1185
				1186	/* GB 18030 */
				1187	if(length==4 && (cnv->options&_MBCS_OPTION_GB18030)!=0) {
				1188	const uint32_t *range;
				1189	uint32_t linear;
				1190	int32_t i;
				1191
				1192	linear=LINEAR_18030(cnv->toUBytes[0], cnv->toUBytes[1], cnv->toUBytes[2], cnv->toUBytes[3]);
				1193	range=gb18030Ranges[0];
				1194	for(i=0; i<UPRV_LENGTHOF(gb18030Ranges); range+=4, ++i) {
				1195	if(range[2]<=linear && linear<=range[3]) {
				1196	/* found the sequence, output the Unicode code point for it */
				1197	*pErrorCode=U_ZERO_ERROR;
				1198
				1199	/* add the linear difference between the input and start sequences to the start code point */
				1200	linear=range[0]+(linear-range[2]);
				1201
				1202	/* output this code point */
				1203	ucnv_toUWriteCodePoint(cnv, linear, target, targetLimit, offsets, sourceIndex, pErrorCode);
				1204
				1205	return 0;
				1206	}
				1207	}
				1208	}
				1209
				1210	/* no mapping */
				1211	*pErrorCode=U_INVALID_CHAR_FOUND;
				1212	return length;
				1213	}
				1214
				1215	/* EBCDIC swap LF<->NL ------------------------------------------------------ */
				1216
/*
 * This code modifies a standard EBCDIC<->Unicode mapping table for
 * OS/390 (z/OS) Unix System Services (Open Edition).
 * The difference is in the mapping of Line Feed and New Line control codes:
 * Standard EBCDIC maps
 *
 *   <U000A> \x25 |0
 *   <U0085> \x15 |0
 *
 * but OS/390 USS EBCDIC swaps the control codes for LF and NL,
 * mapping
 *
 *   <U000A> \x15 |0
 *   <U0085> \x25 |0
 *
 * This code modifies a loaded standard EBCDIC<->Unicode mapping table
 * by copying it into allocated memory and swapping the LF and NL values.
 * This allows supporting the same EBCDIC charset in both versions without
 * duplicating the entire installed table.
 */
				1237
				1238	/* standard EBCDIC codes */
				1239	#define EBCDIC_LF 0x25
				1240	#define EBCDIC_NL 0x15
				1241
				1242	/* standard EBCDIC codes with roundtrip flag as stored in Unicode-to-single-byte tables */
				1243	#define EBCDIC_RT_LF 0xf25
				1244	#define EBCDIC_RT_NL 0xf15
				1245
				1246	/* Unicode code points */
				1247	#define U_LF 0x0a
				1248	#define U_NL 0x85
				1249
				1250	static UBool
				1251	_EBCDICSwapLFNL(UConverterSharedData sharedData, UErrorCode pErrorCode) {
				1252	UConverterMBCSTable *mbcsTable;
				1253
				1254	const uint16_t table, results;
				1255	const uint8_t *bytes;
				1256
				1257	int32_t (*newStateTable)[256];
				1258	uint16_t *newResults;
				1259	uint8_t *p;
				1260	char *name;
				1261
				1262	uint32_t stage2Entry;
				1263	uint32_t size, sizeofFromUBytes;
				1264
				1265	mbcsTable=&sharedData->mbcs;
				1266
				1267	table=mbcsTable->fromUnicodeTable;
				1268	bytes=mbcsTable->fromUnicodeBytes;
				1269	results=(const uint16_t *)bytes;
				1270
				1271	/*
				1272	* Check that this is an EBCDIC table with SBCS portion -
				1273	* SBCS or EBCDIC_STATEFUL with standard EBCDIC LF and NL mappings.
				1274	*
				1275	* If not, ignore the option. Options are always ignored if they do not apply.
				1276	*/
				1277	if(!(
				1278	(mbcsTable->outputType==MBCS_OUTPUT_1 \|\| mbcsTable->outputType==MBCS_OUTPUT_2_SISO) &&
				1279	mbcsTable->stateTable[0][EBCDIC_LF]==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF) &&
				1280	mbcsTable->stateTable[0][EBCDIC_NL]==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL)
				1281	)) {
				1282	return FALSE;
				1283	}
				1284
				1285	if(mbcsTable->outputType==MBCS_OUTPUT_1) {
				1286	if(!(
				1287	EBCDIC_RT_LF==MBCS_SINGLE_RESULT_FROM_U(table, results, U_LF) &&
				1288	EBCDIC_RT_NL==MBCS_SINGLE_RESULT_FROM_U(table, results, U_NL)
				1289	)) {
				1290	return FALSE;
				1291	}
				1292	} else /* MBCS_OUTPUT_2_SISO */ {
				1293	stage2Entry=MBCS_STAGE_2_FROM_U(table, U_LF);
				1294	if(!(
				1295	MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_LF)!=0 &&
				1296	EBCDIC_LF==MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_LF)
				1297	)) {
				1298	return FALSE;
				1299	}
				1300
				1301	stage2Entry=MBCS_STAGE_2_FROM_U(table, U_NL);
				1302	if(!(
				1303	MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_NL)!=0 &&
				1304	EBCDIC_NL==MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_NL)
				1305	)) {
				1306	return FALSE;
				1307	}
				1308	}
				1309
				1310	if(mbcsTable->fromUBytesLength>0) {
				1311	/*
				1312	* We _know_ the number of bytes in the fromUnicodeBytes array
				1313	* starting with header.version 4.1.
				1314	*/
				1315	sizeofFromUBytes=mbcsTable->fromUBytesLength;
				1316	} else {
				1317	/*
				1318	* Otherwise:
				1319	* There used to be code to enumerate the fromUnicode
				1320	* trie and find the highest entry, but it was removed in ICU 3.2
				1321	* because it was not tested and caused a low code coverage number.
				1322	* See Jitterbug 3674.
				1323	* This affects only some .cnv file formats with a header.version
				1324	* below 4.1, and only when swaplfnl is requested.
				1325	*
				1326	* ucnvmbcs.c revision 1.99 is the last one with the
				1327	* ucnv_MBCSSizeofFromUBytes() function.
				1328	*/
				1329	*pErrorCode=U_INVALID_FORMAT_ERROR;
				1330	return FALSE;
				1331	}
				1332
				1333	/*
				1334	* The table has an appropriate format.
				1335	* Allocate and build
				1336	* - a modified to-Unicode state table
				1337	* - a modified from-Unicode output array
				1338	* - a converter name string with the swap option appended
				1339	*/
				1340	size=
				1341	mbcsTable->countStates*1024+
				1342	sizeofFromUBytes+
				1343	UCNV_MAX_CONVERTER_NAME_LENGTH+20;
				1344	p=(uint8_t *)uprv_malloc(size);
				1345	if(p==NULL) {
				1346	*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
				1347	return FALSE;
				1348	}
				1349
				1350	/* copy and modify the to-Unicode state table */
				1351	newStateTable=(int32_t (*)[256])p;
				1352	uprv_memcpy(newStateTable, mbcsTable->stateTable, mbcsTable->countStates*1024);
				1353
				1354	newStateTable[0][EBCDIC_LF]=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL);
				1355	newStateTable[0][EBCDIC_NL]=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF);
				1356
				1357	/* copy and modify the from-Unicode result table */
				1358	newResults=(uint16_t *)newStateTable[mbcsTable->countStates];
				1359	uprv_memcpy(newResults, bytes, sizeofFromUBytes);
				1360
				1361	/* conveniently, the table access macros work on the left side of expressions */
				1362	if(mbcsTable->outputType==MBCS_OUTPUT_1) {
				1363	MBCS_SINGLE_RESULT_FROM_U(table, newResults, U_LF)=EBCDIC_RT_NL;
				1364	MBCS_SINGLE_RESULT_FROM_U(table, newResults, U_NL)=EBCDIC_RT_LF;
				1365	} else /* MBCS_OUTPUT_2_SISO */ {
				1366	stage2Entry=MBCS_STAGE_2_FROM_U(table, U_LF);
				1367	MBCS_VALUE_2_FROM_STAGE_2(newResults, stage2Entry, U_LF)=EBCDIC_NL;
				1368
				1369	stage2Entry=MBCS_STAGE_2_FROM_U(table, U_NL);
				1370	MBCS_VALUE_2_FROM_STAGE_2(newResults, stage2Entry, U_NL)=EBCDIC_LF;
				1371	}
				1372
				1373	/* set the canonical converter name */
				1374	name=(char *)newResults+sizeofFromUBytes;
				1375	uprv_strcpy(name, sharedData->staticData->name);
				1376	uprv_strcat(name, UCNV_SWAP_LFNL_OPTION_STRING);
				1377
				1378	/* set the pointers */
				1379	umtx_lock(NULL);
				1380	if(mbcsTable->swapLFNLStateTable==NULL) {
				1381	mbcsTable->swapLFNLStateTable=newStateTable;
				1382	mbcsTable->swapLFNLFromUnicodeBytes=(uint8_t *)newResults;
				1383	mbcsTable->swapLFNLName=name;
				1384
				1385	newStateTable=NULL;
				1386	}
				1387	umtx_unlock(NULL);
				1388
				1389	/* release the allocated memory if another thread beat us to it */
				1390	if(newStateTable!=NULL) {
				1391	uprv_free(newStateTable);
				1392	}
				1393	return TRUE;
				1394	}
				1395
				1396	/* reconstitute omitted fromUnicode data ------------------------------------ */
				1397
				1398	/* for details, compare with genmbcs.c MBCSAddFromUnicode() and transformEUC() */
				1399	static UBool U_CALLCONV
				1400	writeStage3Roundtrip(const void *context, uint32_t value, UChar32 codePoints[32]) {
				1401	UConverterMBCSTable mbcsTable=(UConverterMBCSTable )context;
				1402	const uint16_t *table;
				1403	uint32_t *stage2;
				1404	uint8_t bytes, p;
				1405	UChar32 c;
				1406	int32_t i, st3;
				1407
				1408	table=mbcsTable->fromUnicodeTable;
				1409	bytes=(uint8_t *)mbcsTable->fromUnicodeBytes;
				1410
				1411	/* for EUC outputTypes, modify the value like genmbcs.c's transformEUC() */
				1412	switch(mbcsTable->outputType) {
				1413	case MBCS_OUTPUT_3_EUC:
				1414	if(value<=0xffff) {
				1415	/* short sequences are stored directly */
				1416	/* code set 0 or 1 */
				1417	} else if(value<=0x8effff) {
				1418	/* code set 2 */
				1419	value&=0x7fff;
				1420	} else /* first byte is 0x8f */ {
				1421	/* code set 3 */
				1422	value&=0xff7f;
				1423	}
				1424	break;
				1425	case MBCS_OUTPUT_4_EUC:
				1426	if(value<=0xffffff) {
				1427	/* short sequences are stored directly */
				1428	/* code set 0 or 1 */
				1429	} else if(value<=0x8effffff) {
				1430	/* code set 2 */
				1431	value&=0x7fffff;
				1432	} else /* first byte is 0x8f */ {
				1433	/* code set 3 */
				1434	value&=0xff7fff;
				1435	}
				1436	break;
				1437	default:
				1438	break;
				1439	}
				1440
				1441	for(i=0; i<=0x1f; ++value, ++i) {
				1442	c=codePoints[i];
				1443	if(c<0) {
				1444	continue;
				1445	}
				1446
				1447	/* locate the stage 2 & 3 data */
				1448	stage2=((uint32_t *)table)+table[c>>10]+((c>>4)&0x3f);
				1449	p=bytes;
				1450	st3=(int32_t)(uint16_t)stage216+(c&0xf);
				1451
				1452	/* write the codepage bytes into stage 3 */
				1453	switch(mbcsTable->outputType) {
				1454	case MBCS_OUTPUT_3:
				1455	case MBCS_OUTPUT_4_EUC:
				1456	p+=st3*3;
				1457	p[0]=(uint8_t)(value>>16);
				1458	p[1]=(uint8_t)(value>>8);
				1459	p[2]=(uint8_t)value;
				1460	break;
				1461	case MBCS_OUTPUT_4:
				1462	((uint32_t *)p)[st3]=value;
				1463	break;
				1464	default:
				1465	/* 2 bytes per character */
				1466	((uint16_t *)p)[st3]=(uint16_t)value;
				1467	break;
				1468	}
				1469
				1470	/* set the roundtrip flag */
				1471	*stage2\|=(1UL<<(16+(c&0xf)));
				1472	}
				1473	return TRUE;
				1474	}
				1475
				1476	static void
				1477	reconstituteData(UConverterMBCSTable *mbcsTable,
				1478	uint32_t stage1Length, uint32_t stage2Length,
				1479	uint32_t fullStage2Length, /* lengths are numbers of units, not bytes */
				1480	UErrorCode *pErrorCode) {
				1481	uint16_t *stage1;
				1482	uint32_t *stage2;
				1483	uint32_t dataLength=stage1Length2+fullStage2Length4+mbcsTable->fromUBytesLength;
				1484	mbcsTable->reconstitutedData=(uint8_t *)uprv_malloc(dataLength);
				1485	if(mbcsTable->reconstitutedData==NULL) {
				1486	*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
				1487	return;
				1488	}
				1489	uprv_memset(mbcsTable->reconstitutedData, 0, dataLength);
				1490
				1491	/* copy existing data and reroute the pointers */
				1492	stage1=(uint16_t *)mbcsTable->reconstitutedData;
				1493	uprv_memcpy(stage1, mbcsTable->fromUnicodeTable, stage1Length*2);
				1494
				1495	stage2=(uint32_t *)(stage1+stage1Length);
				1496	uprv_memcpy(stage2+(fullStage2Length-stage2Length),
				1497	mbcsTable->fromUnicodeTable+stage1Length,
				1498	stage2Length*4);
				1499
				1500	mbcsTable->fromUnicodeTable=stage1;
				1501	mbcsTable->fromUnicodeBytes=(uint8_t *)(stage2+fullStage2Length);
				1502
				1503	/* indexes into stage 2 count from the bottom of the fromUnicodeTable */
				1504	stage2=(uint32_t *)stage1;
				1505
				1506	/* reconstitute the initial part of stage 2 from the mbcsIndex */
				1507	{
				1508	int32_t stageUTF8Length=((int32_t)mbcsTable->maxFastUChar+1)>>6;
				1509	int32_t stageUTF8Index=0;
				1510	int32_t st1, st2, st3, i;
				1511
				1512	for(st1=0; stageUTF8Index<stageUTF8Length; ++st1) {
				1513	st2=stage1[st1];
				1514	if(st2!=(int32_t)stage1Length/2) {
				1515	/* each stage 2 block has 64 entries corresponding to 16 entries in the mbcsIndex */
				1516	for(i=0; i<16; ++i) {
				1517	st3=mbcsTable->mbcsIndex[stageUTF8Index++];
				1518	if(st3!=0) {
				1519	/* an stage 2 entry's index is per stage 3 16-block, not per stage 3 entry */
				1520	st3>>=4;
				1521	/*
				1522	* 4 stage 2 entries point to 4 consecutive stage 3 16-blocks which are
				1523	* allocated together as a single 64-block for access from the mbcsIndex
				1524	*/
				1525	stage2[st2++]=st3++;
				1526	stage2[st2++]=st3++;
				1527	stage2[st2++]=st3++;
				1528	stage2[st2++]=st3;
				1529	} else {
				1530	/* no stage 3 block, skip */
				1531	st2+=4;
				1532	}
				1533	}
				1534	} else {
				1535	/* no stage 2 block, skip */
				1536	stageUTF8Index+=16;
				1537	}
				1538	}
				1539	}
				1540
				1541	/* reconstitute fromUnicodeBytes with roundtrips from toUnicode data */
				1542	ucnv_MBCSEnumToUnicode(mbcsTable, writeStage3Roundtrip, mbcsTable, pErrorCode);
				1543	}
				1544
				1545	/* MBCS setup functions ----------------------------------------------------- */
				1546
				1547	static void
				1548	ucnv_MBCSLoad(UConverterSharedData *sharedData,
				1549	UConverterLoadArgs *pArgs,
				1550	const uint8_t *raw,
				1551	UErrorCode *pErrorCode) {
				1552	UDataInfo info;
				1553	UConverterMBCSTable *mbcsTable=&sharedData->mbcs;
				1554	_MBCSHeader header=(_MBCSHeader )raw;
				1555	uint32_t offset;
				1556	uint32_t headerLength;
				1557	UBool noFromU=FALSE;
				1558
				1559	if(header->version[0]==4) {
				1560	headerLength=MBCS_HEADER_V4_LENGTH;
				1561	} else if(header->version[0]==5 && header->version[1]>=3 &&
				1562	(header->options&MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0) {
				1563	headerLength=header->options&MBCS_OPT_LENGTH_MASK;
				1564	noFromU=(UBool)((header->options&MBCS_OPT_NO_FROM_U)!=0);
				1565	} else {
				1566	*pErrorCode=U_INVALID_TABLE_FORMAT;
				1567	return;
				1568	}
				1569
				1570	mbcsTable->outputType=(uint8_t)header->flags;
				1571	if(noFromU && mbcsTable->outputType==MBCS_OUTPUT_1) {
				1572	*pErrorCode=U_INVALID_TABLE_FORMAT;
				1573	return;
				1574	}
				1575
				1576	/* extension data, header version 4.2 and higher */
				1577	offset=header->flags>>8;
				1578	if(offset!=0) {
				1579	mbcsTable->extIndexes=(const int32_t *)(raw+offset);
				1580	}
				1581
				1582	if(mbcsTable->outputType==MBCS_OUTPUT_EXT_ONLY) {
				1583	UConverterLoadArgs args=UCNV_LOAD_ARGS_INITIALIZER;
				1584	UConverterSharedData *baseSharedData;
				1585	const int32_t *extIndexes;
				1586	const char *baseName;
				1587
				1588	/* extension-only file, load the base table and set values appropriately */
				1589	if((extIndexes=mbcsTable->extIndexes)==NULL) {
				1590	/* extension-only file without extension */
				1591	*pErrorCode=U_INVALID_TABLE_FORMAT;
				1592	return;
				1593	}
				1594
				1595	if(pArgs->nestedLoads!=1) {
				1596	/* an extension table must not be loaded as a base table */
				1597	*pErrorCode=U_INVALID_TABLE_FILE;
				1598	return;
				1599	}
				1600
				1601	/* load the base table */
				1602	baseName=(const char )header+headerLength4;
				1603	if(0==uprv_strcmp(baseName, sharedData->staticData->name)) {
				1604	/* forbid loading this same extension-only file */
				1605	*pErrorCode=U_INVALID_TABLE_FORMAT;
				1606	return;
				1607	}
				1608
				1609	/* TODO parse package name out of the prefix of the base name in the extension .cnv file? */
				1610	args.size=sizeof(UConverterLoadArgs);
				1611	args.nestedLoads=2;
				1612	args.onlyTestIsLoadable=pArgs->onlyTestIsLoadable;
				1613	args.reserved=pArgs->reserved;
				1614	args.options=pArgs->options;
				1615	args.pkg=pArgs->pkg;
				1616	args.name=baseName;
				1617	baseSharedData=ucnv_load(&args, pErrorCode);
				1618	if(U_FAILURE(*pErrorCode)) {
				1619	return;
				1620	}
				1621	if( baseSharedData->staticData->conversionType!=UCNV_MBCS \|\|
				1622	baseSharedData->mbcs.baseSharedData!=NULL
				1623	) {
				1624	ucnv_unload(baseSharedData);
				1625	*pErrorCode=U_INVALID_TABLE_FORMAT;
				1626	return;
				1627	}
				1628	if(pArgs->onlyTestIsLoadable) {
				1629	/*
				1630	* Exit as soon as we know that we can load the converter
				1631	* and the format is valid and supported.
				1632	* The worst that can happen in the following code is a memory
				1633	* allocation error.
				1634	*/
				1635	ucnv_unload(baseSharedData);
				1636	return;
				1637	}
				1638
				1639	/* copy the base table data */
				1640	uprv_memcpy(mbcsTable, &baseSharedData->mbcs, sizeof(UConverterMBCSTable));
				1641
				1642	/* overwrite values with relevant ones for the extension converter */
				1643	mbcsTable->baseSharedData=baseSharedData;
				1644	mbcsTable->extIndexes=extIndexes;
				1645
				1646	/*
				1647	* It would be possible to share the swapLFNL data with a base converter,
				1648	* but the generated name would have to be different, and the memory
				1649	* would have to be free'd only once.
				1650	* It is easier to just create the data for the extension converter
				1651	* separately when it is requested.
				1652	*/
				1653	mbcsTable->swapLFNLStateTable=NULL;
				1654	mbcsTable->swapLFNLFromUnicodeBytes=NULL;
				1655	mbcsTable->swapLFNLName=NULL;
				1656
				1657	/*
				1658	* The reconstitutedData must be deleted only when the base converter
				1659	* is unloaded.
				1660	*/
				1661	mbcsTable->reconstitutedData=NULL;
				1662
				1663	/*
				1664	* Set a special, runtime-only outputType if the extension converter
				1665	* is a DBCS version of a base converter that also maps single bytes.
				1666	*/
				1667	if( sharedData->staticData->conversionType==UCNV_DBCS \|\|
				1668	(sharedData->staticData->conversionType==UCNV_MBCS &&
				1669	sharedData->staticData->minBytesPerChar>=2)
				1670	) {
				1671	if(baseSharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO) {
				1672	/* the base converter is SI/SO-stateful */
				1673	int32_t entry;
				1674
				1675	/* get the dbcs state from the state table entry for SO=0x0e */
				1676	entry=mbcsTable->stateTable[0][0xe];
				1677	if( MBCS_ENTRY_IS_FINAL(entry) &&
				1678	MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_CHANGE_ONLY &&
				1679	MBCS_ENTRY_FINAL_STATE(entry)!=0
				1680	) {
				1681	mbcsTable->dbcsOnlyState=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry);
				1682
				1683	mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY;
				1684	}
				1685	} else if(
				1686	baseSharedData->staticData->conversionType==UCNV_MBCS &&
				1687	baseSharedData->staticData->minBytesPerChar==1 &&
				1688	baseSharedData->staticData->maxBytesPerChar==2 &&
				1689	mbcsTable->countStates<=127
				1690	) {
				1691	/* non-stateful base converter, need to modify the state table */
				1692	int32_t (*newStateTable)[256];
				1693	int32_t *state;
				1694	int32_t i, count;
				1695
				1696	/* allocate a new state table and copy the base state table contents */
				1697	count=mbcsTable->countStates;
				1698	newStateTable=(int32_t ()[256])uprv_malloc((count+1)1024);
				1699	if(newStateTable==NULL) {
				1700	ucnv_unload(baseSharedData);
				1701	*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
				1702	return;
				1703	}
				1704
				1705	uprv_memcpy(newStateTable, mbcsTable->stateTable, count*1024);
				1706
				1707	/* change all final single-byte entries to go to a new all-illegal state */
				1708	state=newStateTable[0];
				1709	for(i=0; i<256; ++i) {
				1710	if(MBCS_ENTRY_IS_FINAL(state[i])) {
				1711	state[i]=MBCS_ENTRY_TRANSITION(count, 0);
				1712	}
				1713	}
				1714
				1715	/* build the new all-illegal state */
				1716	state=newStateTable[count];
				1717	for(i=0; i<256; ++i) {
				1718	state[i]=MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0);
				1719	}
				1720	mbcsTable->stateTable=(const int32_t (*)[256])newStateTable;
				1721	mbcsTable->countStates=(uint8_t)(count+1);
				1722	mbcsTable->stateTableOwned=TRUE;
				1723
				1724	mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY;
				1725	}
				1726	}
				1727
				1728	/*
				1729	* unlike below for files with base tables, do not get the unicodeMask
				1730	* from the sharedData; instead, use the base table's unicodeMask,
				1731	* which we copied in the memcpy above;
				1732	* this is necessary because the static data unicodeMask, especially
				1733	* the UCNV_HAS_SUPPLEMENTARY flag, is part of the base table data
				1734	*/
				1735	} else {
				1736	/* conversion file with a base table; an additional extension table is optional */
				1737	/* make sure that the output type is known */
				1738	switch(mbcsTable->outputType) {
				1739	case MBCS_OUTPUT_1:
				1740	case MBCS_OUTPUT_2:
				1741	case MBCS_OUTPUT_3:
				1742	case MBCS_OUTPUT_4:
				1743	case MBCS_OUTPUT_3_EUC:
				1744	case MBCS_OUTPUT_4_EUC:
				1745	case MBCS_OUTPUT_2_SISO:
				1746	/* OK */
				1747	break;
				1748	default:
				1749	*pErrorCode=U_INVALID_TABLE_FORMAT;
				1750	return;
				1751	}
				1752	if(pArgs->onlyTestIsLoadable) {
				1753	/*
				1754	* Exit as soon as we know that we can load the converter
				1755	* and the format is valid and supported.
				1756	* The worst that can happen in the following code is a memory
				1757	* allocation error.
				1758	*/
				1759	return;
				1760	}
				1761
				1762	mbcsTable->countStates=(uint8_t)header->countStates;
				1763	mbcsTable->countToUFallbacks=header->countToUFallbacks;
				1764	mbcsTable->stateTable=(const int32_t ()[256])(raw+headerLength4);
				1765	mbcsTable->toUFallbacks=(const _MBCSToUFallback *)(mbcsTable->stateTable+header->countStates);
				1766	mbcsTable->unicodeCodeUnits=(const uint16_t *)(raw+header->offsetToUCodeUnits);
				1767
				1768	mbcsTable->fromUnicodeTable=(const uint16_t *)(raw+header->offsetFromUTable);
				1769	mbcsTable->fromUnicodeBytes=(const uint8_t *)(raw+header->offsetFromUBytes);
				1770	mbcsTable->fromUBytesLength=header->fromUBytesLength;
				1771
				1772	/*
				1773	* converter versions 6.1 and up contain a unicodeMask that is
				1774	* used here to select the most efficient function implementations
				1775	*/
				1776	info.size=sizeof(UDataInfo);
				1777	udata_getInfo((UDataMemory *)sharedData->dataMemory, &info);
				1778	if(info.formatVersion[0]>6 \|\| (info.formatVersion[0]==6 && info.formatVersion[1]>=1)) {
				1779	/* mask off possible future extensions to be safe */
				1780	mbcsTable->unicodeMask=(uint8_t)(sharedData->staticData->unicodeMask&3);
				1781	} else {
				1782	/* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */
				1783	mbcsTable->unicodeMask=UCNV_HAS_SUPPLEMENTARY\|UCNV_HAS_SURROGATES;
				1784	}
				1785
				1786	/*
				1787	* _MBCSHeader.version 4.3 adds utf8Friendly data structures.
				1788	* Check for the header version, SBCS vs. MBCS, and for whether the
				1789	* data structures are optimized for code points as high as what the
				1790	* runtime code is designed for.
				1791	* The implementation does not handle mapping tables with entries for
				1792	* unpaired surrogates.
				1793	*/
				1794	if( header->version[1]>=3 &&
				1795	(mbcsTable->unicodeMask&UCNV_HAS_SURROGATES)==0 &&
				1796	(mbcsTable->countStates==1 ?
				1797	(header->version[2]>=(SBCS_FAST_MAX>>8)) :
				1798	(header->version[2]>=(MBCS_FAST_MAX>>8))
				1799	)
				1800	) {
				1801	mbcsTable->utf8Friendly=TRUE;
				1802
				1803	if(mbcsTable->countStates==1) {
				1804	/*
				1805	* SBCS: Stage 3 is allocated in 64-entry blocks for U+0000..SBCS_FAST_MAX or higher.
				1806	* Build a table with indexes to each block, to be used instead of
				1807	* the regular stage 1/2 table.
				1808	*/
				1809	int32_t i;
				1810	for(i=0; i<(SBCS_FAST_LIMIT>>6); ++i) {
				1811	mbcsTable->sbcsIndex[i]=mbcsTable->fromUnicodeTable[mbcsTable->fromUnicodeTable[i>>4]+((i<<2)&0x3c)];
				1812	}
				1813	/* set SBCS_FAST_MAX to reflect the reach of sbcsIndex[] even if header->version[2]>(SBCS_FAST_MAX>>8) */
				1814	mbcsTable->maxFastUChar=SBCS_FAST_MAX;
				1815	} else {
				1816	/*
				1817	* MBCS: Stage 3 is allocated in 64-entry blocks for U+0000..MBCS_FAST_MAX or higher.
				1818	* The .cnv file is prebuilt with an additional stage table with indexes
				1819	* to each block.
				1820	*/
				1821	mbcsTable->mbcsIndex=(const uint16_t *)
				1822	(mbcsTable->fromUnicodeBytes+
				1823	(noFromU ? 0 : mbcsTable->fromUBytesLength));
				1824	mbcsTable->maxFastUChar=(((UChar)header->version[2])<<8)\|0xff;
				1825	}
				1826	}
				1827
				1828	/* calculate a bit set of 4 ASCII characters per bit that round-trip to ASCII bytes */
				1829	{
				1830	uint32_t asciiRoundtrips=0xffffffff;
				1831	int32_t i;
				1832
				1833	for(i=0; i<0x80; ++i) {
				1834	if(mbcsTable->stateTable[0][i]!=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, i)) {
				1835	asciiRoundtrips&=~((uint32_t)1<<(i>>2));
				1836	}
				1837	}
				1838	mbcsTable->asciiRoundtrips=asciiRoundtrips;
				1839	}
				1840
				1841	if(noFromU) {
				1842	uint32_t stage1Length=
				1843	mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY ?
				1844	0x440 : 0x40;
				1845	uint32_t stage2Length=
				1846	(header->offsetFromUBytes-header->offsetFromUTable)/4-
				1847	stage1Length/2;
				1848	reconstituteData(mbcsTable, stage1Length, stage2Length, header->fullStage2Length, pErrorCode);
				1849	}
				1850	}
				1851
				1852	/* Set the impl pointer here so that it is set for both extension-only and base tables. */
				1853	if(mbcsTable->utf8Friendly) {
				1854	if(mbcsTable->countStates==1) {
				1855	sharedData->impl=&_SBCSUTF8Impl;
				1856	} else {
				1857	if(mbcsTable->outputType==MBCS_OUTPUT_2) {
				1858	sharedData->impl=&_DBCSUTF8Impl;
				1859	}
				1860	}
				1861	}
				1862
				1863	if(mbcsTable->outputType==MBCS_OUTPUT_DBCS_ONLY \|\| mbcsTable->outputType==MBCS_OUTPUT_2_SISO) {
				1864	/*
				1865	* MBCS_OUTPUT_DBCS_ONLY: No SBCS mappings, therefore ASCII does not roundtrip.
				1866	* MBCS_OUTPUT_2_SISO: Bypass the ASCII fastpath to handle prevLength correctly.
				1867	*/
				1868	mbcsTable->asciiRoundtrips=0;
				1869	}
				1870	}
				1871
				1872	static void
				1873	ucnv_MBCSUnload(UConverterSharedData *sharedData) {
				1874	UConverterMBCSTable *mbcsTable=&sharedData->mbcs;
				1875
				1876	if(mbcsTable->swapLFNLStateTable!=NULL) {
				1877	uprv_free(mbcsTable->swapLFNLStateTable);
				1878	}
				1879	if(mbcsTable->stateTableOwned) {
				1880	uprv_free((void *)mbcsTable->stateTable);
				1881	}
				1882	if(mbcsTable->baseSharedData!=NULL) {
				1883	ucnv_unload(mbcsTable->baseSharedData);
				1884	}
				1885	if(mbcsTable->reconstitutedData!=NULL) {
				1886	uprv_free(mbcsTable->reconstitutedData);
				1887	}
				1888	}
				1889
				1890	static void
				1891	ucnv_MBCSOpen(UConverter *cnv,
				1892	UConverterLoadArgs *pArgs,
				1893	UErrorCode *pErrorCode) {
				1894	UConverterMBCSTable *mbcsTable;
				1895	const int32_t *extIndexes;
				1896	uint8_t outputType;
				1897	int8_t maxBytesPerUChar;
				1898
				1899	if(pArgs->onlyTestIsLoadable) {
				1900	return;
				1901	}
				1902
				1903	mbcsTable=&cnv->sharedData->mbcs;
				1904	outputType=mbcsTable->outputType;
				1905
				1906	if(outputType==MBCS_OUTPUT_DBCS_ONLY) {
				1907	/* the swaplfnl option does not apply, remove it */
				1908	cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL;
				1909	}
				1910
				1911	if((pArgs->options&UCNV_OPTION_SWAP_LFNL)!=0) {
				1912	/* do this because double-checked locking is broken */
				1913	UBool isCached;
				1914
				1915	umtx_lock(NULL);
				1916	isCached=mbcsTable->swapLFNLStateTable!=NULL;
				1917	umtx_unlock(NULL);
				1918
				1919	if(!isCached) {
				1920	if(!_EBCDICSwapLFNL(cnv->sharedData, pErrorCode)) {
				1921	if(U_FAILURE(*pErrorCode)) {
				1922	return; /* something went wrong */
				1923	}
				1924
				1925	/* the option does not apply, remove it */
				1926	cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL;
				1927	}
				1928	}
				1929	}
				1930
				1931	if(uprv_strstr(pArgs->name, "18030")!=NULL) {
				1932	if(uprv_strstr(pArgs->name, "gb18030")!=NULL \|\| uprv_strstr(pArgs->name, "GB18030")!=NULL) {
				1933	/* set a flag for GB 18030 mode, which changes the callback behavior */
				1934	cnv->options\|=_MBCS_OPTION_GB18030;
				1935	}
				1936	} else if((uprv_strstr(pArgs->name, "KEIS")!=NULL) \|\| (uprv_strstr(pArgs->name, "keis")!=NULL)) {
				1937	/* set a flag for KEIS converter, which changes the SI/SO character sequence */
				1938	cnv->options\|=_MBCS_OPTION_KEIS;
				1939	} else if((uprv_strstr(pArgs->name, "JEF")!=NULL) \|\| (uprv_strstr(pArgs->name, "jef")!=NULL)) {
				1940	/* set a flag for JEF converter, which changes the SI/SO character sequence */
				1941	cnv->options\|=_MBCS_OPTION_JEF;
				1942	} else if((uprv_strstr(pArgs->name, "JIPS")!=NULL) \|\| (uprv_strstr(pArgs->name, "jips")!=NULL)) {
				1943	/* set a flag for JIPS converter, which changes the SI/SO character sequence */
				1944	cnv->options\|=_MBCS_OPTION_JIPS;
				1945	}
				1946
				1947	/* fix maxBytesPerUChar depending on outputType and options etc. */
				1948	if(outputType==MBCS_OUTPUT_2_SISO) {
				1949	cnv->maxBytesPerUChar=3; /* SO+DBCS */
				1950	}
				1951
				1952	extIndexes=mbcsTable->extIndexes;
				1953	if(extIndexes!=NULL) {
				1954	maxBytesPerUChar=(int8_t)UCNV_GET_MAX_BYTES_PER_UCHAR(extIndexes);
				1955	if(outputType==MBCS_OUTPUT_2_SISO) {
				1956	++maxBytesPerUChar; /* SO + multiple DBCS */
				1957	}
				1958
				1959	if(maxBytesPerUChar>cnv->maxBytesPerUChar) {
				1960	cnv->maxBytesPerUChar=maxBytesPerUChar;
				1961	}
				1962	}
				1963
				1964	#if 0
				1965	/*
				1966	* documentation of UConverter fields used for status
				1967	* all of these fields are (re)set to 0 by ucnv_bld.c and ucnv_reset()
				1968	*/
				1969
				1970	/* toUnicode */
				1971	cnv->toUnicodeStatus=0; /* offset */
				1972	cnv->mode=0; /* state */
				1973	cnv->toULength=0; /* byteIndex */
				1974
				1975	/* fromUnicode */
				1976	cnv->fromUChar32=0;
				1977	cnv->fromUnicodeStatus=1; /* prevLength */
				1978	#endif
				1979	}
				1980
				1981	static const char *
				1982	ucnv_MBCSGetName(const UConverter *cnv) {
				1983	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0 && cnv->sharedData->mbcs.swapLFNLName!=NULL) {
				1984	return cnv->sharedData->mbcs.swapLFNLName;
				1985	} else {
				1986	return cnv->sharedData->staticData->name;
				1987	}
				1988	}
				1989
				1990	/* MBCS-to-Unicode conversion functions ------------------------------------- */
				1991
				1992	static UChar32
				1993	ucnv_MBCSGetFallback(UConverterMBCSTable *mbcsTable, uint32_t offset) {
				1994	const _MBCSToUFallback *toUFallbacks;
				1995	uint32_t i, start, limit;
				1996
				1997	limit=mbcsTable->countToUFallbacks;
				1998	if(limit>0) {
				1999	/* do a binary search for the fallback mapping */
				2000	toUFallbacks=mbcsTable->toUFallbacks;
				2001	start=0;
				2002	while(start<limit-1) {
				2003	i=(start+limit)/2;
				2004	if(offset<toUFallbacks[i].offset) {
				2005	limit=i;
				2006	} else {
				2007	start=i;
				2008	}
				2009	}
				2010
				2011	/* did we really find it? */
				2012	if(offset==toUFallbacks[start].offset) {
				2013	return toUFallbacks[start].codePoint;
				2014	}
				2015	}
				2016
				2017	return 0xfffe;
				2018	}
				2019
				2020	/* This version of ucnv_MBCSToUnicodeWithOffsets() is optimized for single-byte, single-state codepages. */
				2021	static void
				2022	ucnv_MBCSSingleToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
				2023	UErrorCode *pErrorCode) {
				2024	UConverter *cnv;
				2025	const uint8_t source, sourceLimit;
				2026	UChar *target;
				2027	const UChar *targetLimit;
				2028	int32_t *offsets;
				2029
				2030	const int32_t (*stateTable)[256];
				2031
				2032	int32_t sourceIndex;
				2033
				2034	int32_t entry;
				2035	UChar c;
				2036	uint8_t action;
				2037
				2038	/* set up the local pointers */
				2039	cnv=pArgs->converter;
				2040	source=(const uint8_t *)pArgs->source;
				2041	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
				2042	target=pArgs->target;
				2043	targetLimit=pArgs->targetLimit;
				2044	offsets=pArgs->offsets;
				2045
				2046	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
				2047	stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
				2048	} else {
				2049	stateTable=cnv->sharedData->mbcs.stateTable;
				2050	}
				2051
				2052	/* sourceIndex=-1 if the current character began in the previous buffer */
				2053	sourceIndex=0;
				2054
				2055	/* conversion loop */
				2056	while(source<sourceLimit) {
				2057	/*
				2058	* This following test is to see if available input would overflow the output.
				2059	* It does not catch output of more than one code unit that
				2060	* overflows as a result of a surrogate pair or callback output
				2061	* from the last source byte.
				2062	* Therefore, those situations also test for overflows and will
				2063	* then break the loop, too.
				2064	*/
				2065	if(target>=targetLimit) {
				2066	/* target is full */
				2067	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				2068	break;
				2069	}
				2070
				2071	entry=stateTable[0][*source++];
				2072	/* MBCS_ENTRY_IS_FINAL(entry) */
				2073
				2074	/* test the most common case first */
				2075	if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
				2076	/* output BMP code point */
				2077	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2078	if(offsets!=NULL) {
				2079	*offsets++=sourceIndex;
				2080	}
				2081
				2082	/* normal end of action codes: prepare for a new character */
				2083	++sourceIndex;
				2084	continue;
				2085	}
				2086
				2087	/*
				2088	* An if-else-if chain provides more reliable performance for
				2089	* the most common cases compared to a switch.
				2090	*/
				2091	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
				2092	if(action==MBCS_STATE_VALID_DIRECT_20 \|\|
				2093	(action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv))
				2094	) {
				2095	entry=MBCS_ENTRY_FINAL_VALUE(entry);
				2096	/* output surrogate pair */
				2097	*target++=(UChar)(0xd800\|(UChar)(entry>>10));
				2098	if(offsets!=NULL) {
				2099	*offsets++=sourceIndex;
				2100	}
				2101	c=(UChar)(0xdc00\|(UChar)(entry&0x3ff));
				2102	if(target<targetLimit) {
				2103	*target++=c;
				2104	if(offsets!=NULL) {
				2105	*offsets++=sourceIndex;
				2106	}
				2107	} else {
				2108	/* target overflow */
				2109	cnv->UCharErrorBuffer[0]=c;
				2110	cnv->UCharErrorBufferLength=1;
				2111	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				2112	break;
				2113	}
				2114
				2115	++sourceIndex;
				2116	continue;
				2117	} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
				2118	if(UCNV_TO_U_USE_FALLBACK(cnv)) {
				2119	/* output BMP code point */
				2120	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2121	if(offsets!=NULL) {
				2122	*offsets++=sourceIndex;
				2123	}
				2124
				2125	++sourceIndex;
				2126	continue;
				2127	}
				2128	} else if(action==MBCS_STATE_UNASSIGNED) {
				2129	/* just fall through */
				2130	} else if(action==MBCS_STATE_ILLEGAL) {
				2131	/* callback(illegal) */
				2132	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				2133	} else {
				2134	/* reserved, must never occur */
				2135	++sourceIndex;
				2136	continue;
				2137	}
				2138
				2139	if(U_FAILURE(*pErrorCode)) {
				2140	/* callback(illegal) */
				2141	break;
				2142	} else /* unassigned sequences indicated with byteIndex>0 */ {
				2143	/* try an extension mapping */
				2144	pArgs->source=(const char *)source;
				2145	cnv->toUBytes[0]=*(source-1);
				2146	cnv->toULength=_extToU(cnv, cnv->sharedData,
				2147	1, &source, sourceLimit,
				2148	&target, targetLimit,
				2149	&offsets, sourceIndex,
				2150	pArgs->flush,
				2151	pErrorCode);
				2152	sourceIndex+=1+(int32_t)(source-(const uint8_t *)pArgs->source);
				2153
				2154	if(U_FAILURE(*pErrorCode)) {
				2155	/* not mappable or buffer overflow */
				2156	break;
				2157	}
				2158	}
				2159	}
				2160
				2161	/* write back the updated pointers */
				2162	pArgs->source=(const char *)source;
				2163	pArgs->target=target;
				2164	pArgs->offsets=offsets;
				2165	}
				2166
				2167	/*
				2168	* This version of ucnv_MBCSSingleToUnicodeWithOffsets() is optimized for single-byte, single-state codepages
				2169	* that only map to and from the BMP.
				2170	* In addition to single-byte optimizations, the offset calculations
				2171	* become much easier.
				2172	*/
				2173	static void
				2174	ucnv_MBCSSingleToBMPWithOffsets(UConverterToUnicodeArgs *pArgs,
				2175	UErrorCode *pErrorCode) {
				2176	UConverter *cnv;
				2177	const uint8_t source, sourceLimit, *lastSource;
				2178	UChar *target;
				2179	int32_t targetCapacity, length;
				2180	int32_t *offsets;
				2181
				2182	const int32_t (*stateTable)[256];
				2183
				2184	int32_t sourceIndex;
				2185
				2186	int32_t entry;
				2187	uint8_t action;
				2188
				2189	/* set up the local pointers */
				2190	cnv=pArgs->converter;
				2191	source=(const uint8_t *)pArgs->source;
				2192	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
				2193	target=pArgs->target;
				2194	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
				2195	offsets=pArgs->offsets;
				2196
				2197	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
				2198	stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
				2199	} else {
				2200	stateTable=cnv->sharedData->mbcs.stateTable;
				2201	}
				2202
				2203	/* sourceIndex=-1 if the current character began in the previous buffer */
				2204	sourceIndex=0;
				2205	lastSource=source;
				2206
				2207	/*
				2208	* since the conversion here is 1:1 UChar:uint8_t, we need only one counter
				2209	* for the minimum of the sourceLength and targetCapacity
				2210	*/
				2211	length=(int32_t)(sourceLimit-source);
				2212	if(length<targetCapacity) {
				2213	targetCapacity=length;
				2214	}
				2215
				2216	#if MBCS_UNROLL_SINGLE_TO_BMP
				2217	/* unrolling makes it faster on Pentium III/Windows 2000 */
				2218	/* unroll the loop with the most common case */
				2219	unrolled:
				2220	if(targetCapacity>=16) {
				2221	int32_t count, loops, oredEntries;
				2222
				2223	loops=count=targetCapacity>>4;
				2224	do {
				2225	oredEntries=entry=stateTable[0][*source++];
				2226	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2227	oredEntries\|=entry=stateTable[0][*source++];
				2228	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2229	oredEntries\|=entry=stateTable[0][*source++];
				2230	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2231	oredEntries\|=entry=stateTable[0][*source++];
				2232	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2233	oredEntries\|=entry=stateTable[0][*source++];
				2234	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2235	oredEntries\|=entry=stateTable[0][*source++];
				2236	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2237	oredEntries\|=entry=stateTable[0][*source++];
				2238	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2239	oredEntries\|=entry=stateTable[0][*source++];
				2240	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2241	oredEntries\|=entry=stateTable[0][*source++];
				2242	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2243	oredEntries\|=entry=stateTable[0][*source++];
				2244	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2245	oredEntries\|=entry=stateTable[0][*source++];
				2246	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2247	oredEntries\|=entry=stateTable[0][*source++];
				2248	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2249	oredEntries\|=entry=stateTable[0][*source++];
				2250	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2251	oredEntries\|=entry=stateTable[0][*source++];
				2252	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2253	oredEntries\|=entry=stateTable[0][*source++];
				2254	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2255	oredEntries\|=entry=stateTable[0][*source++];
				2256	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2257
				2258	/* were all 16 entries really valid? */
				2259	if(!MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(oredEntries)) {
				2260	/* no, return to the first of these 16 */
				2261	source-=16;
				2262	target-=16;
				2263	break;
				2264	}
				2265	} while(--count>0);
				2266	count=loops-count;
				2267	targetCapacity-=16*count;
				2268
				2269	if(offsets!=NULL) {
				2270	lastSource+=16*count;
				2271	while(count>0) {
				2272	*offsets++=sourceIndex++;
				2273	*offsets++=sourceIndex++;
				2274	*offsets++=sourceIndex++;
				2275	*offsets++=sourceIndex++;
				2276	*offsets++=sourceIndex++;
				2277	*offsets++=sourceIndex++;
				2278	*offsets++=sourceIndex++;
				2279	*offsets++=sourceIndex++;
				2280	*offsets++=sourceIndex++;
				2281	*offsets++=sourceIndex++;
				2282	*offsets++=sourceIndex++;
				2283	*offsets++=sourceIndex++;
				2284	*offsets++=sourceIndex++;
				2285	*offsets++=sourceIndex++;
				2286	*offsets++=sourceIndex++;
				2287	*offsets++=sourceIndex++;
				2288	--count;
				2289	}
				2290	}
				2291	}
				2292	#endif
				2293
				2294	/* conversion loop */
				2295	while(targetCapacity > 0 && source < sourceLimit) {
				2296	entry=stateTable[0][*source++];
				2297	/* MBCS_ENTRY_IS_FINAL(entry) */
				2298
				2299	/* test the most common case first */
				2300	if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
				2301	/* output BMP code point */
				2302	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2303	--targetCapacity;
				2304	continue;
				2305	}
				2306
				2307	/*
				2308	* An if-else-if chain provides more reliable performance for
				2309	* the most common cases compared to a switch.
				2310	*/
				2311	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
				2312	if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
				2313	if(UCNV_TO_U_USE_FALLBACK(cnv)) {
				2314	/* output BMP code point */
				2315	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2316	--targetCapacity;
				2317	continue;
				2318	}
				2319	} else if(action==MBCS_STATE_UNASSIGNED) {
				2320	/* just fall through */
				2321	} else if(action==MBCS_STATE_ILLEGAL) {
				2322	/* callback(illegal) */
				2323	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				2324	} else {
				2325	/* reserved, must never occur */
				2326	continue;
				2327	}
				2328
				2329	/* set offsets since the start or the last extension */
				2330	if(offsets!=NULL) {
				2331	int32_t count=(int32_t)(source-lastSource);
				2332
				2333	/* predecrement: do not set the offset for the callback-causing character */
				2334	while(--count>0) {
				2335	*offsets++=sourceIndex++;
				2336	}
				2337	/* offset and sourceIndex are now set for the current character */
				2338	}
				2339
				2340	if(U_FAILURE(*pErrorCode)) {
				2341	/* callback(illegal) */
				2342	break;
				2343	} else /* unassigned sequences indicated with byteIndex>0 */ {
				2344	/* try an extension mapping */
				2345	lastSource=source;
				2346	cnv->toUBytes[0]=*(source-1);
				2347	cnv->toULength=_extToU(cnv, cnv->sharedData,
				2348	1, &source, sourceLimit,
				2349	&target, pArgs->targetLimit,
				2350	&offsets, sourceIndex,
				2351	pArgs->flush,
				2352	pErrorCode);
				2353	sourceIndex+=1+(int32_t)(source-lastSource);
				2354
				2355	if(U_FAILURE(*pErrorCode)) {
				2356	/* not mappable or buffer overflow */
				2357	break;
				2358	}
				2359
				2360	/* recalculate the targetCapacity after an extension mapping */
				2361	targetCapacity=(int32_t)(pArgs->targetLimit-target);
				2362	length=(int32_t)(sourceLimit-source);
				2363	if(length<targetCapacity) {
				2364	targetCapacity=length;
				2365	}
				2366	}
				2367
				2368	#if MBCS_UNROLL_SINGLE_TO_BMP
				2369	/* unrolling makes it faster on Pentium III/Windows 2000 */
				2370	goto unrolled;
				2371	#endif
				2372	}
				2373
				2374	if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=pArgs->targetLimit) {
				2375	/* target is full */
				2376	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				2377	}
				2378
				2379	/* set offsets since the start or the last callback */
				2380	if(offsets!=NULL) {
				2381	size_t count=source-lastSource;
				2382	while(count>0) {
				2383	*offsets++=sourceIndex++;
				2384	--count;
				2385	}
				2386	}
				2387
				2388	/* write back the updated pointers */
				2389	pArgs->source=(const char *)source;
				2390	pArgs->target=target;
				2391	pArgs->offsets=offsets;
				2392	}
				2393
				2394	static UBool
				2395	hasValidTrailBytes(const int32_t (*stateTable)[256], uint8_t state) {
				2396	const int32_t *row=stateTable[state];
				2397	int32_t b, entry;
				2398	/* First test for final entries in this state for some commonly valid byte values. */
				2399	entry=row[0xa1];
				2400	if( !MBCS_ENTRY_IS_TRANSITION(entry) &&
				2401	MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL
				2402	) {
				2403	return TRUE;
				2404	}
				2405	entry=row[0x41];
				2406	if( !MBCS_ENTRY_IS_TRANSITION(entry) &&
				2407	MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL
				2408	) {
				2409	return TRUE;
				2410	}
				2411	/* Then test for final entries in this state. */
				2412	for(b=0; b<=0xff; ++b) {
				2413	entry=row[b];
				2414	if( !MBCS_ENTRY_IS_TRANSITION(entry) &&
				2415	MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL
				2416	) {
				2417	return TRUE;
				2418	}
				2419	}
				2420	/* Then recurse for transition entries. */
				2421	for(b=0; b<=0xff; ++b) {
				2422	entry=row[b];
				2423	if( MBCS_ENTRY_IS_TRANSITION(entry) &&
				2424	hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry))
				2425	) {
				2426	return TRUE;
				2427	}
				2428	}
				2429	return FALSE;
				2430	}
				2431
				2432	/*
				2433	* Is byte b a single/lead byte in this state?
				2434	* Recurse for transition states, because here we don't want to say that
				2435	* b is a lead byte if all byte sequences that start with b are illegal.
				2436	*/
				2437	static UBool
				2438	isSingleOrLead(const int32_t (*stateTable)[256], uint8_t state, UBool isDBCSOnly, uint8_t b) {
				2439	const int32_t *row=stateTable[state];
				2440	int32_t entry=row[b];
				2441	if(MBCS_ENTRY_IS_TRANSITION(entry)) { /* lead byte */
				2442	return hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry));
				2443	} else {
				2444	uint8_t action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
				2445	if(action==MBCS_STATE_CHANGE_ONLY && isDBCSOnly) {
				2446	return FALSE; /* SI/SO are illegal for DBCS-only conversion */
				2447	} else {
				2448	return action!=MBCS_STATE_ILLEGAL;
				2449	}
				2450	}
				2451	}
				2452
				2453	U_CFUNC void
				2454	ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
				2455	UErrorCode *pErrorCode) {
				2456	UConverter *cnv;
				2457	const uint8_t source, sourceLimit;
				2458	UChar *target;
				2459	const UChar *targetLimit;
				2460	int32_t *offsets;
				2461
				2462	const int32_t (*stateTable)[256];
				2463	const uint16_t *unicodeCodeUnits;
				2464
				2465	uint32_t offset;
				2466	uint8_t state;
				2467	int8_t byteIndex;
				2468	uint8_t *bytes;
				2469
				2470	int32_t sourceIndex, nextSourceIndex;
				2471
				2472	int32_t entry;
				2473	UChar c;
				2474	uint8_t action;
				2475
				2476	/* use optimized function if possible */
				2477	cnv=pArgs->converter;
				2478
				2479	if(cnv->preToULength>0) {
				2480	/*
				2481	* pass sourceIndex=-1 because we continue from an earlier buffer
				2482	* in the future, this may change with continuous offsets
				2483	*/
				2484	ucnv_extContinueMatchToU(cnv, pArgs, -1, pErrorCode);
				2485
				2486	if(U_FAILURE(*pErrorCode) \|\| cnv->preToULength<0) {
				2487	return;
				2488	}
				2489	}
				2490
				2491	if(cnv->sharedData->mbcs.countStates==1) {
				2492	if(!(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
				2493	ucnv_MBCSSingleToBMPWithOffsets(pArgs, pErrorCode);
				2494	} else {
				2495	ucnv_MBCSSingleToUnicodeWithOffsets(pArgs, pErrorCode);
				2496	}
				2497	return;
				2498	}
				2499
				2500	/* set up the local pointers */
				2501	source=(const uint8_t *)pArgs->source;
				2502	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
				2503	target=pArgs->target;
				2504	targetLimit=pArgs->targetLimit;
				2505	offsets=pArgs->offsets;
				2506
				2507	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
				2508	stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
				2509	} else {
				2510	stateTable=cnv->sharedData->mbcs.stateTable;
				2511	}
				2512	unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits;
				2513
				2514	/* get the converter state from UConverter */
				2515	offset=cnv->toUnicodeStatus;
				2516	byteIndex=cnv->toULength;
				2517	bytes=cnv->toUBytes;
				2518
				2519	/*
				2520	* if we are in the SBCS state for a DBCS-only converter,
				2521	* then load the DBCS state from the MBCS data
				2522	* (dbcsOnlyState==0 if it is not a DBCS-only converter)
				2523	*/
				2524	if((state=(uint8_t)(cnv->mode))==0) {
				2525	state=cnv->sharedData->mbcs.dbcsOnlyState;
				2526	}
				2527
				2528	/* sourceIndex=-1 if the current character began in the previous buffer */
				2529	sourceIndex=byteIndex==0 ? 0 : -1;
				2530	nextSourceIndex=0;
				2531
				2532	/* conversion loop */
				2533	while(source<sourceLimit) {
				2534	/*
				2535	* This following test is to see if available input would overflow the output.
				2536	* It does not catch output of more than one code unit that
				2537	* overflows as a result of a surrogate pair or callback output
				2538	* from the last source byte.
				2539	* Therefore, those situations also test for overflows and will
				2540	* then break the loop, too.
				2541	*/
				2542	if(target>=targetLimit) {
				2543	/* target is full */
				2544	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				2545	break;
				2546	}
				2547
				2548	if(byteIndex==0) {
				2549	/* optimized loop for 1/2-byte input and BMP output */
				2550	if(offsets==NULL) {
				2551	do {
				2552	entry=stateTable[state][*source];
				2553	if(MBCS_ENTRY_IS_TRANSITION(entry)) {
				2554	state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
				2555	offset=MBCS_ENTRY_TRANSITION_OFFSET(entry);
				2556
				2557	++source;
				2558	if( source<sourceLimit &&
				2559	MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) &&
				2560	MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 &&
				2561	(c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe
				2562	) {
				2563	++source;
				2564	*target++=c;
				2565	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
				2566	offset=0;
				2567	} else {
				2568	/* set the state and leave the optimized loop */
				2569	bytes[0]=*(source-1);
				2570	byteIndex=1;
				2571	break;
				2572	}
				2573	} else {
				2574	if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
				2575	/* output BMP code point */
				2576	++source;
				2577	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2578	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
				2579	} else {
				2580	/* leave the optimized loop */
				2581	break;
				2582	}
				2583	}
				2584	} while(source<sourceLimit && target<targetLimit);
				2585	} else /* offsets!=NULL */ {
				2586	do {
				2587	entry=stateTable[state][*source];
				2588	if(MBCS_ENTRY_IS_TRANSITION(entry)) {
				2589	state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
				2590	offset=MBCS_ENTRY_TRANSITION_OFFSET(entry);
				2591
				2592	++source;
				2593	if( source<sourceLimit &&
				2594	MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) &&
				2595	MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 &&
				2596	(c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe
				2597	) {
				2598	++source;
				2599	*target++=c;
				2600	if(offsets!=NULL) {
				2601	*offsets++=sourceIndex;
				2602	sourceIndex=(nextSourceIndex+=2);
				2603	}
				2604	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
				2605	offset=0;
				2606	} else {
				2607	/* set the state and leave the optimized loop */
				2608	++nextSourceIndex;
				2609	bytes[0]=*(source-1);
				2610	byteIndex=1;
				2611	break;
				2612	}
				2613	} else {
				2614	if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
				2615	/* output BMP code point */
				2616	++source;
				2617	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2618	if(offsets!=NULL) {
				2619	*offsets++=sourceIndex;
				2620	sourceIndex=++nextSourceIndex;
				2621	}
				2622	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
				2623	} else {
				2624	/* leave the optimized loop */
				2625	break;
				2626	}
				2627	}
				2628	} while(source<sourceLimit && target<targetLimit);
				2629	}
				2630
				2631	/*
				2632	* these tests and break statements could be put inside the loop
				2633	* if C had "break outerLoop" like Java
				2634	*/
				2635	if(source>=sourceLimit) {
				2636	break;
				2637	}
				2638	if(target>=targetLimit) {
				2639	/* target is full */
				2640	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				2641	break;
				2642	}
				2643
				2644	++nextSourceIndex;
				2645	bytes[byteIndex++]=*source++;
				2646	} else /* byteIndex>0 */ {
				2647	++nextSourceIndex;
				2648	entry=stateTable[state][bytes[byteIndex++]=*source++];
				2649	}
				2650
				2651	if(MBCS_ENTRY_IS_TRANSITION(entry)) {
				2652	state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
				2653	offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry);
				2654	continue;
				2655	}
				2656
				2657	/* save the previous state for proper extension mapping with SI/SO-stateful converters */
				2658	cnv->mode=state;
				2659
				2660	/* set the next state early so that we can reuse the entry variable */
				2661	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
				2662
				2663	/*
				2664	* An if-else-if chain provides more reliable performance for
				2665	* the most common cases compared to a switch.
				2666	*/
				2667	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
				2668	if(action==MBCS_STATE_VALID_16) {
				2669	offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
				2670	c=unicodeCodeUnits[offset];
				2671	if(c<0xfffe) {
				2672	/* output BMP code point */
				2673	*target++=c;
				2674	if(offsets!=NULL) {
				2675	*offsets++=sourceIndex;
				2676	}
				2677	byteIndex=0;
				2678	} else if(c==0xfffe) {
				2679	if(UCNV_TO_U_USE_FALLBACK(cnv) && (entry=(int32_t)ucnv_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) {
				2680	/* output fallback BMP code point */
				2681	*target++=(UChar)entry;
				2682	if(offsets!=NULL) {
				2683	*offsets++=sourceIndex;
				2684	}
				2685	byteIndex=0;
				2686	}
				2687	} else {
				2688	/* callback(illegal) */
				2689	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				2690	}
				2691	} else if(action==MBCS_STATE_VALID_DIRECT_16) {
				2692	/* output BMP code point */
				2693	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2694	if(offsets!=NULL) {
				2695	*offsets++=sourceIndex;
				2696	}
				2697	byteIndex=0;
				2698	} else if(action==MBCS_STATE_VALID_16_PAIR) {
				2699	offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
				2700	c=unicodeCodeUnits[offset++];
				2701	if(c<0xd800) {
				2702	/* output BMP code point below 0xd800 */
				2703	*target++=c;
				2704	if(offsets!=NULL) {
				2705	*offsets++=sourceIndex;
				2706	}
				2707	byteIndex=0;
				2708	} else if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) {
				2709	/* output roundtrip or fallback surrogate pair */
				2710	*target++=(UChar)(c&0xdbff);
				2711	if(offsets!=NULL) {
				2712	*offsets++=sourceIndex;
				2713	}
				2714	byteIndex=0;
				2715	if(target<targetLimit) {
				2716	*target++=unicodeCodeUnits[offset];
				2717	if(offsets!=NULL) {
				2718	*offsets++=sourceIndex;
				2719	}
				2720	} else {
				2721	/* target overflow */
				2722	cnv->UCharErrorBuffer[0]=unicodeCodeUnits[offset];
				2723	cnv->UCharErrorBufferLength=1;
				2724	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				2725
				2726	offset=0;
				2727	break;
				2728	}
				2729	} else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) {
				2730	/* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
				2731	*target++=unicodeCodeUnits[offset];
				2732	if(offsets!=NULL) {
				2733	*offsets++=sourceIndex;
				2734	}
				2735	byteIndex=0;
				2736	} else if(c==0xffff) {
				2737	/* callback(illegal) */
				2738	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				2739	}
				2740	} else if(action==MBCS_STATE_VALID_DIRECT_20 \|\|
				2741	(action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv))
				2742	) {
				2743	entry=MBCS_ENTRY_FINAL_VALUE(entry);
				2744	/* output surrogate pair */
				2745	*target++=(UChar)(0xd800\|(UChar)(entry>>10));
				2746	if(offsets!=NULL) {
				2747	*offsets++=sourceIndex;
				2748	}
				2749	byteIndex=0;
				2750	c=(UChar)(0xdc00\|(UChar)(entry&0x3ff));
				2751	if(target<targetLimit) {
				2752	*target++=c;
				2753	if(offsets!=NULL) {
				2754	*offsets++=sourceIndex;
				2755	}
				2756	} else {
				2757	/* target overflow */
				2758	cnv->UCharErrorBuffer[0]=c;
				2759	cnv->UCharErrorBufferLength=1;
				2760	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				2761
				2762	offset=0;
				2763	break;
				2764	}
				2765	} else if(action==MBCS_STATE_CHANGE_ONLY) {
				2766	/*
				2767	* This serves as a state change without any output.
				2768	* It is useful for reading simple stateful encodings,
				2769	* for example using just Shift-In/Shift-Out codes.
				2770	* The 21 unused bits may later be used for more sophisticated
				2771	* state transitions.
				2772	*/
				2773	if(cnv->sharedData->mbcs.dbcsOnlyState==0) {
				2774	byteIndex=0;
				2775	} else {
				2776	/* SI/SO are illegal for DBCS-only conversion */
				2777	state=(uint8_t)(cnv->mode); /* restore the previous state */
				2778
				2779	/* callback(illegal) */
				2780	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				2781	}
				2782	} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
				2783	if(UCNV_TO_U_USE_FALLBACK(cnv)) {
				2784	/* output BMP code point */
				2785	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2786	if(offsets!=NULL) {
				2787	*offsets++=sourceIndex;
				2788	}
				2789	byteIndex=0;
				2790	}
				2791	} else if(action==MBCS_STATE_UNASSIGNED) {
				2792	/* just fall through */
				2793	} else if(action==MBCS_STATE_ILLEGAL) {
				2794	/* callback(illegal) */
				2795	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				2796	} else {
				2797	/* reserved, must never occur */
				2798	byteIndex=0;
				2799	}
				2800
				2801	/* end of action codes: prepare for a new character */
				2802	offset=0;
				2803
				2804	if(byteIndex==0) {
				2805	sourceIndex=nextSourceIndex;
				2806	} else if(U_FAILURE(*pErrorCode)) {
				2807	/* callback(illegal) */
				2808	if(byteIndex>1) {
				2809	/*
				2810	* Ticket 5691: consistent illegal sequences:
				2811	* - We include at least the first byte in the illegal sequence.
				2812	* - If any of the non-initial bytes could be the start of a character,
				2813	* we stop the illegal sequence before the first one of those.
				2814	*/
				2815	UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0);
				2816	int8_t i;
				2817	for(i=1;
				2818	i<byteIndex && !isSingleOrLead(stateTable, state, isDBCSOnly, bytes[i]);
				2819	++i) {}
				2820	if(i<byteIndex) {
				2821	/* Back out some bytes. */
				2822	int8_t backOutDistance=byteIndex-i;
				2823	int32_t bytesFromThisBuffer=(int32_t)(source-(const uint8_t *)pArgs->source);
				2824	byteIndex=i; /* length of reported illegal byte sequence */
				2825	if(backOutDistance<=bytesFromThisBuffer) {
				2826	source-=backOutDistance;
				2827	} else {
				2828	/* Back out bytes from the previous buffer: Need to replay them. */
				2829	cnv->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
				2830	/* preToULength is negative! */
				2831	uprv_memcpy(cnv->preToU, bytes+i, -cnv->preToULength);
				2832	source=(const uint8_t *)pArgs->source;
				2833	}
				2834	}
				2835	}
				2836	break;
				2837	} else /* unassigned sequences indicated with byteIndex>0 */ {
				2838	/* try an extension mapping */
				2839	pArgs->source=(const char *)source;
				2840	byteIndex=_extToU(cnv, cnv->sharedData,
				2841	byteIndex, &source, sourceLimit,
				2842	&target, targetLimit,
				2843	&offsets, sourceIndex,
				2844	pArgs->flush,
				2845	pErrorCode);
				2846	sourceIndex=nextSourceIndex+=(int32_t)(source-(const uint8_t *)pArgs->source);
				2847
				2848	if(U_FAILURE(*pErrorCode)) {
				2849	/* not mappable or buffer overflow */
				2850	break;
				2851	}
				2852	}
				2853	}
				2854
				2855	/* set the converter state back into UConverter */
				2856	cnv->toUnicodeStatus=offset;
				2857	cnv->mode=state;
				2858	cnv->toULength=byteIndex;
				2859
				2860	/* write back the updated pointers */
				2861	pArgs->source=(const char *)source;
				2862	pArgs->target=target;
				2863	pArgs->offsets=offsets;
				2864	}
				2865
				2866	/*
				2867	* This version of ucnv_MBCSGetNextUChar() is optimized for single-byte, single-state codepages.
				2868	* We still need a conversion loop in case we find reserved action codes, which are to be ignored.
				2869	*/
				2870	static UChar32
				2871	ucnv_MBCSSingleGetNextUChar(UConverterToUnicodeArgs *pArgs,
				2872	UErrorCode *pErrorCode) {
				2873	UConverter *cnv;
				2874	const int32_t (*stateTable)[256];
				2875	const uint8_t source, sourceLimit;
				2876
				2877	int32_t entry;
				2878	uint8_t action;
				2879
				2880	/* set up the local pointers */
				2881	cnv=pArgs->converter;
				2882	source=(const uint8_t *)pArgs->source;
				2883	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
				2884	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
				2885	stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
				2886	} else {
				2887	stateTable=cnv->sharedData->mbcs.stateTable;
				2888	}
				2889
				2890	/* conversion loop */
				2891	while(source<sourceLimit) {
				2892	entry=stateTable[0][*source++];
				2893	/* MBCS_ENTRY_IS_FINAL(entry) */
				2894
				2895	/* write back the updated pointer early so that we can return directly */
				2896	pArgs->source=(const char *)source;
				2897
				2898	if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
				2899	/* output BMP code point */
				2900	return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2901	}
				2902
				2903	/*
				2904	* An if-else-if chain provides more reliable performance for
				2905	* the most common cases compared to a switch.
				2906	*/
				2907	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
				2908	if( action==MBCS_STATE_VALID_DIRECT_20 \|\|
				2909	(action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv))
				2910	) {
				2911	/* output supplementary code point */
				2912	return (UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000);
				2913	} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
				2914	if(UCNV_TO_U_USE_FALLBACK(cnv)) {
				2915	/* output BMP code point */
				2916	return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				2917	}
				2918	} else if(action==MBCS_STATE_UNASSIGNED) {
				2919	/* just fall through */
				2920	} else if(action==MBCS_STATE_ILLEGAL) {
				2921	/* callback(illegal) */
				2922	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				2923	} else {
				2924	/* reserved, must never occur */
				2925	continue;
				2926	}
				2927
				2928	if(U_FAILURE(*pErrorCode)) {
				2929	/* callback(illegal) */
				2930	break;
				2931	} else /* unassigned sequence */ {
				2932	/* defer to the generic implementation */
				2933	pArgs->source=(const char *)source-1;
				2934	return UCNV_GET_NEXT_UCHAR_USE_TO_U;
				2935	}
				2936	}
				2937
				2938	/* no output because of empty input or only state changes */
				2939	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
				2940	return 0xffff;
				2941	}
				2942
				2943	/*
				2944	* Version of _MBCSToUnicodeWithOffsets() optimized for single-character
				2945	* conversion without offset handling.
				2946	*
				2947	* When a character does not have a mapping to Unicode, then we return to the
				2948	* generic ucnv_getNextUChar() code for extension/GB 18030 and error/callback
				2949	* handling.
				2950	* We also defer to the generic code in other complicated cases and have them
				2951	* ultimately handled by _MBCSToUnicodeWithOffsets() itself.
				2952	*
				2953	* All normal mappings and errors are handled here.
				2954	*/
				2955	static UChar32
				2956	ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
				2957	UErrorCode *pErrorCode) {
				2958	UConverter *cnv;
				2959	const uint8_t source, sourceLimit, *lastSource;
				2960
				2961	const int32_t (*stateTable)[256];
				2962	const uint16_t *unicodeCodeUnits;
				2963
				2964	uint32_t offset;
				2965	uint8_t state;
				2966
				2967	int32_t entry;
				2968	UChar32 c;
				2969	uint8_t action;
				2970
				2971	/* use optimized function if possible */
				2972	cnv=pArgs->converter;
				2973
				2974	if(cnv->preToULength>0) {
				2975	/* use the generic code in ucnv_getNextUChar() to continue with a partial match */
				2976	return UCNV_GET_NEXT_UCHAR_USE_TO_U;
				2977	}
				2978
				2979	if(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SURROGATES) {
				2980	/*
				2981	* Using the generic ucnv_getNextUChar() code lets us deal correctly
				2982	* with the rare case of a codepage that maps single surrogates
				2983	* without adding the complexity to this already complicated function here.
				2984	*/
				2985	return UCNV_GET_NEXT_UCHAR_USE_TO_U;
				2986	} else if(cnv->sharedData->mbcs.countStates==1) {
				2987	return ucnv_MBCSSingleGetNextUChar(pArgs, pErrorCode);
				2988	}
				2989
				2990	/* set up the local pointers */
				2991	source=lastSource=(const uint8_t *)pArgs->source;
				2992	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
				2993
				2994	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
				2995	stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
				2996	} else {
				2997	stateTable=cnv->sharedData->mbcs.stateTable;
				2998	}
				2999	unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits;
				3000
				3001	/* get the converter state from UConverter */
				3002	offset=cnv->toUnicodeStatus;
				3003
				3004	/*
				3005	* if we are in the SBCS state for a DBCS-only converter,
				3006	* then load the DBCS state from the MBCS data
				3007	* (dbcsOnlyState==0 if it is not a DBCS-only converter)
				3008	*/
				3009	if((state=(uint8_t)(cnv->mode))==0) {
				3010	state=cnv->sharedData->mbcs.dbcsOnlyState;
				3011	}
				3012
				3013	/* conversion loop */
				3014	c=U_SENTINEL;
				3015	while(source<sourceLimit) {
				3016	entry=stateTable[state][*source++];
				3017	if(MBCS_ENTRY_IS_TRANSITION(entry)) {
				3018	state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
				3019	offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry);
				3020
				3021	/* optimization for 1/2-byte input and BMP output */
				3022	if( source<sourceLimit &&
				3023	MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) &&
				3024	MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 &&
				3025	(c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe
				3026	) {
				3027	++source;
				3028	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
				3029	/* output BMP code point */
				3030	break;
				3031	}
				3032	} else {
				3033	/* save the previous state for proper extension mapping with SI/SO-stateful converters */
				3034	cnv->mode=state;
				3035
				3036	/* set the next state early so that we can reuse the entry variable */
				3037	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
				3038
				3039	/*
				3040	* An if-else-if chain provides more reliable performance for
				3041	* the most common cases compared to a switch.
				3042	*/
				3043	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
				3044	if(action==MBCS_STATE_VALID_DIRECT_16) {
				3045	/* output BMP code point */
				3046	c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				3047	break;
				3048	} else if(action==MBCS_STATE_VALID_16) {
				3049	offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
				3050	c=unicodeCodeUnits[offset];
				3051	if(c<0xfffe) {
				3052	/* output BMP code point */
				3053	break;
				3054	} else if(c==0xfffe) {
				3055	if(UCNV_TO_U_USE_FALLBACK(cnv) && (c=ucnv_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) {
				3056	break;
				3057	}
				3058	} else {
				3059	/* callback(illegal) */
				3060	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				3061	}
				3062	} else if(action==MBCS_STATE_VALID_16_PAIR) {
				3063	offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
				3064	c=unicodeCodeUnits[offset++];
				3065	if(c<0xd800) {
				3066	/* output BMP code point below 0xd800 */
				3067	break;
				3068	} else if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) {
				3069	/* output roundtrip or fallback supplementary code point */
				3070	c=((c&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00);
				3071	break;
				3072	} else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) {
				3073	/* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
				3074	c=unicodeCodeUnits[offset];
				3075	break;
				3076	} else if(c==0xffff) {
				3077	/* callback(illegal) */
				3078	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				3079	}
				3080	} else if(action==MBCS_STATE_VALID_DIRECT_20 \|\|
				3081	(action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv))
				3082	) {
				3083	/* output supplementary code point */
				3084	c=(UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000);
				3085	break;
				3086	} else if(action==MBCS_STATE_CHANGE_ONLY) {
				3087	/*
				3088	* This serves as a state change without any output.
				3089	* It is useful for reading simple stateful encodings,
				3090	* for example using just Shift-In/Shift-Out codes.
				3091	* The 21 unused bits may later be used for more sophisticated
				3092	* state transitions.
				3093	*/
				3094	if(cnv->sharedData->mbcs.dbcsOnlyState!=0) {
				3095	/* SI/SO are illegal for DBCS-only conversion */
				3096	state=(uint8_t)(cnv->mode); /* restore the previous state */
				3097
				3098	/* callback(illegal) */
				3099	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				3100	}
				3101	} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
				3102	if(UCNV_TO_U_USE_FALLBACK(cnv)) {
				3103	/* output BMP code point */
				3104	c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				3105	break;
				3106	}
				3107	} else if(action==MBCS_STATE_UNASSIGNED) {
				3108	/* just fall through */
				3109	} else if(action==MBCS_STATE_ILLEGAL) {
				3110	/* callback(illegal) */
				3111	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				3112	} else {
				3113	/* reserved (must never occur), or only state change */
				3114	offset=0;
				3115	lastSource=source;
				3116	continue;
				3117	}
				3118
				3119	/* end of action codes: prepare for a new character */
				3120	offset=0;
				3121
				3122	if(U_FAILURE(*pErrorCode)) {
				3123	/* callback(illegal) */
				3124	break;
				3125	} else /* unassigned sequence */ {
				3126	/* defer to the generic implementation */
				3127	cnv->toUnicodeStatus=0;
				3128	cnv->mode=state;
				3129	pArgs->source=(const char *)lastSource;
				3130	return UCNV_GET_NEXT_UCHAR_USE_TO_U;
				3131	}
				3132	}
				3133	}
				3134
				3135	if(c<0) {
				3136	if(U_SUCCESS(*pErrorCode) && source==sourceLimit && lastSource<source) {
				3137	/* incomplete character byte sequence */
				3138	uint8_t *bytes=cnv->toUBytes;
				3139	cnv->toULength=(int8_t)(source-lastSource);
				3140	do {
				3141	bytes++=lastSource++;
				3142	} while(lastSource<source);
				3143	*pErrorCode=U_TRUNCATED_CHAR_FOUND;
				3144	} else if(U_FAILURE(*pErrorCode)) {
				3145	/* callback(illegal) */
				3146	/*
				3147	* Ticket 5691: consistent illegal sequences:
				3148	* - We include at least the first byte in the illegal sequence.
				3149	* - If any of the non-initial bytes could be the start of a character,
				3150	* we stop the illegal sequence before the first one of those.
				3151	*/
				3152	UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0);
				3153	uint8_t *bytes=cnv->toUBytes;
				3154	bytes++=lastSource++; /* first byte */
				3155	if(lastSource==source) {
				3156	cnv->toULength=1;
				3157	} else /* lastSource<source: multi-byte character */ {
				3158	int8_t i;
				3159	for(i=1;
				3160	lastSource<source && !isSingleOrLead(stateTable, state, isDBCSOnly, *lastSource);
				3161	++i
				3162	) {
				3163	bytes++=lastSource++;
				3164	}
				3165	cnv->toULength=i;
				3166	source=lastSource;
				3167	}
				3168	} else {
				3169	/* no output because of empty input or only state changes */
				3170	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
				3171	}
				3172	c=0xffff;
				3173	}
				3174
				3175	/* set the converter state back into UConverter, ready for a new character */
				3176	cnv->toUnicodeStatus=0;
				3177	cnv->mode=state;
				3178
				3179	/* write back the updated pointer */
				3180	pArgs->source=(const char *)source;
				3181	return c;
				3182	}
				3183
				#if 0
				/*
				 * Disabled on 2002dec09 (ICU 2.4): nothing in ICU currently uses this code. markus
				 * Leaving it out improves code coverage.
				 */
				/**
				 * Variant of ucnv_MBCSSimpleGetNextUChar() for single-byte, single-state codepages.
				 * The EBCDIC swaplfnl option (set in UConverter) is not handled.
				 * Conversion extensions (_extToU()) are not handled.
				 *
				 * Returns the code point for byte b, 0xfffe if b is unassigned or is only
				 * a fallback that is not used, and 0xffff if b is illegal or has a reserved action code.
				 */
				U_CFUNC UChar32
				ucnv_MBCSSingleSimpleGetNextUChar(UConverterSharedData *sharedData,
				                                  uint8_t b, UBool useFallback) {
				    /* single-state codepage: look only at row 0; the entry is always final */
				    const int32_t finalEntry=sharedData->mbcs.stateTable[0][b];

				    /* most common case first: direct roundtrip to a BMP code point */
				    if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(finalEntry)) {
				        return (UChar)MBCS_ENTRY_FINAL_VALUE_16(finalEntry);
				    }

				    switch((uint8_t)(MBCS_ENTRY_FINAL_ACTION(finalEntry))) {
				    case MBCS_STATE_VALID_DIRECT_20:
				        /* roundtrip to a supplementary code point */
				        return 0x10000+MBCS_ENTRY_FINAL_VALUE(finalEntry);
				    case MBCS_STATE_FALLBACK_DIRECT_16:
				        if(TO_U_USE_FALLBACK(useFallback)) {
				            /* fallback to a BMP code point */
				            return (UChar)MBCS_ENTRY_FINAL_VALUE_16(finalEntry);
				        }
				        return 0xfffe;
				    case MBCS_STATE_FALLBACK_DIRECT_20:
				        if(TO_U_USE_FALLBACK(useFallback)) {
				            /* fallback to a supplementary code point */
				            return 0x10000+MBCS_ENTRY_FINAL_VALUE(finalEntry);
				        }
				        return 0xfffe;
				    case MBCS_STATE_UNASSIGNED:
				        return 0xfffe;
				    case MBCS_STATE_ILLEGAL:
				        return 0xffff;
				    default:
				        /* reserved, must never occur */
				        return 0xffff;
				    }
				}
				#endif
				3238
				3239	/*
				3240	* This is a simple version of _MBCSGetNextUChar() that is used
				3241	* by other converter implementations.
				3242	* It only returns an "assigned" result if it consumes the entire input.
				3243	* It does not use state from the converter, nor error codes.
				3244	* It does not handle the EBCDIC swaplfnl option (set in UConverter).
				3245	* It handles conversion extensions but not GB 18030.
				3246	*
				3247	* Return value:
				3248	* U+fffe unassigned
				3249	* U+ffff illegal
				3250	* otherwise the Unicode code point
				3251	*/
				3252	U_CFUNC UChar32
				3253	ucnv_MBCSSimpleGetNextUChar(UConverterSharedData *sharedData,
				3254	const char *source, int32_t length,
				3255	UBool useFallback) {
				3256	const int32_t (*stateTable)[256];
				3257	const uint16_t *unicodeCodeUnits;
				3258
				3259	uint32_t offset;
				3260	uint8_t state, action;
				3261
				3262	UChar32 c;
				3263	int32_t i, entry;
				3264
				3265	if(length<=0) {
				3266	/* no input at all: "illegal" */
				3267	return 0xffff;
				3268	}
				3269
				3270	#if 0
				3271	/*
				3272	* Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus
				3273	* TODO In future releases, verify that this function is never called for SBCS
				3274	* conversions, i.e., that sharedData->mbcs.countStates==1 is still true.
				3275	* Removal improves code coverage.
				3276	*/
				3277	/* use optimized function if possible */
				3278	if(sharedData->mbcs.countStates==1) {
				3279	if(length==1) {
				3280	return ucnv_MBCSSingleSimpleGetNextUChar(sharedData, (uint8_t)*source, useFallback);
				3281	} else {
				3282	return 0xffff; /* illegal: more than a single byte for an SBCS converter */
				3283	}
				3284	}
				3285	#endif
				3286
				3287	/* set up the local pointers */
				3288	stateTable=sharedData->mbcs.stateTable;
				3289	unicodeCodeUnits=sharedData->mbcs.unicodeCodeUnits;
				3290
				3291	/* converter state */
				3292	offset=0;
				3293	state=sharedData->mbcs.dbcsOnlyState;
				3294
				3295	/* conversion loop */
				3296	for(i=0;;) {
				3297	entry=stateTable[state][(uint8_t)source[i++]];
				3298	if(MBCS_ENTRY_IS_TRANSITION(entry)) {
				3299	state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
				3300	offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry);
				3301
				3302	if(i==length) {
				3303	return 0xffff; /* truncated character */
				3304	}
				3305	} else {
				3306	/*
				3307	* An if-else-if chain provides more reliable performance for
				3308	* the most common cases compared to a switch.
				3309	*/
				3310	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
				3311	if(action==MBCS_STATE_VALID_16) {
				3312	offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
				3313	c=unicodeCodeUnits[offset];
				3314	if(c!=0xfffe) {
				3315	/* done */
				3316	} else if(UCNV_TO_U_USE_FALLBACK(cnv)) {
				3317	c=ucnv_MBCSGetFallback(&sharedData->mbcs, offset);
				3318	/* else done with 0xfffe */
				3319	}
				3320	break;
				3321	} else if(action==MBCS_STATE_VALID_DIRECT_16) {
				3322	/* output BMP code point */
				3323	c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				3324	break;
				3325	} else if(action==MBCS_STATE_VALID_16_PAIR) {
				3326	offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
				3327	c=unicodeCodeUnits[offset++];
				3328	if(c<0xd800) {
				3329	/* output BMP code point below 0xd800 */
				3330	} else if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) {
				3331	/* output roundtrip or fallback supplementary code point */
				3332	c=(UChar32)(((c&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00));
				3333	} else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) {
				3334	/* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
				3335	c=unicodeCodeUnits[offset];
				3336	} else if(c==0xffff) {
				3337	return 0xffff;
				3338	} else {
				3339	c=0xfffe;
				3340	}
				3341	break;
				3342	} else if(action==MBCS_STATE_VALID_DIRECT_20) {
				3343	/* output supplementary code point */
				3344	c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
				3345	break;
				3346	} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
				3347	if(!TO_U_USE_FALLBACK(useFallback)) {
				3348	c=0xfffe;
				3349	break;
				3350	}
				3351	/* output BMP code point */
				3352	c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
				3353	break;
				3354	} else if(action==MBCS_STATE_FALLBACK_DIRECT_20) {
				3355	if(!TO_U_USE_FALLBACK(useFallback)) {
				3356	c=0xfffe;
				3357	break;
				3358	}
				3359	/* output supplementary code point */
				3360	c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
				3361	break;
				3362	} else if(action==MBCS_STATE_UNASSIGNED) {
				3363	c=0xfffe;
				3364	break;
				3365	}
				3366
				3367	/*
				3368	* forbid MBCS_STATE_CHANGE_ONLY for this function,
				3369	* and MBCS_STATE_ILLEGAL and reserved action codes
				3370	*/
				3371	return 0xffff;
				3372	}
				3373	}
				3374
				3375	if(i!=length) {
				3376	/* illegal for this function: not all input consumed */
				3377	return 0xffff;
				3378	}
				3379
				3380	if(c==0xfffe) {
				3381	/* try an extension mapping */
				3382	const int32_t *cx=sharedData->mbcs.extIndexes;
				3383	if(cx!=NULL) {
				3384	return ucnv_extSimpleMatchToU(cx, source, length, useFallback);
				3385	}
				3386	}
				3387
				3388	return c;
				3389	}
				3390
				3391	/* MBCS-from-Unicode conversion functions ----------------------------------- */
				3392
				3393	/* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for double-byte codepages. */
				3394	static void
				3395	ucnv_MBCSDoubleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
				3396	UErrorCode *pErrorCode) {
				3397	UConverter *cnv;
				3398	const UChar source, sourceLimit;
				3399	uint8_t *target;
				3400	int32_t targetCapacity;
				3401	int32_t *offsets;
				3402
				3403	const uint16_t *table;
				3404	const uint16_t *mbcsIndex;
				3405	const uint8_t *bytes;
				3406
				3407	UChar32 c;
				3408
				3409	int32_t sourceIndex, nextSourceIndex;
				3410
				3411	uint32_t stage2Entry;
				3412	uint32_t asciiRoundtrips;
				3413	uint32_t value;
				3414	uint8_t unicodeMask;
				3415
				3416	/* use optimized function if possible */
				3417	cnv=pArgs->converter;
				3418	unicodeMask=cnv->sharedData->mbcs.unicodeMask;
				3419
				3420	/* set up the local pointers */
				3421	source=pArgs->source;
				3422	sourceLimit=pArgs->sourceLimit;
				3423	target=(uint8_t *)pArgs->target;
				3424	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
				3425	offsets=pArgs->offsets;
				3426
				3427	table=cnv->sharedData->mbcs.fromUnicodeTable;
				3428	mbcsIndex=cnv->sharedData->mbcs.mbcsIndex;
				3429	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
				3430	bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
				3431	} else {
				3432	bytes=cnv->sharedData->mbcs.fromUnicodeBytes;
				3433	}
				3434	asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
				3435
				3436	/* get the converter state from UConverter */
				3437	c=cnv->fromUChar32;
				3438
				3439	/* sourceIndex=-1 if the current character began in the previous buffer */
				3440	sourceIndex= c==0 ? 0 : -1;
				3441	nextSourceIndex=0;
				3442
				3443	/* conversion loop */
				3444	if(c!=0 && targetCapacity>0) {
				3445	goto getTrail;
				3446	}
				3447
				3448	while(source<sourceLimit) {
				3449	/*
				3450	* This following test is to see if available input would overflow the output.
				3451	* It does not catch output of more than one byte that
				3452	* overflows as a result of a multi-byte character or callback output
				3453	* from the last source character.
				3454	* Therefore, those situations also test for overflows and will
				3455	* then break the loop, too.
				3456	*/
				3457	if(targetCapacity>0) {
				3458	/*
				3459	* Get a correct Unicode code point:
				3460	* a single UChar for a BMP code point or
				3461	* a matched surrogate pair for a "supplementary code point".
				3462	*/
				3463	c=*source++;
				3464	++nextSourceIndex;
				3465	if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) {
				3466	*target++=(uint8_t)c;
				3467	if(offsets!=NULL) {
				3468	*offsets++=sourceIndex;
				3469	sourceIndex=nextSourceIndex;
				3470	}
				3471	--targetCapacity;
				3472	c=0;
				3473	continue;
				3474	}
				3475	/*
				3476	* utf8Friendly table: Test for <=0xd7ff rather than <=MBCS_FAST_MAX
				3477	* to avoid dealing with surrogates.
				3478	* MBCS_FAST_MAX must be >=0xd7ff.
				3479	*/
				3480	if(c<=0xd7ff) {
				3481	value=DBCS_RESULT_FROM_MOST_BMP(mbcsIndex, (const uint16_t *)bytes, c);
				3482	/* There are only roundtrips (!=0) and no-mapping (==0) entries. */
				3483	if(value==0) {
				3484	goto unassigned;
				3485	}
				3486	/* output the value */
				3487	} else {
				3488	/*
				3489	* This also tests if the codepage maps single surrogates.
				3490	* If it does, then surrogates are not paired but mapped separately.
				3491	* Note that in this case unmatched surrogates are not detected.
				3492	*/
				3493	if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
				3494	if(U16_IS_SURROGATE_LEAD(c)) {
				3495	getTrail:
				3496	if(source<sourceLimit) {
				3497	/* test the following code unit */
				3498	UChar trail=*source;
				3499	if(U16_IS_TRAIL(trail)) {
				3500	++source;
				3501	++nextSourceIndex;
				3502	c=U16_GET_SUPPLEMENTARY(c, trail);
				3503	if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
				3504	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
				3505	/* callback(unassigned) */
				3506	goto unassigned;
				3507	}
				3508	/* convert this supplementary code point */
				3509	/* exit this condition tree */
				3510	} else {
				3511	/* this is an unmatched lead code unit (1st surrogate) */
				3512	/* callback(illegal) */
				3513	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				3514	break;
				3515	}
				3516	} else {
				3517	/* no more input */
				3518	break;
				3519	}
				3520	} else {
				3521	/* this is an unmatched trail code unit (2nd surrogate) */
				3522	/* callback(illegal) */
				3523	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				3524	break;
				3525	}
				3526	}
				3527
				3528	/* convert the Unicode code point in c into codepage bytes */
				3529	stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
				3530
				3531	/* get the bytes and the length for the output */
				3532	/* MBCS_OUTPUT_2 */
				3533	value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
				3534
				3535	/* is this code point assigned, or do we use fallbacks? */
				3536	if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) \|\|
				3537	(UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0))
				3538	) {
				3539	/*
				3540	* We allow a 0 byte output if the "assigned" bit is set for this entry.
				3541	* There is no way with this data structure for fallback output
				3542	* to be a zero byte.
				3543	*/
				3544
				3545	unassigned:
				3546	/* try an extension mapping */
				3547	pArgs->source=source;
				3548	c=_extFromU(cnv, cnv->sharedData,
				3549	c, &source, sourceLimit,
				3550	&target, target+targetCapacity,
				3551	&offsets, sourceIndex,
				3552	pArgs->flush,
				3553	pErrorCode);
				3554	nextSourceIndex+=(int32_t)(source-pArgs->source);
				3555
				3556	if(U_FAILURE(*pErrorCode)) {
				3557	/* not mappable or buffer overflow */
				3558	break;
				3559	} else {
				3560	/* a mapping was written to the target, continue */
				3561
				3562	/* recalculate the targetCapacity after an extension mapping */
				3563	targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
				3564
				3565	/* normal end of conversion: prepare for a new character */
				3566	sourceIndex=nextSourceIndex;
				3567	continue;
				3568	}
				3569	}
				3570	}
				3571
				3572	/* write the output character bytes from value and length */
				3573	/* from the first if in the loop we know that targetCapacity>0 */
				3574	if(value<=0xff) {
				3575	/* this is easy because we know that there is enough space */
				3576	*target++=(uint8_t)value;
				3577	if(offsets!=NULL) {
				3578	*offsets++=sourceIndex;
				3579	}
				3580	--targetCapacity;
				3581	} else /* length==2 */ {
				3582	*target++=(uint8_t)(value>>8);
				3583	if(2<=targetCapacity) {
				3584	*target++=(uint8_t)value;
				3585	if(offsets!=NULL) {
				3586	*offsets++=sourceIndex;
				3587	*offsets++=sourceIndex;
				3588	}
				3589	targetCapacity-=2;
				3590	} else {
				3591	if(offsets!=NULL) {
				3592	*offsets++=sourceIndex;
				3593	}
				3594	cnv->charErrorBuffer[0]=(char)value;
				3595	cnv->charErrorBufferLength=1;
				3596
				3597	/* target overflow */
				3598	targetCapacity=0;
				3599	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				3600	c=0;
				3601	break;
				3602	}
				3603	}
				3604
				3605	/* normal end of conversion: prepare for a new character */
				3606	c=0;
				3607	sourceIndex=nextSourceIndex;
				3608	continue;
				3609	} else {
				3610	/* target is full */
				3611	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				3612	break;
				3613	}
				3614	}
				3615
				3616	/* set the converter state back into UConverter */
				3617	cnv->fromUChar32=c;
				3618
				3619	/* write back the updated pointers */
				3620	pArgs->source=source;
				3621	pArgs->target=(char *)target;
				3622	pArgs->offsets=offsets;
				3623	}
				3624
				3625	/* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for single-byte codepages. */
				3626	static void
				3627	ucnv_MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
				3628	UErrorCode *pErrorCode) {
				3629	UConverter *cnv;
				3630	const UChar source, sourceLimit;
				3631	uint8_t *target;
				3632	int32_t targetCapacity;
				3633	int32_t *offsets;
				3634
				3635	const uint16_t *table;
				3636	const uint16_t *results;
				3637
				3638	UChar32 c;
				3639
				3640	int32_t sourceIndex, nextSourceIndex;
				3641
				3642	uint16_t value, minValue;
				3643	UBool hasSupplementary;
				3644
				3645	/* set up the local pointers */
				3646	cnv=pArgs->converter;
				3647	source=pArgs->source;
				3648	sourceLimit=pArgs->sourceLimit;
				3649	target=(uint8_t *)pArgs->target;
				3650	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
				3651	offsets=pArgs->offsets;
				3652
				3653	table=cnv->sharedData->mbcs.fromUnicodeTable;
				3654	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
				3655	results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
				3656	} else {
				3657	results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
				3658	}
				3659
				3660	if(cnv->useFallback) {
				3661	/* use all roundtrip and fallback results */
				3662	minValue=0x800;
				3663	} else {
				3664	/* use only roundtrips and fallbacks from private-use characters */
				3665	minValue=0xc00;
				3666	}
				3667	hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
				3668
				3669	/* get the converter state from UConverter */
				3670	c=cnv->fromUChar32;
				3671
				3672	/* sourceIndex=-1 if the current character began in the previous buffer */
				3673	sourceIndex= c==0 ? 0 : -1;
				3674	nextSourceIndex=0;
				3675
				3676	/* conversion loop */
				3677	if(c!=0 && targetCapacity>0) {
				3678	goto getTrail;
				3679	}
				3680
				3681	while(source<sourceLimit) {
				3682	/*
				3683	* This following test is to see if available input would overflow the output.
				3684	* It does not catch output of more than one byte that
				3685	* overflows as a result of a multi-byte character or callback output
				3686	* from the last source character.
				3687	* Therefore, those situations also test for overflows and will
				3688	* then break the loop, too.
				3689	*/
				3690	if(targetCapacity>0) {
				3691	/*
				3692	* Get a correct Unicode code point:
				3693	* a single UChar for a BMP code point or
				3694	* a matched surrogate pair for a "supplementary code point".
				3695	*/
				3696	c=*source++;
				3697	++nextSourceIndex;
				3698	if(U16_IS_SURROGATE(c)) {
				3699	if(U16_IS_SURROGATE_LEAD(c)) {
				3700	getTrail:
				3701	if(source<sourceLimit) {
				3702	/* test the following code unit */
				3703	UChar trail=*source;
				3704	if(U16_IS_TRAIL(trail)) {
				3705	++source;
				3706	++nextSourceIndex;
				3707	c=U16_GET_SUPPLEMENTARY(c, trail);
				3708	if(!hasSupplementary) {
				3709	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
				3710	/* callback(unassigned) */
				3711	goto unassigned;
				3712	}
				3713	/* convert this supplementary code point */
				3714	/* exit this condition tree */
				3715	} else {
				3716	/* this is an unmatched lead code unit (1st surrogate) */
				3717	/* callback(illegal) */
				3718	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				3719	break;
				3720	}
				3721	} else {
				3722	/* no more input */
				3723	break;
				3724	}
				3725	} else {
				3726	/* this is an unmatched trail code unit (2nd surrogate) */
				3727	/* callback(illegal) */
				3728	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				3729	break;
				3730	}
				3731	}
				3732
				3733	/* convert the Unicode code point in c into codepage bytes */
				3734	value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
				3735
				3736	/* is this code point assigned, or do we use fallbacks? */
				3737	if(value>=minValue) {
				3738	/* assigned, write the output character bytes from value and length */
				3739	/* length==1 */
				3740	/* this is easy because we know that there is enough space */
				3741	*target++=(uint8_t)value;
				3742	if(offsets!=NULL) {
				3743	*offsets++=sourceIndex;
				3744	}
				3745	--targetCapacity;
				3746
				3747	/* normal end of conversion: prepare for a new character */
				3748	c=0;
				3749	sourceIndex=nextSourceIndex;
				3750	} else { /* unassigned */
				3751	unassigned:
				3752	/* try an extension mapping */
				3753	pArgs->source=source;
				3754	c=_extFromU(cnv, cnv->sharedData,
				3755	c, &source, sourceLimit,
				3756	&target, target+targetCapacity,
				3757	&offsets, sourceIndex,
				3758	pArgs->flush,
				3759	pErrorCode);
				3760	nextSourceIndex+=(int32_t)(source-pArgs->source);
				3761
				3762	if(U_FAILURE(*pErrorCode)) {
				3763	/* not mappable or buffer overflow */
				3764	break;
				3765	} else {
				3766	/* a mapping was written to the target, continue */
				3767
				3768	/* recalculate the targetCapacity after an extension mapping */
				3769	targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
				3770
				3771	/* normal end of conversion: prepare for a new character */
				3772	sourceIndex=nextSourceIndex;
				3773	}
				3774	}
				3775	} else {
				3776	/* target is full */
				3777	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				3778	break;
				3779	}
				3780	}
				3781
				3782	/* set the converter state back into UConverter */
				3783	cnv->fromUChar32=c;
				3784
				3785	/* write back the updated pointers */
				3786	pArgs->source=source;
				3787	pArgs->target=(char *)target;
				3788	pArgs->offsets=offsets;
				3789	}
				3790
				3791	/*
				3792	* This version of ucnv_MBCSFromUnicode() is optimized for single-byte codepages
				3793	* that map only to and from the BMP.
				3794	* In addition to single-byte/state optimizations, the offset calculations
				3795	* become much easier.
				3796	* It would be possible to use the sbcsIndex for UTF-8-friendly tables,
				3797	* but measurements have shown that this diminishes performance
				3798	* in more cases than it improves it.
				3799	* See SVN revision 21013 (2007-feb-06) for the last version with #if switches
				3800	* for various MBCS and SBCS optimizations.
				3801	*/
				3802	static void
				3803	ucnv_MBCSSingleFromBMPWithOffsets(UConverterFromUnicodeArgs *pArgs,
				3804	UErrorCode *pErrorCode) {
				3805	UConverter *cnv;
				3806	const UChar source, sourceLimit, *lastSource;
				3807	uint8_t *target;
				3808	int32_t targetCapacity, length;
				3809	int32_t *offsets;
				3810
				3811	const uint16_t *table;
				3812	const uint16_t *results;
				3813
				3814	UChar32 c;
				3815
				3816	int32_t sourceIndex;
				3817
				3818	uint32_t asciiRoundtrips;
				3819	uint16_t value, minValue;
				3820
				3821	/* set up the local pointers */
				3822	cnv=pArgs->converter;
				3823	source=pArgs->source;
				3824	sourceLimit=pArgs->sourceLimit;
				3825	target=(uint8_t *)pArgs->target;
				3826	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
				3827	offsets=pArgs->offsets;
				3828
				3829	table=cnv->sharedData->mbcs.fromUnicodeTable;
				3830	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
				3831	results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
				3832	} else {
				3833	results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
				3834	}
				3835	asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
				3836
				3837	if(cnv->useFallback) {
				3838	/* use all roundtrip and fallback results */
				3839	minValue=0x800;
				3840	} else {
				3841	/* use only roundtrips and fallbacks from private-use characters */
				3842	minValue=0xc00;
				3843	}
				3844
				3845	/* get the converter state from UConverter */
				3846	c=cnv->fromUChar32;
				3847
				3848	/* sourceIndex=-1 if the current character began in the previous buffer */
				3849	sourceIndex= c==0 ? 0 : -1;
				3850	lastSource=source;
				3851
				3852	/*
				3853	* since the conversion here is 1:1 UChar:uint8_t, we need only one counter
				3854	* for the minimum of the sourceLength and targetCapacity
				3855	*/
				3856	length=(int32_t)(sourceLimit-source);
				3857	if(length<targetCapacity) {
				3858	targetCapacity=length;
				3859	}
				3860
				3861	/* conversion loop */
				3862	if(c!=0 && targetCapacity>0) {
				3863	goto getTrail;
				3864	}
				3865
				3866	#if MBCS_UNROLL_SINGLE_FROM_BMP
				3867	/* unrolling makes it slower on Pentium III/Windows 2000?! */
				3868	/* unroll the loop with the most common case */
				3869	unrolled:
				3870	if(targetCapacity>=4) {
				3871	int32_t count, loops;
				3872	uint16_t andedValues;
				3873
				3874	loops=count=targetCapacity>>2;
				3875	do {
				3876	c=*source++;
				3877	andedValues=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
				3878	*target++=(uint8_t)value;
				3879	c=*source++;
				3880	andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
				3881	*target++=(uint8_t)value;
				3882	c=*source++;
				3883	andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
				3884	*target++=(uint8_t)value;
				3885	c=*source++;
				3886	andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
				3887	*target++=(uint8_t)value;
				3888
				3889	/* were all 4 entries really valid? */
				3890	if(andedValues<minValue) {
				3891	/* no, return to the first of these 4 */
				3892	source-=4;
				3893	target-=4;
				3894	break;
				3895	}
				3896	} while(--count>0);
				3897	count=loops-count;
				3898	targetCapacity-=4*count;
				3899
				3900	if(offsets!=NULL) {
				3901	lastSource+=4*count;
				3902	while(count>0) {
				3903	*offsets++=sourceIndex++;
				3904	*offsets++=sourceIndex++;
				3905	*offsets++=sourceIndex++;
				3906	*offsets++=sourceIndex++;
				3907	--count;
				3908	}
				3909	}
				3910
				3911	c=0;
				3912	}
				3913	#endif
				3914
				3915	while(targetCapacity>0) {
				3916	/*
				3917	* Get a correct Unicode code point:
				3918	* a single UChar for a BMP code point or
				3919	* a matched surrogate pair for a "supplementary code point".
				3920	*/
				3921	c=*source++;
				3922	/*
				3923	* Do not immediately check for single surrogates:
				3924	* Assume that they are unassigned and check for them in that case.
				3925	* This speeds up the conversion of assigned characters.
				3926	*/
				3927	/* convert the Unicode code point in c into codepage bytes */
				3928	if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) {
				3929	*target++=(uint8_t)c;
				3930	--targetCapacity;
				3931	c=0;
				3932	continue;
				3933	}
				3934	value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
				3935	/* is this code point assigned, or do we use fallbacks? */
				3936	if(value>=minValue) {
				3937	/* assigned, write the output character bytes from value and length */
				3938	/* length==1 */
				3939	/* this is easy because we know that there is enough space */
				3940	*target++=(uint8_t)value;
				3941	--targetCapacity;
				3942
				3943	/* normal end of conversion: prepare for a new character */
				3944	c=0;
				3945	continue;
				3946	} else if(!U16_IS_SURROGATE(c)) {
				3947	/* normal, unassigned BMP character */
				3948	} else if(U16_IS_SURROGATE_LEAD(c)) {
				3949	getTrail:
				3950	if(source<sourceLimit) {
				3951	/* test the following code unit */
				3952	UChar trail=*source;
				3953	if(U16_IS_TRAIL(trail)) {
				3954	++source;
				3955	c=U16_GET_SUPPLEMENTARY(c, trail);
				3956	/* this codepage does not map supplementary code points */
				3957	/* callback(unassigned) */
				3958	} else {
				3959	/* this is an unmatched lead code unit (1st surrogate) */
				3960	/* callback(illegal) */
				3961	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				3962	break;
				3963	}
				3964	} else {
				3965	/* no more input */
				3966	if (pArgs->flush) {
				3967	*pErrorCode=U_TRUNCATED_CHAR_FOUND;
				3968	}
				3969	break;
				3970	}
				3971	} else {
				3972	/* this is an unmatched trail code unit (2nd surrogate) */
				3973	/* callback(illegal) */
				3974	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				3975	break;
				3976	}
				3977
				3978	/* c does not have a mapping */
				3979
				3980	/* get the number of code units for c to correctly advance sourceIndex */
				3981	length=U16_LENGTH(c);
				3982
				3983	/* set offsets since the start or the last extension */
				3984	if(offsets!=NULL) {
				3985	int32_t count=(int32_t)(source-lastSource);
				3986
				3987	/* do not set the offset for this character */
				3988	count-=length;
				3989
				3990	while(count>0) {
				3991	*offsets++=sourceIndex++;
				3992	--count;
				3993	}
				3994	/* offsets and sourceIndex are now set for the current character */
				3995	}
				3996
				3997	/* try an extension mapping */
				3998	lastSource=source;
				3999	c=_extFromU(cnv, cnv->sharedData,
				4000	c, &source, sourceLimit,
				4001	&target, (const uint8_t *)(pArgs->targetLimit),
				4002	&offsets, sourceIndex,
				4003	pArgs->flush,
				4004	pErrorCode);
				4005	sourceIndex+=length+(int32_t)(source-lastSource);
				4006	lastSource=source;
				4007
				4008	if(U_FAILURE(*pErrorCode)) {
				4009	/* not mappable or buffer overflow */
				4010	break;
				4011	} else {
				4012	/* a mapping was written to the target, continue */
				4013
				4014	/* recalculate the targetCapacity after an extension mapping */
				4015	targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
				4016	length=(int32_t)(sourceLimit-source);
				4017	if(length<targetCapacity) {
				4018	targetCapacity=length;
				4019	}
				4020	}
				4021
				4022	#if MBCS_UNROLL_SINGLE_FROM_BMP
				4023	/* unrolling makes it slower on Pentium III/Windows 2000?! */
				4024	goto unrolled;
				4025	#endif
				4026	}
				4027
				4028	if(U_SUCCESS(pErrorCode) && source<sourceLimit && target>=(uint8_t )pArgs->targetLimit) {
				4029	/* target is full */
				4030	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				4031	}
				4032
				4033	/* set offsets since the start or the last callback */
				4034	if(offsets!=NULL) {
				4035	size_t count=source-lastSource;
				4036	if (count > 0 && *pErrorCode == U_TRUNCATED_CHAR_FOUND) {
				4037	/*
				4038	Caller gave us a partial supplementary character,
				4039	which this function couldn't convert in any case.
				4040	The callback will handle the offset.
				4041	*/
				4042	count--;
				4043	}
				4044	while(count>0) {
				4045	*offsets++=sourceIndex++;
				4046	--count;
				4047	}
				4048	}
				4049
				4050	/* set the converter state back into UConverter */
				4051	cnv->fromUChar32=c;
				4052
				4053	/* write back the updated pointers */
				4054	pArgs->source=source;
				4055	pArgs->target=(char *)target;
				4056	pArgs->offsets=offsets;
				4057	}
				4058
				4059	U_CFUNC void
				4060	ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
				4061	UErrorCode *pErrorCode) {
				4062	UConverter *cnv;
				4063	const UChar source, sourceLimit;
				4064	uint8_t *target;
				4065	int32_t targetCapacity;
				4066	int32_t *offsets;
				4067
				4068	const uint16_t *table;
				4069	const uint16_t *mbcsIndex;
				4070	const uint8_t p, bytes;
				4071	uint8_t outputType;
				4072
				4073	UChar32 c;
				4074
				4075	int32_t prevSourceIndex, sourceIndex, nextSourceIndex;
				4076
				4077	uint32_t stage2Entry;
				4078	uint32_t asciiRoundtrips;
				4079	uint32_t value;
				4080	/* Shift-In and Shift-Out byte sequences differ by encoding scheme. */
				4081	uint8_t siBytes[2] = {0, 0};
				4082	uint8_t soBytes[2] = {0, 0};
				4083	uint8_t siLength, soLength;
				4084	int32_t length = 0, prevLength;
				4085	uint8_t unicodeMask;
				4086
				4087	cnv=pArgs->converter;
				4088
				4089	if(cnv->preFromUFirstCP>=0) {
				4090	/*
				4091	* pass sourceIndex=-1 because we continue from an earlier buffer
				4092	* in the future, this may change with continuous offsets
				4093	*/
				4094	ucnv_extContinueMatchFromU(cnv, pArgs, -1, pErrorCode);
				4095
				4096	if(U_FAILURE(*pErrorCode) \|\| cnv->preFromULength<0) {
				4097	return;
				4098	}
				4099	}
				4100
				4101	/* use optimized function if possible */
				4102	outputType=cnv->sharedData->mbcs.outputType;
				4103	unicodeMask=cnv->sharedData->mbcs.unicodeMask;
				4104	if(outputType==MBCS_OUTPUT_1 && !(unicodeMask&UCNV_HAS_SURROGATES)) {
				4105	if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
				4106	ucnv_MBCSSingleFromBMPWithOffsets(pArgs, pErrorCode);
				4107	} else {
				4108	ucnv_MBCSSingleFromUnicodeWithOffsets(pArgs, pErrorCode);
				4109	}
				4110	return;
				4111	} else if(outputType==MBCS_OUTPUT_2 && cnv->sharedData->mbcs.utf8Friendly) {
				4112	ucnv_MBCSDoubleFromUnicodeWithOffsets(pArgs, pErrorCode);
				4113	return;
				4114	}
				4115
				4116	/* set up the local pointers */
				4117	source=pArgs->source;
				4118	sourceLimit=pArgs->sourceLimit;
				4119	target=(uint8_t *)pArgs->target;
				4120	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
				4121	offsets=pArgs->offsets;
				4122
				4123	table=cnv->sharedData->mbcs.fromUnicodeTable;
				4124	if(cnv->sharedData->mbcs.utf8Friendly) {
				4125	mbcsIndex=cnv->sharedData->mbcs.mbcsIndex;
				4126	} else {
				4127	mbcsIndex=NULL;
				4128	}
				4129	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
				4130	bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
				4131	} else {
				4132	bytes=cnv->sharedData->mbcs.fromUnicodeBytes;
				4133	}
				4134	asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
				4135
				4136	/* get the converter state from UConverter */
				4137	c=cnv->fromUChar32;
				4138
				4139	if(outputType==MBCS_OUTPUT_2_SISO) {
				4140	prevLength=cnv->fromUnicodeStatus;
				4141	if(prevLength==0) {
				4142	/* set the real value */
				4143	prevLength=1;
				4144	}
				4145	} else {
				4146	/* prevent fromUnicodeStatus from being set to something non-0 */
				4147	prevLength=0;
				4148	}
				4149
				4150	/* sourceIndex=-1 if the current character began in the previous buffer */
				4151	prevSourceIndex=-1;
				4152	sourceIndex= c==0 ? 0 : -1;
				4153	nextSourceIndex=0;
				4154
				4155	/* Get the SI/SO character for the converter */
				4156	siLength = getSISOBytes(SI, cnv->options, siBytes);
				4157	soLength = getSISOBytes(SO, cnv->options, soBytes);
				4158
				4159	/* conversion loop */
				4160	/*
				4161	* This is another piece of ugly code:
				4162	* A goto into the loop if the converter state contains a first surrogate
				4163	* from the previous function call.
				4164	* It saves me to check in each loop iteration a check of if(c==0)
				4165	* and duplicating the trail-surrogate-handling code in the else
				4166	* branch of that check.
				4167	* I could not find any other way to get around this other than
				4168	* using a function call for the conversion and callback, which would
				4169	* be even more inefficient.
				4170	*
				4171	* Markus Scherer 2000-jul-19
				4172	*/
				4173	if(c!=0 && targetCapacity>0) {
				4174	goto getTrail;
				4175	}
				4176
				4177	while(source<sourceLimit) {
				4178	/*
				4179	* This following test is to see if available input would overflow the output.
				4180	* It does not catch output of more than one byte that
				4181	* overflows as a result of a multi-byte character or callback output
				4182	* from the last source character.
				4183	* Therefore, those situations also test for overflows and will
				4184	* then break the loop, too.
				4185	*/
				4186	if(targetCapacity>0) {
				4187	/*
				4188	* Get a correct Unicode code point:
				4189	* a single UChar for a BMP code point or
				4190	* a matched surrogate pair for a "supplementary code point".
				4191	*/
				4192	c=*source++;
				4193	++nextSourceIndex;
				4194	if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) {
				4195	*target++=(uint8_t)c;
				4196	if(offsets!=NULL) {
				4197	*offsets++=sourceIndex;
				4198	prevSourceIndex=sourceIndex;
				4199	sourceIndex=nextSourceIndex;
				4200	}
				4201	--targetCapacity;
				4202	c=0;
				4203	continue;
				4204	}
				4205	/*
				4206	* utf8Friendly table: Test for <=0xd7ff rather than <=MBCS_FAST_MAX
				4207	* to avoid dealing with surrogates.
				4208	* MBCS_FAST_MAX must be >=0xd7ff.
				4209	*/
				4210	if(c<=0xd7ff && mbcsIndex!=NULL) {
				4211	value=mbcsIndex[c>>6];
				4212
				4213	/* get the bytes and the length for the output (copied from below and adapted for utf8Friendly data) */
				4214	/* There are only roundtrips (!=0) and no-mapping (==0) entries. */
				4215	switch(outputType) {
				4216	case MBCS_OUTPUT_2:
				4217	value=((const uint16_t *)bytes)[value +(c&0x3f)];
				4218	if(value<=0xff) {
				4219	if(value==0) {
				4220	goto unassigned;
				4221	} else {
				4222	length=1;
				4223	}
				4224	} else {
				4225	length=2;
				4226	}
				4227	break;
				4228	case MBCS_OUTPUT_2_SISO:
				4229	/* 1/2-byte stateful with Shift-In/Shift-Out */
				4230	/*
				4231	* Save the old state in the converter object
				4232	* right here, then change the local prevLength state variable if necessary.
				4233	* Then, if this character turns out to be unassigned or a fallback that
				4234	* is not taken, the callback code must not save the new state in the converter
				4235	* because the new state is for a character that is not output.
				4236	* However, the callback must still restore the state from the converter
				4237	* in case the callback function changed it for its output.
				4238	*/
				4239	cnv->fromUnicodeStatus=prevLength; /* save the old state */
				4240	value=((const uint16_t *)bytes)[value +(c&0x3f)];
				4241	if(value<=0xff) {
				4242	if(value==0) {
				4243	goto unassigned;
				4244	} else if(prevLength<=1) {
				4245	length=1;
				4246	} else {
				4247	/* change from double-byte mode to single-byte */
				4248	if (siLength == 1) {
				4249	value\|=(uint32_t)siBytes[0]<<8;
				4250	length = 2;
				4251	} else if (siLength == 2) {
				4252	value\|=(uint32_t)siBytes[1]<<8;
				4253	value\|=(uint32_t)siBytes[0]<<16;
				4254	length = 3;
				4255	}
				4256	prevLength=1;
				4257	}
				4258	} else {
				4259	if(prevLength==2) {
				4260	length=2;
				4261	} else {
				4262	/* change from single-byte mode to double-byte */
				4263	if (soLength == 1) {
				4264	value\|=(uint32_t)soBytes[0]<<16;
				4265	length = 3;
				4266	} else if (soLength == 2) {
				4267	value\|=(uint32_t)soBytes[1]<<16;
				4268	value\|=(uint32_t)soBytes[0]<<24;
				4269	length = 4;
				4270	}
				4271	prevLength=2;
				4272	}
				4273	}
				4274	break;
				4275	case MBCS_OUTPUT_DBCS_ONLY:
				4276	/* table with single-byte results, but only DBCS mappings used */
				4277	value=((const uint16_t *)bytes)[value +(c&0x3f)];
				4278	if(value<=0xff) {
				4279	/* no mapping or SBCS result, not taken for DBCS-only */
				4280	goto unassigned;
				4281	} else {
				4282	length=2;
				4283	}
				4284	break;
				4285	case MBCS_OUTPUT_3:
				4286	p=bytes+(value+(c&0x3f))*3;
				4287	value=((uint32_t)*p<<16)\|((uint32_t)p[1]<<8)\|p[2];
				4288	if(value<=0xff) {
				4289	if(value==0) {
				4290	goto unassigned;
				4291	} else {
				4292	length=1;
				4293	}
				4294	} else if(value<=0xffff) {
				4295	length=2;
				4296	} else {
				4297	length=3;
				4298	}
				4299	break;
				4300	case MBCS_OUTPUT_4:
				4301	value=((const uint32_t *)bytes)[value +(c&0x3f)];
				4302	if(value<=0xff) {
				4303	if(value==0) {
				4304	goto unassigned;
				4305	} else {
				4306	length=1;
				4307	}
				4308	} else if(value<=0xffff) {
				4309	length=2;
				4310	} else if(value<=0xffffff) {
				4311	length=3;
				4312	} else {
				4313	length=4;
				4314	}
				4315	break;
				4316	case MBCS_OUTPUT_3_EUC:
				4317	value=((const uint16_t *)bytes)[value +(c&0x3f)];
				4318	/* EUC 16-bit fixed-length representation */
				4319	if(value<=0xff) {
				4320	if(value==0) {
				4321	goto unassigned;
				4322	} else {
				4323	length=1;
				4324	}
				4325	} else if((value&0x8000)==0) {
				4326	value\|=0x8e8000;
				4327	length=3;
				4328	} else if((value&0x80)==0) {
				4329	value\|=0x8f0080;
				4330	length=3;
				4331	} else {
				4332	length=2;
				4333	}
				4334	break;
				4335	case MBCS_OUTPUT_4_EUC:
				4336	p=bytes+(value+(c&0x3f))*3;
				4337	value=((uint32_t)*p<<16)\|((uint32_t)p[1]<<8)\|p[2];
				4338	/* EUC 16-bit fixed-length representation applied to the first two bytes */
				4339	if(value<=0xff) {
				4340	if(value==0) {
				4341	goto unassigned;
				4342	} else {
				4343	length=1;
				4344	}
				4345	} else if(value<=0xffff) {
				4346	length=2;
				4347	} else if((value&0x800000)==0) {
				4348	value\|=0x8e800000;
				4349	length=4;
				4350	} else if((value&0x8000)==0) {
				4351	value\|=0x8f008000;
				4352	length=4;
				4353	} else {
				4354	length=3;
				4355	}
				4356	break;
				4357	default:
				4358	/* must not occur */
				4359	/*
				4360	* To avoid compiler warnings that value & length may be
				4361	* used without having been initialized, we set them here.
				4362	* In reality, this is unreachable code.
				4363	* Not having a default branch also causes warnings with
				4364	* some compilers.
				4365	*/
				4366	value=0;
				4367	length=0;
				4368	break;
				4369	}
				4370	/* output the value */
				4371	} else {
				4372	/*
				4373	* This also tests if the codepage maps single surrogates.
				4374	* If it does, then surrogates are not paired but mapped separately.
				4375	* Note that in this case unmatched surrogates are not detected.
				4376	*/
				4377	if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
				4378	if(U16_IS_SURROGATE_LEAD(c)) {
				4379	getTrail:
				4380	if(source<sourceLimit) {
				4381	/* test the following code unit */
				4382	UChar trail=*source;
				4383	if(U16_IS_TRAIL(trail)) {
				4384	++source;
				4385	++nextSourceIndex;
				4386	c=U16_GET_SUPPLEMENTARY(c, trail);
				4387	if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
				4388	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
				4389	cnv->fromUnicodeStatus=prevLength; /* save the old state */
				4390	/* callback(unassigned) */
				4391	goto unassigned;
				4392	}
				4393	/* convert this supplementary code point */
				4394	/* exit this condition tree */
				4395	} else {
				4396	/* this is an unmatched lead code unit (1st surrogate) */
				4397	/* callback(illegal) */
				4398	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				4399	break;
				4400	}
				4401	} else {
				4402	/* no more input */
				4403	break;
				4404	}
				4405	} else {
				4406	/* this is an unmatched trail code unit (2nd surrogate) */
				4407	/* callback(illegal) */
				4408	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				4409	break;
				4410	}
				4411	}
				4412
				4413	/* convert the Unicode code point in c into codepage bytes */
				4414
				4415	/*
				4416	* The basic lookup is a triple-stage compact array (trie) lookup.
				4417	* For details see the beginning of this file.
				4418	*
				4419	* Single-byte codepages are handled with a different data structure
				4420	* by _MBCSSingle... functions.
				4421	*
				4422	* The result consists of a 32-bit value from stage 2 and
				4423	* a pointer to as many bytes as are stored per character.
				4424	* The pointer points to the character's bytes in stage 3.
				4425	* Bits 15..0 of the stage 2 entry contain the stage 3 index
				4426	* for that pointer, while bits 31..16 are flags for which of
				4427	* the 16 characters in the block are roundtrip-assigned.
				4428	*
				4429	* For 2-byte and 4-byte codepages, the bytes are stored as uint16_t
				4430	* respectively as uint32_t, in the platform encoding.
				4431	* For 3-byte codepages, the bytes are always stored in big-endian order.
				4432	*
				4433	* For EUC encodings that use only either 0x8e or 0x8f as the first
				4434	* byte of their longest byte sequences, the first two bytes in
				4435	* this third stage indicate with their 7th bits whether these bytes
				4436	* are to be written directly or actually need to be preceded by
				4437	* one of the two Single-Shift codes. With this, the third stage
				4438	* stores one byte fewer per character than the actual maximum length of
				4439	* EUC byte sequences.
				4440	*
				4441	* Other than that, leading zero bytes are removed and the other
				4442	* bytes output. A single zero byte may be output if the "assigned"
				4443	* bit in stage 2 was on.
				4444	* The data structure does not support zero byte output as a fallback,
				4445	* and also does not allow output of leading zeros.
				4446	*/
				4447	stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
				4448
				4449	/* get the bytes and the length for the output */
				4450	switch(outputType) {
				4451	case MBCS_OUTPUT_2:
				4452	value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
				4453	if(value<=0xff) {
				4454	length=1;
				4455	} else {
				4456	length=2;
				4457	}
				4458	break;
				4459	case MBCS_OUTPUT_2_SISO:
				4460	/* 1/2-byte stateful with Shift-In/Shift-Out */
				4461	/*
				4462	* Save the old state in the converter object
				4463	* right here, then change the local prevLength state variable if necessary.
				4464	* Then, if this character turns out to be unassigned or a fallback that
				4465	* is not taken, the callback code must not save the new state in the converter
				4466	* because the new state is for a character that is not output.
				4467	* However, the callback must still restore the state from the converter
				4468	* in case the callback function changed it for its output.
				4469	*/
				4470	cnv->fromUnicodeStatus=prevLength; /* save the old state */
				4471	value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
				4472	if(value<=0xff) {
				4473	if(value==0 && MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)==0) {
				4474	/* no mapping, leave value==0 */
				4475	length=0;
				4476	} else if(prevLength<=1) {
				4477	length=1;
				4478	} else {
				4479	/* change from double-byte mode to single-byte */
				4480	if (siLength == 1) {
				4481	value\|=(uint32_t)siBytes[0]<<8;
				4482	length = 2;
				4483	} else if (siLength == 2) {
				4484	value\|=(uint32_t)siBytes[1]<<8;
				4485	value\|=(uint32_t)siBytes[0]<<16;
				4486	length = 3;
				4487	}
				4488	prevLength=1;
				4489	}
				4490	} else {
				4491	if(prevLength==2) {
				4492	length=2;
				4493	} else {
				4494	/* change from single-byte mode to double-byte */
				4495	if (soLength == 1) {
				4496	value\|=(uint32_t)soBytes[0]<<16;
				4497	length = 3;
				4498	} else if (soLength == 2) {
				4499	value\|=(uint32_t)soBytes[1]<<16;
				4500	value\|=(uint32_t)soBytes[0]<<24;
				4501	length = 4;
				4502	}
				4503	prevLength=2;
				4504	}
				4505	}
				4506	break;
				4507	case MBCS_OUTPUT_DBCS_ONLY:
				4508	/* table with single-byte results, but only DBCS mappings used */
				4509	value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
				4510	if(value<=0xff) {
				4511	/* no mapping or SBCS result, not taken for DBCS-only */
				4512	value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
				4513	length=0;
				4514	} else {
				4515	length=2;
				4516	}
				4517	break;
				4518	case MBCS_OUTPUT_3:
				4519	p=MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
				4520	value=((uint32_t)*p<<16)\|((uint32_t)p[1]<<8)\|p[2];
				4521	if(value<=0xff) {
				4522	length=1;
				4523	} else if(value<=0xffff) {
				4524	length=2;
				4525	} else {
				4526	length=3;
				4527	}
				4528	break;
				4529	case MBCS_OUTPUT_4:
				4530	value=MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c);
				4531	if(value<=0xff) {
				4532	length=1;
				4533	} else if(value<=0xffff) {
				4534	length=2;
				4535	} else if(value<=0xffffff) {
				4536	length=3;
				4537	} else {
				4538	length=4;
				4539	}
				4540	break;
				4541	case MBCS_OUTPUT_3_EUC:
				4542	value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
				4543	/* EUC 16-bit fixed-length representation */
				4544	if(value<=0xff) {
				4545	length=1;
				4546	} else if((value&0x8000)==0) {
				4547	value\|=0x8e8000;
				4548	length=3;
				4549	} else if((value&0x80)==0) {
				4550	value\|=0x8f0080;
				4551	length=3;
				4552	} else {
				4553	length=2;
				4554	}
				4555	break;
				4556	case MBCS_OUTPUT_4_EUC:
				4557	p=MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
				4558	value=((uint32_t)*p<<16)\|((uint32_t)p[1]<<8)\|p[2];
				4559	/* EUC 16-bit fixed-length representation applied to the first two bytes */
				4560	if(value<=0xff) {
				4561	length=1;
				4562	} else if(value<=0xffff) {
				4563	length=2;
				4564	} else if((value&0x800000)==0) {
				4565	value\|=0x8e800000;
				4566	length=4;
				4567	} else if((value&0x8000)==0) {
				4568	value\|=0x8f008000;
				4569	length=4;
				4570	} else {
				4571	length=3;
				4572	}
				4573	break;
				4574	default:
				4575	/* must not occur */
				4576	/*
				4577	* To avoid compiler warnings that value & length may be
				4578	* used without having been initialized, we set them here.
				4579	* In reality, this is unreachable code.
				4580	* Not having a default branch also causes warnings with
				4581	* some compilers.
				4582	*/
				4583	value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
				4584	length=0;
				4585	break;
				4586	}
				4587
				4588	/* is this code point assigned, or do we use fallbacks? */
				4589	if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)!=0 \|\|
				4590	(UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0))
				4591	) {
				4592	/*
				4593	* We allow a 0 byte output if the "assigned" bit is set for this entry.
				4594	* There is no way with this data structure for fallback output
				4595	* to be a zero byte.
				4596	*/
				4597
				4598	unassigned:
				4599	/* try an extension mapping */
				4600	pArgs->source=source;
				4601	c=_extFromU(cnv, cnv->sharedData,
				4602	c, &source, sourceLimit,
				4603	&target, target+targetCapacity,
				4604	&offsets, sourceIndex,
				4605	pArgs->flush,
				4606	pErrorCode);
				4607	nextSourceIndex+=(int32_t)(source-pArgs->source);
				4608	prevLength=cnv->fromUnicodeStatus; /* restore SISO state */
				4609
				4610	if(U_FAILURE(*pErrorCode)) {
				4611	/* not mappable or buffer overflow */
				4612	break;
				4613	} else {
				4614	/* a mapping was written to the target, continue */
				4615
				4616	/* recalculate the targetCapacity after an extension mapping */
				4617	targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
				4618
				4619	/* normal end of conversion: prepare for a new character */
				4620	if(offsets!=NULL) {
				4621	prevSourceIndex=sourceIndex;
				4622	sourceIndex=nextSourceIndex;
				4623	}
				4624	continue;
				4625	}
				4626	}
				4627	}
				4628
				4629	/* write the output character bytes from value and length */
				4630	/* from the first if in the loop we know that targetCapacity>0 */
				4631	if(length<=targetCapacity) {
				4632	if(offsets==NULL) {
				4633	switch(length) {
				4634	/* each branch falls through to the next one */
				4635	case 4:
				4636	*target++=(uint8_t)(value>>24);
				4637	case 3: /fall through/
				4638	*target++=(uint8_t)(value>>16);
				4639	case 2: /fall through/
				4640	*target++=(uint8_t)(value>>8);
				4641	case 1: /fall through/
				4642	*target++=(uint8_t)value;
				4643	default:
				4644	/* will never occur */
				4645	break;
				4646	}
				4647	} else {
				4648	switch(length) {
				4649	/* each branch falls through to the next one */
				4650	case 4:
				4651	*target++=(uint8_t)(value>>24);
				4652	*offsets++=sourceIndex;
				4653	case 3: /fall through/
				4654	*target++=(uint8_t)(value>>16);
				4655	*offsets++=sourceIndex;
				4656	case 2: /fall through/
				4657	*target++=(uint8_t)(value>>8);
				4658	*offsets++=sourceIndex;
				4659	case 1: /fall through/
				4660	*target++=(uint8_t)value;
				4661	*offsets++=sourceIndex;
				4662	default:
				4663	/* will never occur */
				4664	break;
				4665	}
				4666	}
				4667	targetCapacity-=length;
				4668	} else {
				4669	uint8_t *charErrorBuffer;
				4670
				4671	/*
				4672	* We actually do this backwards here:
				4673	* In order to save an intermediate variable, we output
				4674	* first to the overflow buffer what does not fit into the
				4675	* regular target.
				4676	*/
				4677	/* we know that 1<=targetCapacity<length<=4 */
				4678	length-=targetCapacity;
				4679	charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
				4680	switch(length) {
				4681	/* each branch falls through to the next one */
				4682	case 3:
				4683	*charErrorBuffer++=(uint8_t)(value>>16);
				4684	case 2: /fall through/
				4685	*charErrorBuffer++=(uint8_t)(value>>8);
				4686	case 1: /fall through/
				4687	*charErrorBuffer=(uint8_t)value;
				4688	default:
				4689	/* will never occur */
				4690	break;
				4691	}
				4692	cnv->charErrorBufferLength=(int8_t)length;
				4693
				4694	/* now output what fits into the regular target */
				4695	value>>=8length; / length was reduced by targetCapacity */
				4696	switch(targetCapacity) {
				4697	/* each branch falls through to the next one */
				4698	case 3:
				4699	*target++=(uint8_t)(value>>16);
				4700	if(offsets!=NULL) {
				4701	*offsets++=sourceIndex;
				4702	}
				4703	case 2: /fall through/
				4704	*target++=(uint8_t)(value>>8);
				4705	if(offsets!=NULL) {
				4706	*offsets++=sourceIndex;
				4707	}
				4708	case 1: /fall through/
				4709	*target++=(uint8_t)value;
				4710	if(offsets!=NULL) {
				4711	*offsets++=sourceIndex;
				4712	}
				4713	default:
				4714	/* will never occur */
				4715	break;
				4716	}
				4717
				4718	/* target overflow */
				4719	targetCapacity=0;
				4720	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				4721	c=0;
				4722	break;
				4723	}
				4724
				4725	/* normal end of conversion: prepare for a new character */
				4726	c=0;
				4727	if(offsets!=NULL) {
				4728	prevSourceIndex=sourceIndex;
				4729	sourceIndex=nextSourceIndex;
				4730	}
				4731	continue;
				4732	} else {
				4733	/* target is full */
				4734	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				4735	break;
				4736	}
				4737	}
				4738
				4739	/*
				4740	* the end of the input stream and detection of truncated input
				4741	* are handled by the framework, but for EBCDIC_STATEFUL conversion
				4742	* we need to emit an SI at the very end
				4743	*
				4744	* conditions:
				4745	* successful
				4746	* EBCDIC_STATEFUL in DBCS mode
				4747	* end of input and no truncated input
				4748	*/
				4749	if( U_SUCCESS(*pErrorCode) &&
				4750	outputType==MBCS_OUTPUT_2_SISO && prevLength==2 &&
				4751	pArgs->flush && source>=sourceLimit && c==0
				4752	) {
				4753	/* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */
				4754	if(targetCapacity>0) {
				4755	*target++=(uint8_t)siBytes[0];
				4756	if (siLength == 2) {
				4757	if (targetCapacity<2) {
				4758	cnv->charErrorBuffer[0]=(uint8_t)siBytes[1];
				4759	cnv->charErrorBufferLength=1;
				4760	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				4761	} else {
				4762	*target++=(uint8_t)siBytes[1];
				4763	}
				4764	}
				4765	if(offsets!=NULL) {
				4766	/* set the last source character's index (sourceIndex points at sourceLimit now) */
				4767	*offsets++=prevSourceIndex;
				4768	}
				4769	} else {
				4770	/* target is full */
				4771	cnv->charErrorBuffer[0]=(uint8_t)siBytes[0];
				4772	if (siLength == 2) {
				4773	cnv->charErrorBuffer[1]=(uint8_t)siBytes[1];
				4774	}
				4775	cnv->charErrorBufferLength=siLength;
				4776	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				4777	}
				4778	prevLength=1; /* we switched into SBCS */
				4779	}
				4780
				4781	/* set the converter state back into UConverter */
				4782	cnv->fromUChar32=c;
				4783	cnv->fromUnicodeStatus=prevLength;
				4784
				4785	/* write back the updated pointers */
				4786	pArgs->source=source;
				4787	pArgs->target=(char *)target;
				4788	pArgs->offsets=offsets;
				4789	}
				4790
				4791	/*
				4792	* This is another simple conversion function for internal use by other
				4793	* conversion implementations.
				4794	* It does not use the converter state nor call callbacks.
				4795	* It does not handle the EBCDIC swaplfnl option (set in UConverter).
				4796	* It handles conversion extensions but not GB 18030.
				4797	*
				4798	* It converts one single Unicode code point into codepage bytes, encoded
				4799	* as one 32-bit value. The function returns the number of bytes in *pValue:
				4800	* 1..4 the number of bytes in *pValue
				4801	* 0 unassigned (*pValue undefined)
				4802	* -1 illegal (currently not used, *pValue undefined)
				4803	*
				4804	* *pValue will contain the resulting bytes with the last byte in bits 7..0,
				4805	* the second to last byte in bits 15..8, etc.
				4806	* Currently, the function assumes but does not check that 0<=c<=0x10ffff.
				4807	*/
				4808	U_CFUNC int32_t
				4809	ucnv_MBCSFromUChar32(UConverterSharedData *sharedData,
				4810	UChar32 c, uint32_t *pValue,
				4811	UBool useFallback) {
				4812	const int32_t *cx;
				4813	const uint16_t *table;
				4814	#if 0
				4815	/* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */
				4816	const uint8_t *p;
				4817	#endif
				4818	uint32_t stage2Entry;
				4819	uint32_t value;
				4820	int32_t length;
				4821
				4822	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
				4823	if(c<=0xffff \|\| (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
				4824	table=sharedData->mbcs.fromUnicodeTable;
				4825
				4826	/* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
				4827	if(sharedData->mbcs.outputType==MBCS_OUTPUT_1) {
				4828	value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
				4829	/* is this code point assigned, or do we use fallbacks? */
				4830	if(useFallback ? value>=0x800 : value>=0xc00) {
				4831	*pValue=value&0xff;
				4832	return 1;
				4833	}
				4834	} else /* outputType!=MBCS_OUTPUT_1 */ {
				4835	stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
				4836
				4837	/* get the bytes and the length for the output */
				4838	switch(sharedData->mbcs.outputType) {
				4839	case MBCS_OUTPUT_2:
				4840	value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
				4841	if(value<=0xff) {
				4842	length=1;
				4843	} else {
				4844	length=2;
				4845	}
				4846	break;
				4847	#if 0
				4848	/* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */
				4849	case MBCS_OUTPUT_DBCS_ONLY:
				4850	/* table with single-byte results, but only DBCS mappings used */
				4851	value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
				4852	if(value<=0xff) {
				4853	/* no mapping or SBCS result, not taken for DBCS-only */
				4854	value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
				4855	length=0;
				4856	} else {
				4857	length=2;
				4858	}
				4859	break;
				4860	case MBCS_OUTPUT_3:
				4861	p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
				4862	value=((uint32_t)*p<<16)\|((uint32_t)p[1]<<8)\|p[2];
				4863	if(value<=0xff) {
				4864	length=1;
				4865	} else if(value<=0xffff) {
				4866	length=2;
				4867	} else {
				4868	length=3;
				4869	}
				4870	break;
				4871	case MBCS_OUTPUT_4:
				4872	value=MBCS_VALUE_4_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
				4873	if(value<=0xff) {
				4874	length=1;
				4875	} else if(value<=0xffff) {
				4876	length=2;
				4877	} else if(value<=0xffffff) {
				4878	length=3;
				4879	} else {
				4880	length=4;
				4881	}
				4882	break;
				4883	case MBCS_OUTPUT_3_EUC:
				4884	value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
				4885	/* EUC 16-bit fixed-length representation */
				4886	if(value<=0xff) {
				4887	length=1;
				4888	} else if((value&0x8000)==0) {
				4889	value\|=0x8e8000;
				4890	length=3;
				4891	} else if((value&0x80)==0) {
				4892	value\|=0x8f0080;
				4893	length=3;
				4894	} else {
				4895	length=2;
				4896	}
				4897	break;
				4898	case MBCS_OUTPUT_4_EUC:
				4899	p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
				4900	value=((uint32_t)*p<<16)\|((uint32_t)p[1]<<8)\|p[2];
				4901	/* EUC 16-bit fixed-length representation applied to the first two bytes */
				4902	if(value<=0xff) {
				4903	length=1;
				4904	} else if(value<=0xffff) {
				4905	length=2;
				4906	} else if((value&0x800000)==0) {
				4907	value\|=0x8e800000;
				4908	length=4;
				4909	} else if((value&0x8000)==0) {
				4910	value\|=0x8f008000;
				4911	length=4;
				4912	} else {
				4913	length=3;
				4914	}
				4915	break;
				4916	#endif
				4917	default:
				4918	/* must not occur */
				4919	return -1;
				4920	}
				4921
				4922	/* is this code point assigned, or do we use fallbacks? */
				4923	if( MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) \|\|
				4924	(FROM_U_USE_FALLBACK(useFallback, c) && value!=0)
				4925	) {
				4926	/*
				4927	* We allow a 0 byte output if the "assigned" bit is set for this entry.
				4928	* There is no way with this data structure for fallback output
				4929	* to be a zero byte.
				4930	*/
				4931	/* assigned */
				4932	*pValue=value;
				4933	return length;
				4934	}
				4935	}
				4936	}
				4937
				4938	cx=sharedData->mbcs.extIndexes;
				4939	if(cx!=NULL) {
				4940	length=ucnv_extSimpleMatchFromU(cx, c, pValue, useFallback);
				4941	return length>=0 ? length : -length; /* return abs(length); */
				4942	}
				4943
				4944	/* unassigned */
				4945	return 0;
				4946	}
				4947
				4948
				4949	#if 0
				4950	/*
				4951	* This function has been moved to ucnv2022.c for inlining.
				4952	* This implementation is here only for documentation purposes
				4953	*/
				4954
				4955	/**
				4956	* This version of ucnv_MBCSFromUChar32() is optimized for single-byte codepages.
				4957	* It does not handle the EBCDIC swaplfnl option (set in UConverter).
				4958	* It does not handle conversion extensions (_extFromU()).
				4959	*
				4960	* It returns the codepage byte for the code point, or -1 if it is unassigned.
				4961	*/
				4962	U_CFUNC int32_t
				4963	ucnv_MBCSSingleFromUChar32(UConverterSharedData *sharedData,
				4964	UChar32 c,
				4965	UBool useFallback) {
				4966	const uint16_t *table;
				4967	int32_t value;
				4968
				4969	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
				4970	if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
				4971	return -1;
				4972	}
				4973
				4974	/* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
				4975	table=sharedData->mbcs.fromUnicodeTable;
				4976
				4977	/* get the byte for the output */
				4978	value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
				4979	/* is this code point assigned, or do we use fallbacks? */
				4980	if(useFallback ? value>=0x800 : value>=0xc00) {
				4981	return value&0xff;
				4982	} else {
				4983	return -1;
				4984	}
				4985	}
				4986	#endif
				4987
				4988	/* MBCS-from-UTF-8 conversion functions ------------------------------------- */
				4989
				4990	/* minimum code point values for n-byte UTF-8 sequences, n=0..4 */
				4991	static const UChar32
				4992	utf8_minLegal[5]={ 0, 0, 0x80, 0x800, 0x10000 };
				4993
				4994	/* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... */
				4995	static const UChar32
				4996	utf8_offsets[7]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 };
				4997
				4998	static void
				4999	ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
				5000	UConverterToUnicodeArgs *pToUArgs,
				5001	UErrorCode *pErrorCode) {
				5002	UConverter utf8, cnv;
				5003	const uint8_t source, sourceLimit;
				5004	uint8_t *target;
				5005	int32_t targetCapacity;
				5006
				5007	const uint16_t table, sbcsIndex;
				5008	const uint16_t *results;
				5009
				5010	int8_t oldToULength, toULength, toULimit;
				5011
				5012	UChar32 c;
				5013	uint8_t b, t1, t2;
				5014
				5015	uint32_t asciiRoundtrips;
				5016	uint16_t value, minValue;
				5017	UBool hasSupplementary;
				5018
				5019	/* set up the local pointers */
				5020	utf8=pToUArgs->converter;
				5021	cnv=pFromUArgs->converter;
				5022	source=(uint8_t *)pToUArgs->source;
				5023	sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
				5024	target=(uint8_t *)pFromUArgs->target;
				5025	targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
				5026
				5027	table=cnv->sharedData->mbcs.fromUnicodeTable;
				5028	sbcsIndex=cnv->sharedData->mbcs.sbcsIndex;
				5029	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
				5030	results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
				5031	} else {
				5032	results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
				5033	}
				5034	asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
				5035
				5036	if(cnv->useFallback) {
				5037	/* use all roundtrip and fallback results */
				5038	minValue=0x800;
				5039	} else {
				5040	/* use only roundtrips and fallbacks from private-use characters */
				5041	minValue=0xc00;
				5042	}
				5043	hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
				5044
				5045	/* get the converter state from the UTF-8 UConverter */
				5046	c=(UChar32)utf8->toUnicodeStatus;
				5047	if(c!=0) {
				5048	toULength=oldToULength=utf8->toULength;
				5049	toULimit=(int8_t)utf8->mode;
				5050	} else {
				5051	toULength=oldToULength=toULimit=0;
				5052	}
				5053
				5054	/*
				5055	* Make sure that the last byte sequence before sourceLimit is complete
				5056	* or runs into a lead byte.
				5057	* Do not go back into the bytes that will be read for finishing a partial
				5058	* sequence from the previous buffer.
				5059	* In the conversion loop compare source with sourceLimit only once
				5060	* per multi-byte character.
				5061	*/
				5062	{
				5063	int32_t i, length;
				5064
				5065	length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
				5066	for(i=0; i<3 && i<length;) {
				5067	b=*(sourceLimit-i-1);
				5068	if(U8_IS_TRAIL(b)) {
				5069	++i;
				5070	} else {
				5071	if(i<U8_COUNT_TRAIL_BYTES(b)) {
				5072	/* exit the conversion loop before the lead byte if there are not enough trail bytes for it */
				5073	sourceLimit-=i+1;
				5074	}
				5075	break;
				5076	}
				5077	}
				5078	}
				5079
				5080	if(c!=0 && targetCapacity>0) {
				5081	utf8->toUnicodeStatus=0;
				5082	utf8->toULength=0;
				5083	goto moreBytes;
				5084	/*
				5085	* Note: We could avoid the goto by duplicating some of the moreBytes
				5086	* code, but only up to the point of collecting a complete UTF-8
				5087	* sequence; then recurse for the toUBytes[toULength]
				5088	* and then continue with normal conversion.
				5089	*
				5090	* If so, move this code to just after initializing the minimum
				5091	* set of local variables for reading the UTF-8 input
				5092	* (utf8, source, target, limits but not cnv, table, minValue, etc.).
				5093	*
				5094	* Potential advantages:
				5095	* - avoid the goto
				5096	* - oldToULength could become a local variable in just those code blocks
				5097	* that deal with buffer boundaries
				5098	* - possibly faster if the goto prevents some compiler optimizations
				5099	* (this would need measuring to confirm)
				5100	* Disadvantage:
				5101	* - code duplication
				5102	*/
				5103	}
				5104
				5105	/* conversion loop */
				5106	while(source<sourceLimit) {
				5107	if(targetCapacity>0) {
				5108	b=*source++;
				5109	if((int8_t)b>=0) {
				5110	/* convert ASCII */
				5111	if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) {
				5112	*target++=(uint8_t)b;
				5113	--targetCapacity;
				5114	continue;
				5115	} else {
				5116	c=b;
				5117	value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, 0, c);
				5118	}
				5119	} else {
				5120	if(b<0xe0) {
				5121	if( /* handle U+0080..U+07FF inline */
				5122	b>=0xc2 &&
				5123	(t1=(uint8_t)(*source-0x80)) <= 0x3f
				5124	) {
				5125	c=b&0x1f;
				5126	++source;
				5127	value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, c, t1);
				5128	if(value>=minValue) {
				5129	*target++=(uint8_t)value;
				5130	--targetCapacity;
				5131	continue;
				5132	} else {
				5133	c=(c<<6)\|t1;
				5134	}
				5135	} else {
				5136	c=-1;
				5137	}
				5138	} else if(b==0xe0) {
				5139	if( /* handle U+0800..U+0FFF inline */
				5140	(t1=(uint8_t)(source[0]-0x80)) <= 0x3f && t1 >= 0x20 &&
				5141	(t2=(uint8_t)(source[1]-0x80)) <= 0x3f
				5142	) {
				5143	c=t1;
				5144	source+=2;
				5145	value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, c, t2);
				5146	if(value>=minValue) {
				5147	*target++=(uint8_t)value;
				5148	--targetCapacity;
				5149	continue;
				5150	} else {
				5151	c=(c<<6)\|t2;
				5152	}
				5153	} else {
				5154	c=-1;
				5155	}
				5156	} else {
				5157	c=-1;
				5158	}
				5159
				5160	if(c<0) {
				5161	/* handle "complicated" and error cases, and continuing partial characters */
				5162	oldToULength=0;
				5163	toULength=1;
				5164	toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
				5165	c=b;
				5166	moreBytes:
				5167	while(toULength<toULimit) {
				5168	/*
				5169	* The sourceLimit may have been adjusted before the conversion loop
				5170	* to stop before a truncated sequence.
				5171	* Here we need to use the real limit in case we have two truncated
				5172	* sequences at the end.
				5173	* See ticket #7492.
				5174	*/
				5175	if(source<(uint8_t *)pToUArgs->sourceLimit) {
				5176	b=*source;
				5177	if(U8_IS_TRAIL(b)) {
				5178	++source;
				5179	++toULength;
				5180	c=(c<<6)+b;
				5181	} else {
				5182	break; /* sequence too short, stop with toULength<toULimit */
				5183	}
				5184	} else {
				5185	/* store the partial UTF-8 character, compatible with the regular UTF-8 converter */
				5186	source-=(toULength-oldToULength);
				5187	while(oldToULength<toULength) {
				5188	utf8->toUBytes[oldToULength++]=*source++;
				5189	}
				5190	utf8->toUnicodeStatus=c;
				5191	utf8->toULength=toULength;
				5192	utf8->mode=toULimit;
				5193	pToUArgs->source=(char *)source;
				5194	pFromUArgs->target=(char *)target;
				5195	return;
				5196	}
				5197	}
				5198
				5199	if( toULength==toULimit && /* consumed all trail bytes */
				5200	(toULength==3 \|\| toULength==2) && /* BMP */
				5201	(c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] &&
				5202	(c<=0xd7ff \|\| 0xe000<=c) /* not a surrogate */
				5203	) {
				5204	value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
				5205	} else if(
				5206	toULength==toULimit && toULength==4 &&
				5207	(0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff)
				5208	) {
				5209	/* supplementary code point */
				5210	if(!hasSupplementary) {
				5211	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
				5212	value=0;
				5213	} else {
				5214	value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
				5215	}
				5216	} else {
				5217	/* error handling: illegal UTF-8 byte sequence */
				5218	source-=(toULength-oldToULength);
				5219	while(oldToULength<toULength) {
				5220	utf8->toUBytes[oldToULength++]=*source++;
				5221	}
				5222	utf8->toULength=toULength;
				5223	pToUArgs->source=(char *)source;
				5224	pFromUArgs->target=(char *)target;
				5225	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				5226	return;
				5227	}
				5228	}
				5229	}
				5230
				5231	if(value>=minValue) {
				5232	/* output the mapping for c */
				5233	*target++=(uint8_t)value;
				5234	--targetCapacity;
				5235	} else {
				5236	/* value<minValue means c is unassigned (unmappable) */
				5237	/*
				5238	* Try an extension mapping.
				5239	* Pass in no source because we don't have UTF-16 input.
				5240	* If we have a partial match on c, we will return and revert
				5241	* to UTF-8->UTF-16->charset conversion.
				5242	*/
				5243	static const UChar nul=0;
				5244	const UChar *noSource=&nul;
				5245	c=_extFromU(cnv, cnv->sharedData,
				5246	c, &noSource, noSource,
				5247	&target, target+targetCapacity,
				5248	NULL, -1,
				5249	pFromUArgs->flush,
				5250	pErrorCode);
				5251
				5252	if(U_FAILURE(*pErrorCode)) {
				5253	/* not mappable or buffer overflow */
				5254	cnv->fromUChar32=c;
				5255	break;
				5256	} else if(cnv->preFromUFirstCP>=0) {
				5257	/*
				5258	* Partial match, return and revert to pivoting.
				5259	* In normal from-UTF-16 conversion, we would just continue
				5260	* but then exit the loop because the extension match would
				5261	* have consumed the source.
				5262	*/
				5263	*pErrorCode=U_USING_DEFAULT_WARNING;
				5264	break;
				5265	} else {
				5266	/* a mapping was written to the target, continue */
				5267
				5268	/* recalculate the targetCapacity after an extension mapping */
				5269	targetCapacity=(int32_t)(pFromUArgs->targetLimit-(char *)target);
				5270	}
				5271	}
				5272	} else {
				5273	/* target is full */
				5274	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				5275	break;
				5276	}
				5277	}
				5278
				5279	/*
				5280	* The sourceLimit may have been adjusted before the conversion loop
				5281	* to stop before a truncated sequence.
				5282	* If so, then collect the truncated sequence now.
				5283	*/
				5284	if(U_SUCCESS(*pErrorCode) &&
				5285	cnv->preFromUFirstCP<0 &&
				5286	source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
				5287	c=utf8->toUBytes[0]=b=*source++;
				5288	toULength=1;
				5289	toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
				5290	while(source<sourceLimit) {
				5291	utf8->toUBytes[toULength++]=b=*source++;
				5292	c=(c<<6)+b;
				5293	}
				5294	utf8->toUnicodeStatus=c;
				5295	utf8->toULength=toULength;
				5296	utf8->mode=toULimit;
				5297	}
				5298
				5299	/* write back the updated pointers */
				5300	pToUArgs->source=(char *)source;
				5301	pFromUArgs->target=(char *)target;
				5302	}
				5303
				5304	static void
				5305	ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
				5306	UConverterToUnicodeArgs *pToUArgs,
				5307	UErrorCode *pErrorCode) {
				5308	UConverter utf8, cnv;
				5309	const uint8_t source, sourceLimit;
				5310	uint8_t *target;
				5311	int32_t targetCapacity;
				5312
				5313	const uint16_t table, mbcsIndex;
				5314	const uint16_t *results;
				5315
				5316	int8_t oldToULength, toULength, toULimit;
				5317
				5318	UChar32 c;
				5319	uint8_t b, t1, t2;
				5320
				5321	uint32_t stage2Entry;
				5322	uint32_t asciiRoundtrips;
				5323	uint16_t value;
				5324	UBool hasSupplementary;
				5325
				5326	/* set up the local pointers */
				5327	utf8=pToUArgs->converter;
				5328	cnv=pFromUArgs->converter;
				5329	source=(uint8_t *)pToUArgs->source;
				5330	sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
				5331	target=(uint8_t *)pFromUArgs->target;
				5332	targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
				5333
				5334	table=cnv->sharedData->mbcs.fromUnicodeTable;
				5335	mbcsIndex=cnv->sharedData->mbcs.mbcsIndex;
				5336	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
				5337	results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
				5338	} else {
				5339	results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
				5340	}
				5341	asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
				5342
				5343	hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
				5344
				5345	/* get the converter state from the UTF-8 UConverter */
				5346	c=(UChar32)utf8->toUnicodeStatus;
				5347	if(c!=0) {
				5348	toULength=oldToULength=utf8->toULength;
				5349	toULimit=(int8_t)utf8->mode;
				5350	} else {
				5351	toULength=oldToULength=toULimit=0;
				5352	}
				5353
				5354	/*
				5355	* Make sure that the last byte sequence before sourceLimit is complete
				5356	* or runs into a lead byte.
				5357	* Do not go back into the bytes that will be read for finishing a partial
				5358	* sequence from the previous buffer.
				5359	* In the conversion loop compare source with sourceLimit only once
				5360	* per multi-byte character.
				5361	*/
				5362	{
				5363	int32_t i, length;
				5364
				5365	length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
				5366	for(i=0; i<3 && i<length;) {
				5367	b=*(sourceLimit-i-1);
				5368	if(U8_IS_TRAIL(b)) {
				5369	++i;
				5370	} else {
				5371	if(i<U8_COUNT_TRAIL_BYTES(b)) {
				5372	/* exit the conversion loop before the lead byte if there are not enough trail bytes for it */
				5373	sourceLimit-=i+1;
				5374	}
				5375	break;
				5376	}
				5377	}
				5378	}
				5379
				5380	if(c!=0 && targetCapacity>0) {
				5381	utf8->toUnicodeStatus=0;
				5382	utf8->toULength=0;
				5383	goto moreBytes;
				5384	/* See note in ucnv_SBCSFromUTF8() about this goto. */
				5385	}
				5386
				5387	/* conversion loop */
				5388	while(source<sourceLimit) {
				5389	if(targetCapacity>0) {
				5390	b=*source++;
				5391	if((int8_t)b>=0) {
				5392	/* convert ASCII */
				5393	if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) {
				5394	*target++=b;
				5395	--targetCapacity;
				5396	continue;
				5397	} else {
				5398	value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, 0, b);
				5399	if(value==0) {
				5400	c=b;
				5401	goto unassigned;
				5402	}
				5403	}
				5404	} else {
				5405	if(b>0xe0) {
				5406	if( /* handle U+1000..U+D7FF inline */
				5407	(((t1=(uint8_t)(source[0]-0x80), b<0xed) && (t1 <= 0x3f)) \|\|
				5408	(b==0xed && (t1 <= 0x1f))) &&
				5409	(t2=(uint8_t)(source[1]-0x80)) <= 0x3f
				5410	) {
				5411	c=((b&0xf)<<6)\|t1;
				5412	source+=2;
				5413	value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t2);
				5414	if(value==0) {
				5415	c=(c<<6)\|t2;
				5416	goto unassigned;
				5417	}
				5418	} else {
				5419	c=-1;
				5420	}
				5421	} else if(b<0xe0) {
				5422	if( /* handle U+0080..U+07FF inline */
				5423	b>=0xc2 &&
				5424	(t1=(uint8_t)(*source-0x80)) <= 0x3f
				5425	) {
				5426	c=b&0x1f;
				5427	++source;
				5428	value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t1);
				5429	if(value==0) {
				5430	c=(c<<6)\|t1;
				5431	goto unassigned;
				5432	}
				5433	} else {
				5434	c=-1;
				5435	}
				5436	} else {
				5437	c=-1;
				5438	}
				5439
				5440	if(c<0) {
				5441	/* handle "complicated" and error cases, and continuing partial characters */
				5442	oldToULength=0;
				5443	toULength=1;
				5444	toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
				5445	c=b;
				5446	moreBytes:
				5447	while(toULength<toULimit) {
				5448	/*
				5449	* The sourceLimit may have been adjusted before the conversion loop
				5450	* to stop before a truncated sequence.
				5451	* Here we need to use the real limit in case we have two truncated
				5452	* sequences at the end.
				5453	* See ticket #7492.
				5454	*/
				5455	if(source<(uint8_t *)pToUArgs->sourceLimit) {
				5456	b=*source;
				5457	if(U8_IS_TRAIL(b)) {
				5458	++source;
				5459	++toULength;
				5460	c=(c<<6)+b;
				5461	} else {
				5462	break; /* sequence too short, stop with toULength<toULimit */
				5463	}
				5464	} else {
				5465	/* store the partial UTF-8 character, compatible with the regular UTF-8 converter */
				5466	source-=(toULength-oldToULength);
				5467	while(oldToULength<toULength) {
				5468	utf8->toUBytes[oldToULength++]=*source++;
				5469	}
				5470	utf8->toUnicodeStatus=c;
				5471	utf8->toULength=toULength;
				5472	utf8->mode=toULimit;
				5473	pToUArgs->source=(char *)source;
				5474	pFromUArgs->target=(char *)target;
				5475	return;
				5476	}
				5477	}
				5478
				5479	if( toULength==toULimit && /* consumed all trail bytes */
				5480	(toULength==3 \|\| toULength==2) && /* BMP */
				5481	(c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] &&
				5482	(c<=0xd7ff \|\| 0xe000<=c) /* not a surrogate */
				5483	) {
				5484	stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
				5485	} else if(
				5486	toULength==toULimit && toULength==4 &&
				5487	(0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff)
				5488	) {
				5489	/* supplementary code point */
				5490	if(!hasSupplementary) {
				5491	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
				5492	stage2Entry=0;
				5493	} else {
				5494	stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
				5495	}
				5496	} else {
				5497	/* error handling: illegal UTF-8 byte sequence */
				5498	source-=(toULength-oldToULength);
				5499	while(oldToULength<toULength) {
				5500	utf8->toUBytes[oldToULength++]=*source++;
				5501	}
				5502	utf8->toULength=toULength;
				5503	pToUArgs->source=(char *)source;
				5504	pFromUArgs->target=(char *)target;
				5505	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
				5506	return;
				5507	}
				5508
				5509	/* get the bytes and the length for the output */
				5510	/* MBCS_OUTPUT_2 */
				5511	value=MBCS_VALUE_2_FROM_STAGE_2(results, stage2Entry, c);
				5512
				5513	/* is this code point assigned, or do we use fallbacks? */
				5514	if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) \|\|
				5515	(UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0))
				5516	) {
				5517	goto unassigned;
				5518	}
				5519	}
				5520	}
				5521
				5522	/* write the output character bytes from value and length */
				5523	/* from the first if in the loop we know that targetCapacity>0 */
				5524	if(value<=0xff) {
				5525	/* this is easy because we know that there is enough space */
				5526	*target++=(uint8_t)value;
				5527	--targetCapacity;
				5528	} else /* length==2 */ {
				5529	*target++=(uint8_t)(value>>8);
				5530	if(2<=targetCapacity) {
				5531	*target++=(uint8_t)value;
				5532	targetCapacity-=2;
				5533	} else {
				5534	cnv->charErrorBuffer[0]=(char)value;
				5535	cnv->charErrorBufferLength=1;
				5536
				5537	/* target overflow */
				5538	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				5539	break;
				5540	}
				5541	}
				5542	continue;
				5543
				5544	unassigned:
				5545	{
				5546	/*
				5547	* Try an extension mapping.
				5548	* Pass in no source because we don't have UTF-16 input.
				5549	* If we have a partial match on c, we will return and revert
				5550	* to UTF-8->UTF-16->charset conversion.
				5551	*/
				5552	static const UChar nul=0;
				5553	const UChar *noSource=&nul;
				5554	c=_extFromU(cnv, cnv->sharedData,
				5555	c, &noSource, noSource,
				5556	&target, target+targetCapacity,
				5557	NULL, -1,
				5558	pFromUArgs->flush,
				5559	pErrorCode);
				5560
				5561	if(U_FAILURE(*pErrorCode)) {
				5562	/* not mappable or buffer overflow */
				5563	cnv->fromUChar32=c;
				5564	break;
				5565	} else if(cnv->preFromUFirstCP>=0) {
				5566	/*
				5567	* Partial match, return and revert to pivoting.
				5568	* In normal from-UTF-16 conversion, we would just continue
				5569	* but then exit the loop because the extension match would
				5570	* have consumed the source.
				5571	*/
				5572	*pErrorCode=U_USING_DEFAULT_WARNING;
				5573	break;
				5574	} else {
				5575	/* a mapping was written to the target, continue */
				5576
				5577	/* recalculate the targetCapacity after an extension mapping */
				5578	targetCapacity=(int32_t)(pFromUArgs->targetLimit-(char *)target);
				5579	continue;
				5580	}
				5581	}
				5582	} else {
				5583	/* target is full */
				5584	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
				5585	break;
				5586	}
				5587	}
				5588
				5589	/*
				5590	* The sourceLimit may have been adjusted before the conversion loop
				5591	* to stop before a truncated sequence.
				5592	* If so, then collect the truncated sequence now.
				5593	*/
				5594	if(U_SUCCESS(*pErrorCode) &&
				5595	cnv->preFromUFirstCP<0 &&
				5596	source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
				5597	c=utf8->toUBytes[0]=b=*source++;
				5598	toULength=1;
				5599	toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
				5600	while(source<sourceLimit) {
				5601	utf8->toUBytes[toULength++]=b=*source++;
				5602	c=(c<<6)+b;
				5603	}
				5604	utf8->toUnicodeStatus=c;
				5605	utf8->toULength=toULength;
				5606	utf8->mode=toULimit;
				5607	}
				5608
				5609	/* write back the updated pointers */
				5610	pToUArgs->source=(char *)source;
				5611	pFromUArgs->target=(char *)target;
				5612	}
				5613
				5614	/* miscellaneous ------------------------------------------------------------ */
				5615
				5616	static void
				5617	ucnv_MBCSGetStarters(const UConverter* cnv,
				5618	UBool starters[256],
				5619	UErrorCode *) {
				5620	const int32_t *state0;
				5621	int i;
				5622
				5623	state0=cnv->sharedData->mbcs.stateTable[cnv->sharedData->mbcs.dbcsOnlyState];
				5624	for(i=0; i<256; ++i) {
				5625	/* all bytes that cause a state transition from state 0 are lead bytes */
				5626	starters[i]= (UBool)MBCS_ENTRY_IS_TRANSITION(state0[i]);
				5627	}
				5628	}
				5629
				5630	/*
				5631	* This is an internal function that allows other converter implementations
				5632	* to check whether a byte is a lead byte.
				5633	*/
				5634	U_CFUNC UBool
				5635	ucnv_MBCSIsLeadByte(UConverterSharedData *sharedData, char byte) {
				5636	return (UBool)MBCS_ENTRY_IS_TRANSITION(sharedData->mbcs.stateTable[0][(uint8_t)byte]);
				5637	}
				5638
				5639	static void
				5640	ucnv_MBCSWriteSub(UConverterFromUnicodeArgs *pArgs,
				5641	int32_t offsetIndex,
				5642	UErrorCode *pErrorCode) {
				5643	UConverter *cnv=pArgs->converter;
				5644	char p, subchar;
				5645	char buffer[4];
				5646	int32_t length;
				5647
				5648	/* first, select between subChar and subChar1 */
				5649	if( cnv->subChar1!=0 &&
				5650	(cnv->sharedData->mbcs.extIndexes!=NULL ?
				5651	cnv->useSubChar1 :
				5652	(cnv->invalidUCharBuffer[0]<=0xff))
				5653	) {
				5654	/* select subChar1 if it is set (not 0) and the unmappable Unicode code point is up to U+00ff (IBM MBCS behavior) */
				5655	subchar=(char *)&cnv->subChar1;
				5656	length=1;
				5657	} else {
				5658	/* select subChar in all other cases */
				5659	subchar=(char *)cnv->subChars;
				5660	length=cnv->subCharLen;
				5661	}
				5662
				5663	/* reset the selector for the next code point */
				5664	cnv->useSubChar1=FALSE;
				5665
				5666	if (cnv->sharedData->mbcs.outputType == MBCS_OUTPUT_2_SISO) {
				5667	p=buffer;
				5668
				5669	/* fromUnicodeStatus contains prevLength */
				5670	switch(length) {
				5671	case 1:
				5672	if(cnv->fromUnicodeStatus==2) {
				5673	/* DBCS mode and SBCS sub char: change to SBCS */
				5674	cnv->fromUnicodeStatus=1;
				5675	*p++=UCNV_SI;
				5676	}
				5677	*p++=subchar[0];
				5678	break;
				5679	case 2:
				5680	if(cnv->fromUnicodeStatus<=1) {
				5681	/* SBCS mode and DBCS sub char: change to DBCS */
				5682	cnv->fromUnicodeStatus=2;
				5683	*p++=UCNV_SO;
				5684	}
				5685	*p++=subchar[0];
				5686	*p++=subchar[1];
				5687	break;
				5688	default:
				5689	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
				5690	return;
				5691	}
				5692	subchar=buffer;
				5693	length=(int32_t)(p-buffer);
				5694	}
				5695
				5696	ucnv_cbFromUWriteBytes(pArgs, subchar, length, offsetIndex, pErrorCode);
				5697	}
				5698
				5699	U_CFUNC UConverterType
				5700	ucnv_MBCSGetType(const UConverter* converter) {
				5701	/* SBCS, DBCS, and EBCDIC_STATEFUL are replaced by MBCS, but here we cheat a little */
				5702	if(converter->sharedData->mbcs.countStates==1) {
				5703	return (UConverterType)UCNV_SBCS;
				5704	} else if((converter->sharedData->mbcs.outputType&0xff)==MBCS_OUTPUT_2_SISO) {
				5705	return (UConverterType)UCNV_EBCDIC_STATEFUL;
				5706	} else if(converter->sharedData->staticData->minBytesPerChar==2 && converter->sharedData->staticData->maxBytesPerChar==2) {
				5707	return (UConverterType)UCNV_DBCS;
				5708	}
				5709	return (UConverterType)UCNV_MBCS;
				5710	}
				5711
				5712	#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */