Blame - internal/cgen/base/floatconv-submodule-code.c - skia.googlesource.com/external/github.com/google/wuffs

blob: 7691182ef77ed51c0d941e620eda627370a9d297 [file] [log] [blame]

// Copyright 2020 The Wuffs Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// ---------------- IEEE 754 Floating Point
				16
Nigel Tao	7bf7cf2	2020-07-12 16:23:15 +1000	[diff] [blame]	17	WUFFS_BASE__MAYBE_STATIC wuffs_base__lossy_value_u16 //
Nigel Tao	a3931d5	2020-07-12 21:06:44 +1000	[diff] [blame]	18	wuffs_base__ieee_754_bit_representation__from_f64_to_u16_truncate(double f) {
Nigel Tao	7bf7cf2	2020-07-12 16:23:15 +1000	[diff] [blame]	19	uint64_t u = 0;
				20	if (sizeof(uint64_t) == sizeof(double)) {
				21	memcpy(&u, &f, sizeof(uint64_t));
				22	}
Nigel Tao	56d9096	2020-07-12 21:11:49 +1000	[diff] [blame]	23	uint16_t neg = ((uint16_t)((u >> 63) << 15));
Nigel Tao	7bf7cf2	2020-07-12 16:23:15 +1000	[diff] [blame]	24	u &= 0x7FFFFFFFFFFFFFFF;
				25	uint64_t exp = u >> 52;
				26	uint64_t man = u & 0x000FFFFFFFFFFFFF;
				27
				28	if (exp == 0x7FF) {
				29	if (man == 0) { // Infinity.
				30	wuffs_base__lossy_value_u16 ret;
				31	ret.value = neg \| 0x7C00;
				32	ret.lossy = false;
				33	return ret;
				34	}
				35	// NaN. Shift the 52 mantissa bits to 10 mantissa bits, keeping the most
				36	// significant mantissa bit (quiet vs signaling NaNs). Also set the low 9
				37	// bits of ret.value so that the 10-bit mantissa is non-zero.
				38	wuffs_base__lossy_value_u16 ret;
				39	ret.value = neg \| 0x7DFF \| ((uint16_t)(man >> 42));
				40	ret.lossy = false;
				41	return ret;
				42
				43	} else if (exp > 0x40E) { // Truncate to the largest finite f16.
				44	wuffs_base__lossy_value_u16 ret;
				45	ret.value = neg \| 0x7BFF;
				46	ret.lossy = true;
				47	return ret;
				48
				49	} else if (exp <= 0x3E6) { // Truncate to zero.
				50	wuffs_base__lossy_value_u16 ret;
				51	ret.value = neg;
				52	ret.lossy = (u != 0);
				53	return ret;
				54
				55	} else if (exp <= 0x3F0) { // Normal f64, subnormal f16.
				56	// Convert from a 53-bit mantissa (after realizing the implicit bit) to a
				57	// 10-bit mantissa and then adjust for the exponent.
				58	man \|= 0x0010000000000000;
Nigel Tao	56d9096	2020-07-12 21:11:49 +1000	[diff] [blame]	59	uint32_t shift = ((uint32_t)(1051 - exp)); // 1051 = 0x3F0 + 53 - 10.
Nigel Tao	7bf7cf2	2020-07-12 16:23:15 +1000	[diff] [blame]	60	uint64_t shifted_man = man >> shift;
				61	wuffs_base__lossy_value_u16 ret;
				62	ret.value = neg \| ((uint16_t)shifted_man);
				63	ret.lossy = (shifted_man << shift) != man;
				64	return ret;
				65	}
				66
				67	// Normal f64, normal f16.
				68
				69	// Re-bias from 1023 to 15 and shift above f16's 10 mantissa bits.
				70	exp = (exp - 1008) << 10; // 1008 = 1023 - 15 = 0x3FF - 0xF.
				71
				72	// Convert from a 52-bit mantissa (excluding the implicit bit) to a 10-bit
				73	// mantissa (again excluding the implicit bit). We lose some information if
				74	// any of the bottom 42 bits are non-zero.
				75	wuffs_base__lossy_value_u16 ret;
				76	ret.value = neg \| ((uint16_t)exp) \| ((uint16_t)(man >> 42));
				77	ret.lossy = (man << 22) != 0;
				78	return ret;
				79	}
				80
				81	WUFFS_BASE__MAYBE_STATIC wuffs_base__lossy_value_u32 //
Nigel Tao	a3931d5	2020-07-12 21:06:44 +1000	[diff] [blame]	82	wuffs_base__ieee_754_bit_representation__from_f64_to_u32_truncate(double f) {
Nigel Tao	7bf7cf2	2020-07-12 16:23:15 +1000	[diff] [blame]	83	uint64_t u = 0;
				84	if (sizeof(uint64_t) == sizeof(double)) {
				85	memcpy(&u, &f, sizeof(uint64_t));
				86	}
				87	uint32_t neg = ((uint32_t)(u >> 63)) << 31;
				88	u &= 0x7FFFFFFFFFFFFFFF;
				89	uint64_t exp = u >> 52;
				90	uint64_t man = u & 0x000FFFFFFFFFFFFF;
				91
				92	if (exp == 0x7FF) {
				93	if (man == 0) { // Infinity.
				94	wuffs_base__lossy_value_u32 ret;
				95	ret.value = neg \| 0x7F800000;
				96	ret.lossy = false;
				97	return ret;
				98	}
				99	// NaN. Shift the 52 mantissa bits to 23 mantissa bits, keeping the most
				100	// significant mantissa bit (quiet vs signaling NaNs). Also set the low 22
				101	// bits of ret.value so that the 23-bit mantissa is non-zero.
				102	wuffs_base__lossy_value_u32 ret;
				103	ret.value = neg \| 0x7FBFFFFF \| ((uint32_t)(man >> 29));
				104	ret.lossy = false;
				105	return ret;
				106
				107	} else if (exp > 0x47E) { // Truncate to the largest finite f32.
				108	wuffs_base__lossy_value_u32 ret;
				109	ret.value = neg \| 0x7F7FFFFF;
				110	ret.lossy = true;
				111	return ret;
				112
				113	} else if (exp <= 0x369) { // Truncate to zero.
				114	wuffs_base__lossy_value_u32 ret;
				115	ret.value = neg;
				116	ret.lossy = (u != 0);
				117	return ret;
				118
				119	} else if (exp <= 0x380) { // Normal f64, subnormal f32.
				120	// Convert from a 53-bit mantissa (after realizing the implicit bit) to a
				121	// 23-bit mantissa and then adjust for the exponent.
				122	man \|= 0x0010000000000000;
Nigel Tao	56d9096	2020-07-12 21:11:49 +1000	[diff] [blame]	123	uint32_t shift = ((uint32_t)(926 - exp)); // 926 = 0x380 + 53 - 23.
Nigel Tao	7bf7cf2	2020-07-12 16:23:15 +1000	[diff] [blame]	124	uint64_t shifted_man = man >> shift;
				125	wuffs_base__lossy_value_u32 ret;
				126	ret.value = neg \| ((uint32_t)shifted_man);
				127	ret.lossy = (shifted_man << shift) != man;
				128	return ret;
				129	}
				130
				131	// Normal f64, normal f32.
				132
				133	// Re-bias from 1023 to 127 and shift above f32's 23 mantissa bits.
				134	exp = (exp - 896) << 23; // 896 = 1023 - 127 = 0x3FF - 0x7F.
				135
				136	// Convert from a 52-bit mantissa (excluding the implicit bit) to a 23-bit
				137	// mantissa (again excluding the implicit bit). We lose some information if
				138	// any of the bottom 29 bits are non-zero.
				139	wuffs_base__lossy_value_u32 ret;
				140	ret.value = neg \| ((uint32_t)exp) \| ((uint32_t)(man >> 29));
				141	ret.lossy = (man << 35) != 0;
				142	return ret;
				143	}
				144
				145	// --------
				146
#define WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE 2047
#define WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION 800

// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL is the largest N
// such that ((10 << N) < (1 << 64)).
#define WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL 60

// wuffs_base__private_implementation__high_prec_dec (abbreviated as HPD) is a
// fixed precision floating point decimal number, augmented with ±infinity
// values, but it cannot represent NaN (Not a Number).
//
// "High precision" means that the mantissa holds 800 decimal digits. 800 is
// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION.
//
// An HPD isn't for general purpose arithmetic, only for conversions to and
// from IEEE 754 double-precision floating point, where the largest and
// smallest positive, finite values are approximately 1.8e+308 and 4.9e-324.
// HPD exponents above +2047 mean infinity, below -2047 mean zero. The ±2047
// bounds are further away from zero than ±(324 + 800), where 800 and 2047 are
// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION and
// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE.
//
// digits[.. num_digits] are the number's digits in big-endian order. The
// uint8_t values are in the range [0 ..= 9], not ['0' ..= '9'], where e.g. '7'
// is the ASCII value 0x37.
//
// decimal_point is the index (within digits) of the decimal point. It may be
// negative or be larger than num_digits, in which case the explicit digits are
// padded with implicit zeroes.
//
// For example, if num_digits is 3 and digits is "\x07\x08\x09":
//  - A decimal_point of -2 means ".00789"
//  - A decimal_point of -1 means ".0789"
//  - A decimal_point of +0 means ".789"
//  - A decimal_point of +1 means "7.89"
//  - A decimal_point of +2 means "78.9"
//  - A decimal_point of +3 means "789."
//  - A decimal_point of +4 means "7890."
//  - A decimal_point of +5 means "78900."
//
// As above, a decimal_point higher than +2047 means that the overall value is
// infinity, lower than -2047 means zero.
//
// negative is a sign bit. An HPD can distinguish positive and negative zero.
//
// truncated is whether there are more than
// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION digits, and at
// least one of those extra digits is non-zero. The existence of long-tail
// digits can affect rounding.
//
// The "all fields are zero" value is valid, and represents the number +0.
typedef struct wuffs_base__private_implementation__high_prec_dec__struct {
  uint32_t num_digits;
  int32_t decimal_point;
  bool negative;
  bool truncated;
  uint8_t digits[WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION];
} wuffs_base__private_implementation__high_prec_dec;
				205
				206	// wuffs_base__private_implementation__high_prec_dec__trim trims trailing
				207	// zeroes from the h->digits[.. h->num_digits] slice. They have no benefit,
				208	// since we explicitly track h->decimal_point.
				209	//
				210	// Preconditions:
				211	// - h is non-NULL.
				212	static inline void //
				213	wuffs_base__private_implementation__high_prec_dec__trim(
				214	wuffs_base__private_implementation__high_prec_dec* h) {
				215	while ((h->num_digits > 0) && (h->digits[h->num_digits - 1] == 0)) {
				216	h->num_digits--;
				217	}
				218	}
				219
				220	// wuffs_base__private_implementation__high_prec_dec__assign sets h to
				221	// represent the number x.
				222	//
				223	// Preconditions:
				224	// - h is non-NULL.
				225	static void //
				226	wuffs_base__private_implementation__high_prec_dec__assign(
				227	wuffs_base__private_implementation__high_prec_dec* h,
				228	uint64_t x,
				229	bool negative) {
				230	uint32_t n = 0;
				231
				232	// Set h->digits.
				233	if (x > 0) {
				234	// Calculate the digits, working right-to-left. After we determine n (how
				235	// many digits there are), copy from buf to h->digits.
				236	//
				237	// UINT64_MAX, 18446744073709551615, is 20 digits long. It can be faster to
				238	// copy a constant number of bytes than a variable number (20 instead of
				239	// n). Make buf large enough (and start writing to it from the middle) so
				240	// that can we always copy 20 bytes: the slice buf[(20-n) .. (40-n)].
				241	uint8_t buf[40] = {0};
				242	uint8_t* ptr = &buf[20];
				243	do {
				244	uint64_t remaining = x / 10;
				245	x -= remaining * 10;
				246	ptr--;
				247	*ptr = (uint8_t)x;
				248	n++;
				249	x = remaining;
				250	} while (x > 0);
				251	memcpy(h->digits, ptr, 20);
				252	}
				253
				254	// Set h's other fields.
				255	h->num_digits = n;
				256	h->decimal_point = (int32_t)n;
				257	h->negative = negative;
				258	h->truncated = false;
				259	wuffs_base__private_implementation__high_prec_dec__trim(h);
				260	}
				261
				262	static wuffs_base__status //
				263	wuffs_base__private_implementation__high_prec_dec__parse(
				264	wuffs_base__private_implementation__high_prec_dec* h,
Nigel Tao	e0c5de9	2020-07-11 11:48:17 +1000	[diff] [blame]	265	wuffs_base__slice_u8 s,
				266	uint32_t options) {
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	267	if (!h) {
				268	return wuffs_base__make_status(wuffs_base__error__bad_receiver);
				269	}
				270	h->num_digits = 0;
				271	h->decimal_point = 0;
				272	h->negative = false;
				273	h->truncated = false;
				274
				275	uint8_t* p = s.ptr;
				276	uint8_t* q = s.ptr + s.len;
				277
Nigel Tao	c5c9885	2020-07-11 13:10:14 +1000	[diff] [blame]	278	if (options & WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_UNDERSCORES) {
				279	for (;; p++) {
				280	if (p >= q) {
				281	return wuffs_base__make_status(wuffs_base__error__bad_argument);
				282	} else if (*p != '_') {
				283	break;
				284	}
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	285	}
				286	}
				287
				288	// Parse sign.
				289	do {
				290	if (*p == '+') {
				291	p++;
				292	} else if (*p == '-') {
				293	h->negative = true;
				294	p++;
				295	} else {
				296	break;
				297	}
Nigel Tao	c5c9885	2020-07-11 13:10:14 +1000	[diff] [blame]	298	if (options & WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_UNDERSCORES) {
				299	for (;; p++) {
				300	if (p >= q) {
				301	return wuffs_base__make_status(wuffs_base__error__bad_argument);
				302	} else if (*p != '_') {
				303	break;
				304	}
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	305	}
				306	}
				307	} while (0);
				308
				309	// Parse digits, up to (and including) a '.', 'E' or 'e'. Examples for each
				310	// limb in this if-else chain:
				311	// - "0.789"
				312	// - "1002.789"
				313	// - ".789"
				314	// - Other (invalid input).
				315	uint32_t nd = 0;
				316	int32_t dp = 0;
				317	bool no_digits_before_separator = false;
Nigel Tao	e82bc8e	2020-07-11 12:49:15 +1000	[diff] [blame]	318	if (('0' == *p) &&
				319	!(options &
				320	WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_MULTIPLE_LEADING_ZEROES)) {
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	321	p++;
				322	for (;; p++) {
				323	if (p >= q) {
				324	goto after_all;
Nigel Tao	e0c5de9	2020-07-11 11:48:17 +1000	[diff] [blame]	325	} else if (*p ==
				326	((options &
				327	WUFFS_BASE__PARSE_NUMBER_FXX__DECIMAL_SEPARATOR_IS_A_COMMA)
				328	? ','
				329	: '.')) {
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	330	p++;
				331	goto after_sep;
				332	} else if ((p == 'E') \|\| (p == 'e')) {
				333	p++;
				334	goto after_exp;
Nigel Tao	c5c9885	2020-07-11 13:10:14 +1000	[diff] [blame]	335	} else if ((*p != '_') \|\|
				336	!(options & WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_UNDERSCORES)) {
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	337	return wuffs_base__make_status(wuffs_base__error__bad_argument);
				338	}
				339	}
				340
Nigel Tao	e82bc8e	2020-07-11 12:49:15 +1000	[diff] [blame]	341	} else if (('0' <= p) && (p <= '9')) {
				342	if (*p == '0') {
				343	for (; (p < q) && (*p == '0'); p++) {
				344	}
				345	} else {
				346	h->digits[nd++] = (uint8_t)(*p - '0');
				347	dp = (int32_t)nd;
				348	p++;
				349	}
				350
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	351	for (;; p++) {
				352	if (p >= q) {
				353	goto after_all;
				354	} else if (('0' <= p) && (p <= '9')) {
				355	if (nd < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
				356	h->digits[nd++] = (uint8_t)(*p - '0');
				357	dp = (int32_t)nd;
				358	} else if ('0' != *p) {
				359	// Long-tail non-zeroes set the truncated bit.
				360	h->truncated = true;
				361	}
Nigel Tao	e0c5de9	2020-07-11 11:48:17 +1000	[diff] [blame]	362	} else if (*p ==
				363	((options &
				364	WUFFS_BASE__PARSE_NUMBER_FXX__DECIMAL_SEPARATOR_IS_A_COMMA)
				365	? ','
				366	: '.')) {
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	367	p++;
				368	goto after_sep;
				369	} else if ((p == 'E') \|\| (p == 'e')) {
				370	p++;
				371	goto after_exp;
Nigel Tao	c5c9885	2020-07-11 13:10:14 +1000	[diff] [blame]	372	} else if ((*p != '_') \|\|
				373	!(options & WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_UNDERSCORES)) {
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	374	return wuffs_base__make_status(wuffs_base__error__bad_argument);
				375	}
				376	}
				377
Nigel Tao	e0c5de9	2020-07-11 11:48:17 +1000	[diff] [blame]	378	} else if (*p == ((options &
				379	WUFFS_BASE__PARSE_NUMBER_FXX__DECIMAL_SEPARATOR_IS_A_COMMA)
				380	? ','
				381	: '.')) {
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	382	p++;
				383	no_digits_before_separator = true;
				384
				385	} else {
				386	return wuffs_base__make_status(wuffs_base__error__bad_argument);
				387	}
				388
				389	after_sep:
				390	for (;; p++) {
				391	if (p >= q) {
				392	goto after_all;
				393	} else if ('0' == *p) {
				394	if (nd == 0) {
				395	// Track leading zeroes implicitly.
				396	dp--;
				397	} else if (nd <
				398	WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
				399	h->digits[nd++] = (uint8_t)(*p - '0');
				400	}
				401	} else if (('0' < p) && (p <= '9')) {
				402	if (nd < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
				403	h->digits[nd++] = (uint8_t)(*p - '0');
				404	} else {
				405	// Long-tail non-zeroes set the truncated bit.
				406	h->truncated = true;
				407	}
				408	} else if ((p == 'E') \|\| (p == 'e')) {
				409	p++;
				410	goto after_exp;
Nigel Tao	c5c9885	2020-07-11 13:10:14 +1000	[diff] [blame]	411	} else if ((*p != '_') \|\|
				412	!(options & WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_UNDERSCORES)) {
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	413	return wuffs_base__make_status(wuffs_base__error__bad_argument);
				414	}
				415	}
				416
				417	after_exp:
				418	do {
Nigel Tao	c5c9885	2020-07-11 13:10:14 +1000	[diff] [blame]	419	if (options & WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_UNDERSCORES) {
				420	for (;; p++) {
				421	if (p >= q) {
				422	return wuffs_base__make_status(wuffs_base__error__bad_argument);
				423	} else if (*p != '_') {
				424	break;
				425	}
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	426	}
				427	}
				428
				429	int32_t exp_sign = +1;
				430	if (*p == '+') {
				431	p++;
				432	} else if (*p == '-') {
				433	exp_sign = -1;
				434	p++;
				435	}
				436
				437	int32_t exp = 0;
				438	const int32_t exp_large =
				439	WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE +
				440	WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION;
				441	bool saw_exp_digits = false;
				442	for (; p < q; p++) {
Nigel Tao	c5c9885	2020-07-11 13:10:14 +1000	[diff] [blame]	443	if ((*p == '_') &&
				444	(options & WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_UNDERSCORES)) {
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	445	// No-op.
				446	} else if (('0' <= p) && (p <= '9')) {
				447	saw_exp_digits = true;
				448	if (exp < exp_large) {
				449	exp = (10 * exp) + ((int32_t)(*p - '0'));
				450	}
				451	} else {
				452	break;
				453	}
				454	}
				455	if (!saw_exp_digits) {
				456	return wuffs_base__make_status(wuffs_base__error__bad_argument);
				457	}
				458	dp += exp_sign * exp;
				459	} while (0);
				460
				461	after_all:
				462	if (p != q) {
				463	return wuffs_base__make_status(wuffs_base__error__bad_argument);
				464	}
				465	h->num_digits = nd;
				466	if (nd == 0) {
				467	if (no_digits_before_separator) {
				468	return wuffs_base__make_status(wuffs_base__error__bad_argument);
				469	}
				470	h->decimal_point = 0;
				471	} else if (dp <
				472	-WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
				473	h->decimal_point =
				474	-WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE - 1;
				475	} else if (dp >
				476	+WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
				477	h->decimal_point =
				478	+WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE + 1;
				479	} else {
				480	h->decimal_point = dp;
				481	}
				482	wuffs_base__private_implementation__high_prec_dec__trim(h);
				483	return wuffs_base__make_status(NULL);
				484	}
				485
				486	// --------
				487
				488	// wuffs_base__private_implementation__high_prec_dec__lshift_num_new_digits
				489	// returns the number of additional decimal digits when left-shifting by shift.
				490	//
				491	// See below for preconditions.
				492	static uint32_t //
				493	wuffs_base__private_implementation__high_prec_dec__lshift_num_new_digits(
				494	wuffs_base__private_implementation__high_prec_dec* h,
				495	uint32_t shift) {
				496	// Masking with 0x3F should be unnecessary (assuming the preconditions) but
				497	// it's cheap and ensures that we don't overflow the
				498	// wuffs_base__private_implementation__hpd_left_shift array.
				499	shift &= 63;
				500
				501	uint32_t x_a = wuffs_base__private_implementation__hpd_left_shift[shift];
				502	uint32_t x_b = wuffs_base__private_implementation__hpd_left_shift[shift + 1];
				503	uint32_t num_new_digits = x_a >> 11;
				504	uint32_t pow5_a = 0x7FF & x_a;
				505	uint32_t pow5_b = 0x7FF & x_b;
				506
				507	const uint8_t* pow5 =
				508	&wuffs_base__private_implementation__powers_of_5[pow5_a];
				509	uint32_t i = 0;
				510	uint32_t n = pow5_b - pow5_a;
				511	for (; i < n; i++) {
				512	if (i >= h->num_digits) {
				513	return num_new_digits - 1;
				514	} else if (h->digits[i] == pow5[i]) {
				515	continue;
				516	} else if (h->digits[i] < pow5[i]) {
				517	return num_new_digits - 1;
				518	} else {
				519	return num_new_digits;
				520	}
				521	}
				522	return num_new_digits;
				523	}
				524
				525	// --------
				526
				527	// wuffs_base__private_implementation__high_prec_dec__rounded_integer returns
				528	// the integral (non-fractional) part of h, provided that it is 18 or fewer
				529	// decimal digits. For 19 or more digits, it returns UINT64_MAX. Note that:
Nigel Tao	f148f5c	2021-11-05 16:50:38 +1100	[diff] [blame]	530	// - (1 << 53) is 9007199254740992, which has 16 decimal digits.
				531	// - (1 << 56) is 72057594037927936, which has 17 decimal digits.
				532	// - (1 << 59) is 576460752303423488, which has 18 decimal digits.
				533	// - (1 << 63) is 9223372036854775808, which has 19 decimal digits.
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	534	// and that IEEE 754 double precision has 52 mantissa bits.
				535	//
				536	// That integral part is rounded-to-even: rounding 7.5 or 8.5 both give 8.
				537	//
				538	// h's negative bit is ignored: rounding -8.6 returns 9.
				539	//
				540	// See below for preconditions.
				541	static uint64_t //
				542	wuffs_base__private_implementation__high_prec_dec__rounded_integer(
				543	wuffs_base__private_implementation__high_prec_dec* h) {
				544	if ((h->num_digits == 0) \|\| (h->decimal_point < 0)) {
				545	return 0;
				546	} else if (h->decimal_point > 18) {
				547	return UINT64_MAX;
				548	}
				549
				550	uint32_t dp = (uint32_t)(h->decimal_point);
				551	uint64_t n = 0;
				552	uint32_t i = 0;
				553	for (; i < dp; i++) {
				554	n = (10 * n) + ((i < h->num_digits) ? h->digits[i] : 0);
				555	}
				556
				557	bool round_up = false;
				558	if (dp < h->num_digits) {
				559	round_up = h->digits[dp] >= 5;
				560	if ((h->digits[dp] == 5) && (dp + 1 == h->num_digits)) {
				561	// We are exactly halfway. If we're truncated, round up, otherwise round
				562	// to even.
				563	round_up = h->truncated \|\| //
				564	((dp > 0) && (1 & h->digits[dp - 1]));
				565	}
				566	}
				567	if (round_up) {
				568	n++;
				569	}
				570
				571	return n;
				572	}
				573
				574	// wuffs_base__private_implementation__high_prec_dec__small_xshift shifts h's
				575	// number (where 'x' is 'l' or 'r' for left or right) by a small shift value.
				576	//
				577	// Preconditions:
				578	// - h is non-NULL.
				579	// - h->decimal_point is "not extreme".
				580	// - shift is non-zero.
				581	// - shift is "a small shift".
				582	//
				583	// "Not extreme" means within
				584	// ±WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE.
				585	//
				586	// "A small shift" means not more than
				587	// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL.
				588	//
				589	// wuffs_base__private_implementation__high_prec_dec__rounded_integer and
				590	// wuffs_base__private_implementation__high_prec_dec__lshift_num_new_digits
				591	// have the same preconditions.
				592	//
				593	// wuffs_base__private_implementation__high_prec_dec__lshift keeps the first
				594	// two preconditions but not the last two. Its shift argument is signed and
				595	// does not need to be "small": zero is a no-op, positive means left shift and
				596	// negative means right shift.
				597
				598	static void //
				599	wuffs_base__private_implementation__high_prec_dec__small_lshift(
				600	wuffs_base__private_implementation__high_prec_dec* h,
				601	uint32_t shift) {
				602	if (h->num_digits == 0) {
				603	return;
				604	}
				605	uint32_t num_new_digits =
				606	wuffs_base__private_implementation__high_prec_dec__lshift_num_new_digits(
				607	h, shift);
				608	uint32_t rx = h->num_digits - 1; // Read index.
				609	uint32_t wx = h->num_digits - 1 + num_new_digits; // Write index.
				610	uint64_t n = 0;
				611
				612	// Repeat: pick up a digit, put down a digit, right to left.
				613	while (((int32_t)rx) >= 0) {
				614	n += ((uint64_t)(h->digits[rx])) << shift;
				615	uint64_t quo = n / 10;
				616	uint64_t rem = n - (10 * quo);
				617	if (wx < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
				618	h->digits[wx] = (uint8_t)rem;
				619	} else if (rem > 0) {
				620	h->truncated = true;
				621	}
				622	n = quo;
				623	wx--;
				624	rx--;
				625	}
				626
				627	// Put down leading digits, right to left.
				628	while (n > 0) {
				629	uint64_t quo = n / 10;
				630	uint64_t rem = n - (10 * quo);
				631	if (wx < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
				632	h->digits[wx] = (uint8_t)rem;
				633	} else if (rem > 0) {
				634	h->truncated = true;
				635	}
				636	n = quo;
				637	wx--;
				638	}
				639
				640	// Finish.
				641	h->num_digits += num_new_digits;
				642	if (h->num_digits >
				643	WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
				644	h->num_digits = WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION;
				645	}
				646	h->decimal_point += (int32_t)num_new_digits;
				647	wuffs_base__private_implementation__high_prec_dec__trim(h);
				648	}
				649
				650	static void //
				651	wuffs_base__private_implementation__high_prec_dec__small_rshift(
				652	wuffs_base__private_implementation__high_prec_dec* h,
				653	uint32_t shift) {
				654	uint32_t rx = 0; // Read index.
				655	uint32_t wx = 0; // Write index.
				656	uint64_t n = 0;
				657
				658	// Pick up enough leading digits to cover the first shift.
				659	while ((n >> shift) == 0) {
				660	if (rx < h->num_digits) {
				661	// Read a digit.
				662	n = (10 * n) + h->digits[rx++];
				663	} else if (n == 0) {
				664	// h's number used to be zero and remains zero.
				665	return;
				666	} else {
				667	// Read sufficient implicit trailing zeroes.
				668	while ((n >> shift) == 0) {
				669	n = 10 * n;
				670	rx++;
				671	}
				672	break;
				673	}
				674	}
				675	h->decimal_point -= ((int32_t)(rx - 1));
				676	if (h->decimal_point <
				677	-WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
				678	// After the shift, h's number is effectively zero.
				679	h->num_digits = 0;
				680	h->decimal_point = 0;
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	681	h->truncated = false;
				682	return;
				683	}
				684
				685	// Repeat: pick up a digit, put down a digit, left to right.
				686	uint64_t mask = (((uint64_t)(1)) << shift) - 1;
				687	while (rx < h->num_digits) {
				688	uint8_t new_digit = ((uint8_t)(n >> shift));
				689	n = (10 * (n & mask)) + h->digits[rx++];
				690	h->digits[wx++] = new_digit;
				691	}
				692
				693	// Put down trailing digits, left to right.
				694	while (n > 0) {
				695	uint8_t new_digit = ((uint8_t)(n >> shift));
				696	n = 10 * (n & mask);
				697	if (wx < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
				698	h->digits[wx++] = new_digit;
				699	} else if (new_digit > 0) {
				700	h->truncated = true;
				701	}
				702	}
				703
				704	// Finish.
				705	h->num_digits = wx;
				706	wuffs_base__private_implementation__high_prec_dec__trim(h);
				707	}
				708
				709	static void //
				710	wuffs_base__private_implementation__high_prec_dec__lshift(
				711	wuffs_base__private_implementation__high_prec_dec* h,
				712	int32_t shift) {
				713	if (shift > 0) {
				714	while (shift > +WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL) {
				715	wuffs_base__private_implementation__high_prec_dec__small_lshift(
				716	h, WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL);
				717	shift -= WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;
				718	}
				719	wuffs_base__private_implementation__high_prec_dec__small_lshift(
				720	h, ((uint32_t)(+shift)));
				721	} else if (shift < 0) {
				722	while (shift < -WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL) {
				723	wuffs_base__private_implementation__high_prec_dec__small_rshift(
				724	h, WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL);
				725	shift += WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;
				726	}
				727	wuffs_base__private_implementation__high_prec_dec__small_rshift(
				728	h, ((uint32_t)(-shift)));
				729	}
				730	}
				731
				732	// --------
				733
				734	// wuffs_base__private_implementation__high_prec_dec__round_etc rounds h's
				735	// number. For those functions that take an n argument, rounding produces at
				736	// most n digits (which is not necessarily at most n decimal places). Negative
				737	// n values are ignored, as well as any n greater than or equal to h's number
				738	// of digits. The etc__round_just_enough function implicitly chooses an n to
				739	// implement WUFFS_BASE__RENDER_NUMBER_FXX__JUST_ENOUGH_PRECISION.
				740	//
				741	// Preconditions:
				742	// - h is non-NULL.
				743	// - h->decimal_point is "not extreme".
				744	//
				745	// "Not extreme" means within
				746	// ±WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE.
				747
				748	static void //
				749	wuffs_base__private_implementation__high_prec_dec__round_down(
				750	wuffs_base__private_implementation__high_prec_dec* h,
				751	int32_t n) {
				752	if ((n < 0) \|\| (h->num_digits <= (uint32_t)n)) {
				753	return;
				754	}
				755	h->num_digits = (uint32_t)(n);
				756	wuffs_base__private_implementation__high_prec_dec__trim(h);
				757	}
				758
				759	static void //
				760	wuffs_base__private_implementation__high_prec_dec__round_up(
				761	wuffs_base__private_implementation__high_prec_dec* h,
				762	int32_t n) {
				763	if ((n < 0) \|\| (h->num_digits <= (uint32_t)n)) {
				764	return;
				765	}
				766
				767	for (n--; n >= 0; n--) {
				768	if (h->digits[n] < 9) {
				769	h->digits[n]++;
				770	h->num_digits = (uint32_t)(n + 1);
				771	return;
				772	}
				773	}
				774
				775	// The number is all 9s. Change to a single 1 and adjust the decimal point.
				776	h->digits[0] = 1;
				777	h->num_digits = 1;
				778	h->decimal_point++;
				779	}
				780
				781	static void //
				782	wuffs_base__private_implementation__high_prec_dec__round_nearest(
				783	wuffs_base__private_implementation__high_prec_dec* h,
				784	int32_t n) {
				785	if ((n < 0) \|\| (h->num_digits <= (uint32_t)n)) {
				786	return;
				787	}
				788	bool up = h->digits[n] >= 5;
				789	if ((h->digits[n] == 5) && ((n + 1) == ((int32_t)(h->num_digits)))) {
				790	up = h->truncated \|\| //
				791	((n > 0) && ((h->digits[n - 1] & 1) != 0));
				792	}
				793
				794	if (up) {
				795	wuffs_base__private_implementation__high_prec_dec__round_up(h, n);
				796	} else {
				797	wuffs_base__private_implementation__high_prec_dec__round_down(h, n);
				798	}
				799	}
				800
				801	static void //
				802	wuffs_base__private_implementation__high_prec_dec__round_just_enough(
				803	wuffs_base__private_implementation__high_prec_dec* h,
				804	int32_t exp2,
				805	uint64_t mantissa) {
				806	// The magic numbers 52 and 53 in this function are because IEEE 754 double
				807	// precision has 52 mantissa bits.
				808	//
				809	// Let f be the floating point number represented by exp2 and mantissa (and
				810	// also the number in h): the number (mantissa * (2 ** (exp2 - 52))).
				811	//
				812	// If f is zero or a small integer, we can return early.
				813	if ((mantissa == 0) \|\|
				814	((exp2 < 53) && (h->decimal_point >= ((int32_t)(h->num_digits))))) {
				815	return;
				816	}
				817
				818	// The smallest normal f has an exp2 of -1022 and a mantissa of (1 << 52).
				819	// Subnormal numbers have the same exp2 but a smaller mantissa.
				820	static const int32_t min_incl_normal_exp2 = -1022;
				821	static const uint64_t min_incl_normal_mantissa = 0x0010000000000000ul;
				822
				823	// Compute lower and upper bounds such that any number between them (possibly
				824	// inclusive) will round to f. First, the lower bound. Our number f is:
				825	// ((mantissa + 0) * (2 ** ( exp2 - 52)))
				826	//
				827	// The next lowest floating point number is:
				828	// ((mantissa - 1) * (2 ** ( exp2 - 52)))
				829	// unless (mantissa - 1) drops the (1 << 52) bit and exp2 is not the
				830	// min_incl_normal_exp2. Either way, call it:
				831	// ((l_mantissa) * (2 ** (l_exp2 - 52)))
				832	//
				833	// The lower bound is halfway between them (noting that 52 became 53):
				834	// (((2 * l_mantissa) + 1) * (2 ** (l_exp2 - 53)))
				835	int32_t l_exp2 = exp2;
				836	uint64_t l_mantissa = mantissa - 1;
				837	if ((exp2 > min_incl_normal_exp2) && (mantissa <= min_incl_normal_mantissa)) {
				838	l_exp2 = exp2 - 1;
				839	l_mantissa = (2 * mantissa) - 1;
				840	}
				841	wuffs_base__private_implementation__high_prec_dec lower;
				842	wuffs_base__private_implementation__high_prec_dec__assign(
				843	&lower, (2 * l_mantissa) + 1, false);
				844	wuffs_base__private_implementation__high_prec_dec__lshift(&lower,
				845	l_exp2 - 53);
				846
				847	// Next, the upper bound. Our number f is:
				848	// ((mantissa + 0) * (2 ** (exp2 - 52)))
				849	//
				850	// The next highest floating point number is:
				851	// ((mantissa + 1) * (2 ** (exp2 - 52)))
				852	//
				853	// The upper bound is halfway between them (noting that 52 became 53):
				854	// (((2 * mantissa) + 1) * (2 ** (exp2 - 53)))
				855	wuffs_base__private_implementation__high_prec_dec upper;
				856	wuffs_base__private_implementation__high_prec_dec__assign(
				857	&upper, (2 * mantissa) + 1, false);
				858	wuffs_base__private_implementation__high_prec_dec__lshift(&upper, exp2 - 53);
				859
				860	// The lower and upper bounds are possible outputs only if the original
				861	// mantissa is even, so that IEEE round-to-even would round to the original
				862	// mantissa and not its neighbors.
				863	bool inclusive = (mantissa & 1) == 0;
				864
				865	// As we walk the digits, we want to know whether rounding up would fall
				866	// within the upper bound. This is tracked by upper_delta:
				867	// - When -1, the digits of h and upper are the same so far.
				868	// - When +0, we saw a difference of 1 between h and upper on a previous
				869	// digit and subsequently only 9s for h and 0s for upper. Thus, rounding
				870	// up may fall outside of the bound if !inclusive.
				871	// - When +1, the difference is greater than 1 and we know that rounding up
				872	// falls within the bound.
				873	//
				874	// This is a state machine with three states. The numerical value for each
				875	// state (-1, +0 or +1) isn't important, other than their order.
				876	int upper_delta = -1;
				877
				878	// We can now figure out the shortest number of digits required. Walk the
				879	// digits until h has distinguished itself from lower or upper.
				880	//
				881	// The zi and zd variables are indexes and digits, for z in l (lower), h (the
				882	// number) and u (upper).
				883	//
				884	// The lower, h and upper numbers may have their decimal points at different
				885	// places. In this case, upper is the longest, so we iterate ui starting from
				886	// 0 and iterate li and hi starting from either 0 or -1.
				887	int32_t ui = 0;
				888	for (;; ui++) {
				889	// Calculate hd, the middle number's digit.
				890	int32_t hi = ui - upper.decimal_point + h->decimal_point;
				891	if (hi >= ((int32_t)(h->num_digits))) {
				892	break;
				893	}
				894	uint8_t hd = (((uint32_t)hi) < h->num_digits) ? h->digits[hi] : 0;
				895
				896	// Calculate ld, the lower bound's digit.
				897	int32_t li = ui - upper.decimal_point + lower.decimal_point;
				898	uint8_t ld = (((uint32_t)li) < lower.num_digits) ? lower.digits[li] : 0;
				899
				900	// We can round down (truncate) if lower has a different digit than h or if
				901	// lower is inclusive and is exactly the result of rounding down (i.e. we
				902	// have reached the final digit of lower).
				903	bool can_round_down =
				904	(ld != hd) \|\| //
				905	(inclusive && ((li + 1) == ((int32_t)(lower.num_digits))));
				906
				907	// Calculate ud, the upper bound's digit, and update upper_delta.
				908	uint8_t ud = (((uint32_t)ui) < upper.num_digits) ? upper.digits[ui] : 0;
				909	if (upper_delta < 0) {
				910	if ((hd + 1) < ud) {
				911	// For example:
				912	// h = 12345???
				913	// upper = 12347???
				914	upper_delta = +1;
				915	} else if (hd != ud) {
				916	// For example:
				917	// h = 12345???
				918	// upper = 12346???
				919	upper_delta = +0;
				920	}
				921	} else if (upper_delta == 0) {
				922	if ((hd != 9) \|\| (ud != 0)) {
				923	// For example:
				924	// h = 1234598?
				925	// upper = 1234600?
				926	upper_delta = +1;
				927	}
				928	}
				929
				930	// We can round up if upper has a different digit than h and either upper
				931	// is inclusive or upper is bigger than the result of rounding up.
				932	bool can_round_up =
				933	(upper_delta > 0) \|\| //
				934	((upper_delta == 0) && //
				935	(inclusive \|\| ((ui + 1) < ((int32_t)(upper.num_digits)))));
				936
				937	// If we can round either way, round to nearest. If we can round only one
				938	// way, do it. If we can't round, continue the loop.
				939	if (can_round_down) {
				940	if (can_round_up) {
				941	wuffs_base__private_implementation__high_prec_dec__round_nearest(
				942	h, hi + 1);
				943	return;
				944	} else {
				945	wuffs_base__private_implementation__high_prec_dec__round_down(h,
				946	hi + 1);
				947	return;
				948	}
				949	} else {
				950	if (can_round_up) {
				951	wuffs_base__private_implementation__high_prec_dec__round_up(h, hi + 1);
				952	return;
				953	}
				954	}
				955	}
				956	}
				957
				958	// --------
				959
Nigel Tao	c4fa8e2	2020-07-18 17:35:13 +1000	[diff] [blame]	960	// wuffs_base__private_implementation__parse_number_f64_eisel_lemire produces
				961	// the IEEE 754 double-precision value for an exact mantissa and base-10
				962	// exponent. For example:
Nigel Tao	b15a0fc	2020-07-08 10:50:14 +1000	[diff] [blame]	963	// - when parsing "12345.678e+02", man is 12345678 and exp10 is -1.
				964	// - when parsing "-12", man is 12 and exp10 is 0. Processing the leading
				965	// minus sign is the responsibility of the caller, not this function.
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	966	//
				967	// On success, it returns a non-negative int64_t such that the low 63 bits hold
				968	// the 11-bit exponent and 52-bit mantissa.
				969	//
				970	// On failure, it returns a negative value.
				971	//
Nigel Tao	c4fa8e2	2020-07-18 17:35:13 +1000	[diff] [blame]	972	// The algorithm is based on an original idea by Michael Eisel that was refined
				973	// by Daniel Lemire. See
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	974	// https://lemire.me/blog/2020/03/10/fast-float-parsing-in-practice/
Nigel Tao	1d8d18f	2020-10-07 22:13:51 +1100	[diff] [blame]	975	// and
				976	// https://nigeltao.github.io/blog/2020/eisel-lemire.html
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	977	//
				978	// Preconditions:
				979	// - man is non-zero.
Nigel Tao	0987296	2020-09-15 22:22:51 +1000	[diff] [blame]	980	// - exp10 is in the range [-307 ..= 288], the same range of the
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	981	// wuffs_base__private_implementation__powers_of_10 array.
Nigel Tao	8b45db0	2020-09-15 21:50:32 +1000	[diff] [blame]	982	//
				983	// The exp10 range (and the fact that man is in the range [1 ..= UINT64_MAX],
				984	// approximately [1 ..= 1.85e+19]) means that (man * (10 ** exp10)) is in the
				985	// range [1e-307 ..= 1.85e+307]. This is entirely within the range of normal
				986	// (neither subnormal nor non-finite) f64 values: DBL_MIN and DBL_MAX are
				987	// approximately 2.23e–308 and 1.80e+308.
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	988	static int64_t //
Nigel Tao	c4fa8e2	2020-07-18 17:35:13 +1000	[diff] [blame]	989	wuffs_base__private_implementation__parse_number_f64_eisel_lemire(
				990	uint64_t man,
				991	int32_t exp10) {
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	992	// Look up the (possibly truncated) base-2 representation of (10 ** exp10).
				993	// The look-up table was constructed so that it is already normalized: the
				994	// table entry's mantissa's MSB (most significant bit) is on.
Nigel Tao	afe7f27	2020-09-23 15:52:13 +1000	[diff] [blame]	995	const uint64_t* po10 =
				996	&wuffs_base__private_implementation__powers_of_10[exp10 + 307][0];
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	997
				998	// Normalize the man argument. The (man != 0) precondition means that a
				999	// non-zero bit exists.
				1000	uint32_t clz = wuffs_base__count_leading_zeroes_u64(man);
				1001	man <<= clz;
				1002
				1003	// Calculate the return value's base-2 exponent. We might tweak it by ±1
Nigel Tao	b6d8552	2020-09-23 15:21:47 +1000	[diff] [blame]	1004	// later, but its initial value comes from a linear scaling of exp10,
				1005	// converting from power-of-10 to power-of-2, and adjusting by clz.
				1006	//
				1007	// The magic constants are:
				1008	// - 1087 = 1023 + 64. The 1023 is the f64 exponent bias. The 64 is because
				1009	// the look-up table uses 64-bit mantissas.
				1010	// - 217706 is such that the ratio 217706 / 65536 ≈ 3.321930 is close enough
				1011	// (over the practical range of exp10) to log(10) / log(2) ≈ 3.321928.
				1012	// - 65536 = 1<<16 is arbitrary but a power of 2, so division is a shift.
				1013	//
				1014	// Equality of the linearly-scaled value and the actual power-of-2, over the
				1015	// range of exp10 arguments that this function accepts, is confirmed by
				1016	// script/print-mpb-powers-of-10.go
				1017	uint64_t ret_exp2 =
				1018	((uint64_t)(((217706 * exp10) >> 16) + 1087)) - ((uint64_t)clz);
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1019
				1020	// Multiply the two mantissas. Normalization means that both mantissas are at
				1021	// least (1<<63), so the 128-bit product must be at least (1<<126). The high
Nigel Tao	74d4af6	2020-07-10 11:27:17 +1000	[diff] [blame]	1022	// 64 bits of the product, x_hi, must therefore be at least (1<<62).
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1023	//
Nigel Tao	74d4af6	2020-07-10 11:27:17 +1000	[diff] [blame]	1024	// As a consequence, x_hi has either 0 or 1 leading zeroes. Shifting x_hi
				1025	// right by either 9 or 10 bits (depending on x_hi's MSB) will therefore
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1026	// leave the top 10 MSBs (bits 54 ..= 63) off and the 11th MSB (bit 53) on.
Nigel Tao	afe7f27	2020-09-23 15:52:13 +1000	[diff] [blame]	1027	wuffs_base__multiply_u64__output x = wuffs_base__multiply_u64(man, po10[1]);
Nigel Tao	74d4af6	2020-07-10 11:27:17 +1000	[diff] [blame]	1028	uint64_t x_hi = x.hi;
				1029	uint64_t x_lo = x.lo;
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1030
				1031	// Before we shift right by at least 9 bits, recall that the look-up table
				1032	// entry was possibly truncated. We have so far only calculated a lower bound
				1033	// for the product (man * e), where e is (10 ** exp10). The upper bound would
				1034	// add a further (man * 1) to the 128-bit product, which overflows the lower
Nigel Tao	74d4af6	2020-07-10 11:27:17 +1000	[diff] [blame]	1035	// 64-bit limb if ((x_lo + man) < man).
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1036	//
Nigel Tao	74d4af6	2020-07-10 11:27:17 +1000	[diff] [blame]	1037	// If overflow occurs, that adds 1 to x_hi. Since we're about to shift right
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1038	// by at least 9 bits, that carried 1 can be ignored unless the higher 64-bit
				1039	// limb's low 9 bits are all on.
Nigel Tao	ba3818c	2020-09-28 12:51:45 +1000	[diff] [blame]	1040	//
				1041	// For example, parsing "9999999999999999999" will take the if-true branch
				1042	// here, since:
				1043	// - x_hi = 0x4563918244F3FFFF
				1044	// - x_lo = 0x8000000000000000
				1045	// - man = 0x8AC7230489E7FFFF
Nigel Tao	74d4af6	2020-07-10 11:27:17 +1000	[diff] [blame]	1046	if (((x_hi & 0x1FF) == 0x1FF) && ((x_lo + man) < man)) {
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1047	// Refine our calculation of (man * e). Before, our approximation of e used
				1048	// a "low resolution" 64-bit mantissa. Now use a "high resolution" 128-bit
				1049	// mantissa. We've already calculated x = (man * bits_0_to_63_incl_of_e).
				1050	// Now calculate y = (man * bits_64_to_127_incl_of_e).
Nigel Tao	afe7f27	2020-09-23 15:52:13 +1000	[diff] [blame]	1051	wuffs_base__multiply_u64__output y = wuffs_base__multiply_u64(man, po10[0]);
Nigel Tao	74d4af6	2020-07-10 11:27:17 +1000	[diff] [blame]	1052	uint64_t y_hi = y.hi;
				1053	uint64_t y_lo = y.lo;
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1054
				1055	// Merge the 128-bit x and 128-bit y, which overlap by 64 bits, to
				1056	// calculate the 192-bit product of the 64-bit man by the 128-bit e.
				1057	// As we exit this if-block, we only care about the high 128 bits
				1058	// (merged_hi and merged_lo) of that 192-bit product.
Nigel Tao	ba3818c	2020-09-28 12:51:45 +1000	[diff] [blame]	1059	//
				1060	// For example, parsing "1.234e-45" will take the if-true branch here,
				1061	// since:
				1062	// - x_hi = 0x70B7E3696DB29FFF
				1063	// - x_lo = 0xE040000000000000
				1064	// - y_hi = 0x33718BBEAB0E0D7A
				1065	// - y_lo = 0xA880000000000000
Nigel Tao	74d4af6	2020-07-10 11:27:17 +1000	[diff] [blame]	1066	uint64_t merged_hi = x_hi;
				1067	uint64_t merged_lo = x_lo + y_hi;
				1068	if (merged_lo < x_lo) {
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1069	merged_hi++; // Carry the overflow bit.
				1070	}
				1071
				1072	// The "high resolution" approximation of e is still a lower bound. Once
				1073	// again, see if the upper bound is large enough to produce a different
				1074	// result. This time, if it does, give up instead of reaching for an even
				1075	// more precise approximation to e.
				1076	//
				1077	// This three-part check is similar to the two-part check that guarded the
				1078	// if block that we're now in, but it has an extra term for the middle 64
				1079	// bits (checking that adding 1 to merged_lo would overflow).
Nigel Tao	ba3818c	2020-09-28 12:51:45 +1000	[diff] [blame]	1080	//
				1081	// For example, parsing "5.9604644775390625e-8" will take the if-true
				1082	// branch here, since:
				1083	// - merged_hi = 0x7FFFFFFFFFFFFFFF
				1084	// - merged_lo = 0xFFFFFFFFFFFFFFFF
				1085	// - y_lo = 0x4DB3FFC120988200
				1086	// - man = 0xD3C21BCECCEDA100
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1087	if (((merged_hi & 0x1FF) == 0x1FF) && ((merged_lo + 1) == 0) &&
Nigel Tao	74d4af6	2020-07-10 11:27:17 +1000	[diff] [blame]	1088	(y_lo + man < man)) {
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1089	return -1;
				1090	}
				1091
				1092	// Replace the 128-bit x with merged.
Nigel Tao	74d4af6	2020-07-10 11:27:17 +1000	[diff] [blame]	1093	x_hi = merged_hi;
				1094	x_lo = merged_lo;
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1095	}
				1096
Nigel Tao	74d4af6	2020-07-10 11:27:17 +1000	[diff] [blame]	1097	// As mentioned above, shifting x_hi right by either 9 or 10 bits will leave
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1098	// the top 10 MSBs (bits 54 ..= 63) off and the 11th MSB (bit 53) on. If the
				1099	// MSB (before shifting) was on, adjust ret_exp2 for the larger shift.
				1100	//
				1101	// Having bit 53 on (and higher bits off) means that ret_mantissa is a 54-bit
				1102	// number.
Nigel Tao	74d4af6	2020-07-10 11:27:17 +1000	[diff] [blame]	1103	uint64_t msb = x_hi >> 63;
				1104	uint64_t ret_mantissa = x_hi >> (msb + 9);
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1105	ret_exp2 -= 1 ^ msb;
				1106
				1107	// IEEE 754 rounds to-nearest with ties rounded to-even. Rounding to-even can
				1108	// be tricky. If we're half-way between two exactly representable numbers
				1109	// (x's low 73 bits are zero and the next 2 bits that matter are "01"), give
				1110	// up instead of trying to pick the winner.
				1111	//
				1112	// Technically, we could tighten the condition by changing "73" to "73 or 74,
				1113	// depending on msb", but a flat "73" is simpler.
Nigel Tao	ba3818c	2020-09-28 12:51:45 +1000	[diff] [blame]	1114	//
				1115	// For example, parsing "1e+23" will take the if-true branch here, since:
				1116	// - x_hi = 0x54B40B1F852BDA00
				1117	// - ret_mantissa = 0x002A5A058FC295ED
Nigel Tao	74d4af6	2020-07-10 11:27:17 +1000	[diff] [blame]	1118	if ((x_lo == 0) && ((x_hi & 0x1FF) == 0) && ((ret_mantissa & 3) == 1)) {
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1119	return -1;
				1120	}
				1121
				1122	// If we're not halfway then it's rounding to-nearest. Starting with a 54-bit
				1123	// number, carry the lowest bit (bit 0) up if it's on. Regardless of whether
				1124	// it was on or off, shifting right by one then produces a 53-bit number. If
				1125	// carrying up overflowed, shift again.
				1126	ret_mantissa += ret_mantissa & 1;
				1127	ret_mantissa >>= 1;
Nigel Tao	8b45db0	2020-09-15 21:50:32 +1000	[diff] [blame]	1128	// This if block is equivalent to (but benchmarks slightly faster than) the
				1129	// following branchless form:
				1130	// uint64_t overflow_adjustment = ret_mantissa >> 53;
				1131	// ret_mantissa >>= overflow_adjustment;
				1132	// ret_exp2 += overflow_adjustment;
Nigel Tao	ba3818c	2020-09-28 12:51:45 +1000	[diff] [blame]	1133	//
				1134	// For example, parsing "7.2057594037927933e+16" will take the if-true
				1135	// branch here, since:
				1136	// - x_hi = 0x7FFFFFFFFFFFFE80
				1137	// - ret_mantissa = 0x0020000000000000
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1138	if ((ret_mantissa >> 53) > 0) {
				1139	ret_mantissa >>= 1;
				1140	ret_exp2++;
				1141	}
				1142
				1143	// Starting with a 53-bit number, IEEE 754 double-precision normal numbers
				1144	// have an implicit mantissa bit. Mask that away and keep the low 52 bits.
				1145	ret_mantissa &= 0x000FFFFFFFFFFFFF;
				1146
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1147	// Pack the bits and return.
				1148	return ((int64_t)(ret_mantissa \| (ret_exp2 << 52)));
				1149	}
				1150
				1151	// --------
				1152
				1153	static wuffs_base__result_f64 //
Nigel Tao	e0c5de9	2020-07-11 11:48:17 +1000	[diff] [blame]	1154	wuffs_base__private_implementation__parse_number_f64_special(
				1155	wuffs_base__slice_u8 s,
Nigel Tao	4d61a05	2020-07-11 12:34:40 +1000	[diff] [blame]	1156	uint32_t options) {
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1157	do {
Nigel Tao	4d61a05	2020-07-11 12:34:40 +1000	[diff] [blame]	1158	if (options & WUFFS_BASE__PARSE_NUMBER_FXX__REJECT_INF_AND_NAN) {
				1159	goto fail;
				1160	}
				1161
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1162	uint8_t* p = s.ptr;
				1163	uint8_t* q = s.ptr + s.len;
				1164
				1165	for (; (p < q) && (*p == '_'); p++) {
				1166	}
				1167	if (p >= q) {
Nigel Tao	4d61a05	2020-07-11 12:34:40 +1000	[diff] [blame]	1168	goto fail;
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1169	}
				1170
				1171	// Parse sign.
				1172	bool negative = false;
				1173	do {
				1174	if (*p == '+') {
				1175	p++;
				1176	} else if (*p == '-') {
				1177	negative = true;
				1178	p++;
				1179	} else {
				1180	break;
				1181	}
				1182	for (; (p < q) && (*p == '_'); p++) {
				1183	}
				1184	} while (0);
				1185	if (p >= q) {
Nigel Tao	4d61a05	2020-07-11 12:34:40 +1000	[diff] [blame]	1186	goto fail;
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1187	}
				1188
				1189	bool nan = false;
				1190	switch (p[0]) {
				1191	case 'I':
				1192	case 'i':
				1193	if (((q - p) < 3) \|\| //
				1194	((p[1] != 'N') && (p[1] != 'n')) \|\| //
				1195	((p[2] != 'F') && (p[2] != 'f'))) {
Nigel Tao	4d61a05	2020-07-11 12:34:40 +1000	[diff] [blame]	1196	goto fail;
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1197	}
				1198	p += 3;
				1199
				1200	if ((p >= q) \|\| (*p == '_')) {
				1201	break;
				1202	} else if (((q - p) < 5) \|\| //
				1203	((p[0] != 'I') && (p[0] != 'i')) \|\| //
				1204	((p[1] != 'N') && (p[1] != 'n')) \|\| //
				1205	((p[2] != 'I') && (p[2] != 'i')) \|\| //
				1206	((p[3] != 'T') && (p[3] != 't')) \|\| //
				1207	((p[4] != 'Y') && (p[4] != 'y'))) {
Nigel Tao	4d61a05	2020-07-11 12:34:40 +1000	[diff] [blame]	1208	goto fail;
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1209	}
				1210	p += 5;
				1211
				1212	if ((p >= q) \|\| (*p == '_')) {
				1213	break;
				1214	}
Nigel Tao	4d61a05	2020-07-11 12:34:40 +1000	[diff] [blame]	1215	goto fail;
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1216
				1217	case 'N':
				1218	case 'n':
				1219	if (((q - p) < 3) \|\| //
				1220	((p[1] != 'A') && (p[1] != 'a')) \|\| //
				1221	((p[2] != 'N') && (p[2] != 'n'))) {
Nigel Tao	4d61a05	2020-07-11 12:34:40 +1000	[diff] [blame]	1222	goto fail;
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1223	}
				1224	p += 3;
				1225
				1226	if ((p >= q) \|\| (*p == '_')) {
				1227	nan = true;
				1228	break;
				1229	}
Nigel Tao	4d61a05	2020-07-11 12:34:40 +1000	[diff] [blame]	1230	goto fail;
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1231
				1232	default:
Nigel Tao	4d61a05	2020-07-11 12:34:40 +1000	[diff] [blame]	1233	goto fail;
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1234	}
				1235
				1236	// Finish.
				1237	for (; (p < q) && (*p == '_'); p++) {
				1238	}
				1239	if (p != q) {
Nigel Tao	4d61a05	2020-07-11 12:34:40 +1000	[diff] [blame]	1240	goto fail;
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1241	}
				1242	wuffs_base__result_f64 ret;
				1243	ret.status.repr = NULL;
Nigel Tao	4d449dc	2020-07-12 11:00:47 +1000	[diff] [blame]	1244	ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1245	(nan ? 0x7FFFFFFFFFFFFFFF : 0x7FF0000000000000) \|
				1246	(negative ? 0x8000000000000000 : 0));
				1247	return ret;
				1248	} while (0);
				1249
Nigel Tao	4d61a05	2020-07-11 12:34:40 +1000	[diff] [blame]	1250	fail:
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1251	do {
				1252	wuffs_base__result_f64 ret;
Nigel Tao	4d61a05	2020-07-11 12:34:40 +1000	[diff] [blame]	1253	ret.status.repr = wuffs_base__error__bad_argument;
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1254	ret.value = 0;
				1255	return ret;
				1256	} while (0);
				1257	}
				1258
				1259	WUFFS_BASE__MAYBE_STATIC wuffs_base__result_f64 //
Nigel Tao	e0c5de9	2020-07-11 11:48:17 +1000	[diff] [blame]	1260	wuffs_base__private_implementation__high_prec_dec__to_f64(
Nigel Tao	4d61a05	2020-07-11 12:34:40 +1000	[diff] [blame]	1261	wuffs_base__private_implementation__high_prec_dec* h,
				1262	uint32_t options) {
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1263	do {
				1264	// powers converts decimal powers of 10 to binary powers of 2. For example,
				1265	// (10000 >> 13) is 1. It stops before the elements exceed 60, also known
				1266	// as WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL.
Nigel Tao	4d4c3c4	2023-02-04 13:39:29 +1100	[diff] [blame^]	1267	//
				1268	// This rounds down (1<<13 is a lower bound for 1e4). Adding 1 to the array
				1269	// element value rounds up (1<<14 is an upper bound for 1e4) while staying
				1270	// at or below WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL.
				1271	//
				1272	// When starting in the range [1e+1 .. 1e+2] (i.e. h->decimal_point == +2),
				1273	// powers[2] == 6 and so:
				1274	// - Right shifting by 6+0 produces the range [10/64 .. 100/64] =
				1275	// [0.156250 .. 1.56250]. The resultant h->decimal_point is +0 or +1.
				1276	// - Right shifting by 6+1 produces the range [10/128 .. 100/128] =
				1277	// [0.078125 .. 0.78125]. The resultant h->decimal_point is -1 or -0.
				1278	//
				1279	// When starting in the range [1e-3 .. 1e-2] (i.e. h->decimal_point == -2),
				1280	// powers[2] == 6 and so:
				1281	// - Left shifting by 6+0 produces the range [0.001*64 .. 0.01*64] =
				1282	// [0.064 .. 0.64]. The resultant h->decimal_point is -1 or -0.
				1283	// - Left shifting by 6+1 produces the range [0.001*128 .. 0.01*128] =
				1284	// [0.128 .. 1.28]. The resultant h->decimal_point is +0 or +1.
				1285	//
				1286	// Thus, when targeting h->decimal_point being +0 or +1, use (powers[n]+0)
				1287	// when right shifting but (powers[n]+1) when left shifting.
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1288	static const uint32_t num_powers = 19;
				1289	static const uint8_t powers[19] = {
				1290	0, 3, 6, 9, 13, 16, 19, 23, 26, 29, //
				1291	33, 36, 39, 43, 46, 49, 53, 56, 59, //
				1292	};
				1293
				1294	// Handle zero and obvious extremes. The largest and smallest positive
				1295	// finite f64 values are approximately 1.8e+308 and 4.9e-324.
				1296	if ((h->num_digits == 0) \|\| (h->decimal_point < -326)) {
				1297	goto zero;
				1298	} else if (h->decimal_point > 310) {
				1299	goto infinity;
				1300	}
				1301
Nigel Tao	c4fa8e2	2020-07-18 17:35:13 +1000	[diff] [blame]	1302	// Try the fast Eisel-Lemire algorithm again. Calculating the (man, exp10)
				1303	// pair from the high_prec_dec h is more correct but slower than the
				1304	// approach taken in wuffs_base__parse_number_f64. The latter is optimized
				1305	// for the common cases (e.g. assuming no underscores or a leading '+'
				1306	// sign) rather than the full set of cases allowed by the Wuffs API.
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1307	if (h->num_digits <= 19) {
				1308	uint64_t man = 0;
				1309	uint32_t i;
				1310	for (i = 0; i < h->num_digits; i++) {
				1311	man = (10 * man) + h->digits[i];
				1312	}
				1313	int32_t exp10 = h->decimal_point - ((int32_t)(h->num_digits));
Nigel Tao	8b45db0	2020-09-15 21:50:32 +1000	[diff] [blame]	1314	if ((man != 0) && (-307 <= exp10) && (exp10 <= 288)) {
Nigel Tao	c4fa8e2	2020-07-18 17:35:13 +1000	[diff] [blame]	1315	int64_t r =
				1316	wuffs_base__private_implementation__parse_number_f64_eisel_lemire(
				1317	man, exp10);
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1318	if (r >= 0) {
				1319	wuffs_base__result_f64 ret;
				1320	ret.status.repr = NULL;
Nigel Tao	4d449dc	2020-07-12 11:00:47 +1000	[diff] [blame]	1321	ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1322	((uint64_t)r) \| (((uint64_t)(h->negative)) << 63));
				1323	return ret;
				1324	}
				1325	}
				1326	}
				1327
Nigel Tao	ce685a6	2020-11-03 15:24:02 +1100	[diff] [blame]	1328	// When Eisel-Lemire fails, fall back to Simple Decimal Conversion. See
				1329	// https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html
				1330	//
Nigel Tao	4d4c3c4	2023-02-04 13:39:29 +1100	[diff] [blame^]	1331	// Scale by powers of 2 until we're in the range [0.1 .. 10]. Equivalently,
				1332	// that h->decimal_point is +0 or +1.
				1333	//
				1334	// First we shift right while at or above 10...
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1335	const int32_t f64_bias = -1023;
				1336	int32_t exp2 = 0;
sarastro-nl	d46220c	2022-11-21 21:29:03 +0100	[diff] [blame]	1337	while (h->decimal_point > 1) {
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1338	uint32_t n = (uint32_t)(+h->decimal_point);
				1339	uint32_t shift =
				1340	(n < num_powers)
				1341	? powers[n]
				1342	: WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;
				1343
				1344	wuffs_base__private_implementation__high_prec_dec__small_rshift(h, shift);
				1345	if (h->decimal_point <
				1346	-WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
				1347	goto zero;
				1348	}
				1349	exp2 += (int32_t)shift;
				1350	}
Nigel Tao	4d4c3c4	2023-02-04 13:39:29 +1100	[diff] [blame^]	1351	// ...then we shift left while below 0.1.
sarastro-nl	d46220c	2022-11-21 21:29:03 +0100	[diff] [blame]	1352	while (h->decimal_point < 0) {
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1353	uint32_t shift;
Nigel Tao	4d4c3c4	2023-02-04 13:39:29 +1100	[diff] [blame^]	1354	uint32_t n = (uint32_t)(-h->decimal_point);
				1355	shift = (n < num_powers)
				1356	// The +1 is per "when targeting h->decimal_point being +0 or
				1357	// +1... when left shifting" in the powers comment above.
				1358	? (powers[n] + 1)
				1359	: WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1360
				1361	wuffs_base__private_implementation__high_prec_dec__small_lshift(h, shift);
				1362	if (h->decimal_point >
				1363	+WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
				1364	goto infinity;
				1365	}
				1366	exp2 -= (int32_t)shift;
				1367	}
				1368
Nigel Tao	4d4c3c4	2023-02-04 13:39:29 +1100	[diff] [blame^]	1369	// To get from "in the range [0.1 .. 10]" to "in the range [1 .. 2]" (which
				1370	// will give us our exponent in base-2), the mantissa's first 3 digits will
				1371	// determine the final left shift, equal to 52 (the number of explicit f64
				1372	// bits) plus an additional adjustment.
				1373	int man3 = (100 * h->digits[0]) +
				1374	((h->num_digits > 1) ? (10 * h->digits[1]) : 0) +
				1375	((h->num_digits > 2) ? h->digits[2] : 0);
				1376	int32_t additional_lshift = 0;
				1377	if (h->decimal_point == 0) { // The value is in [0.1 .. 1].
				1378	if (man3 < 125) {
				1379	additional_lshift = +4;
				1380	} else if (man3 < 250) {
				1381	additional_lshift = +3;
				1382	} else if (man3 < 500) {
				1383	additional_lshift = +2;
				1384	} else {
				1385	additional_lshift = +1;
				1386	}
				1387	} else { // The value is in [1 .. 10].
				1388	if (man3 < 200) {
				1389	additional_lshift = -0;
				1390	} else if (man3 < 400) {
				1391	additional_lshift = -1;
				1392	} else if (man3 < 800) {
				1393	additional_lshift = -2;
				1394	} else {
				1395	additional_lshift = -3;
				1396	}
sarastro-nl	d46220c	2022-11-21 21:29:03 +0100	[diff] [blame]	1397	}
Nigel Tao	4d4c3c4	2023-02-04 13:39:29 +1100	[diff] [blame^]	1398	exp2 -= additional_lshift;
				1399	uint32_t final_lshift = (uint32_t)(52 + additional_lshift);
				1400
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1401	// The minimum normal exponent is (f64_bias + 1).
				1402	while ((f64_bias + 1) > exp2) {
				1403	uint32_t n = (uint32_t)((f64_bias + 1) - exp2);
				1404	if (n > WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL) {
				1405	n = WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;
				1406	}
				1407	wuffs_base__private_implementation__high_prec_dec__small_rshift(h, n);
				1408	exp2 += (int32_t)n;
				1409	}
				1410
				1411	// Check for overflow.
				1412	if ((exp2 - f64_bias) >= 0x07FF) { // (1 << 11) - 1.
				1413	goto infinity;
				1414	}
				1415
				1416	// Extract 53 bits for the mantissa (in base-2).
Nigel Tao	4d4c3c4	2023-02-04 13:39:29 +1100	[diff] [blame^]	1417	wuffs_base__private_implementation__high_prec_dec__small_lshift(
				1418	h, final_lshift);
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1419	uint64_t man2 =
				1420	wuffs_base__private_implementation__high_prec_dec__rounded_integer(h);
				1421
				1422	// Rounding might have added one bit. If so, shift and re-check overflow.
				1423	if ((man2 >> 53) != 0) {
				1424	man2 >>= 1;
				1425	exp2++;
				1426	if ((exp2 - f64_bias) >= 0x07FF) { // (1 << 11) - 1.
				1427	goto infinity;
				1428	}
				1429	}
				1430
				1431	// Handle subnormal numbers.
				1432	if ((man2 >> 52) == 0) {
				1433	exp2 = f64_bias;
				1434	}
				1435
				1436	// Pack the bits and return.
				1437	uint64_t exp2_bits =
				1438	(uint64_t)((exp2 - f64_bias) & 0x07FF); // (1 << 11) - 1.
				1439	uint64_t bits = (man2 & 0x000FFFFFFFFFFFFF) \| // (1 << 52) - 1.
				1440	(exp2_bits << 52) \| //
				1441	(h->negative ? 0x8000000000000000 : 0); // (1 << 63).
				1442
				1443	wuffs_base__result_f64 ret;
				1444	ret.status.repr = NULL;
Nigel Tao	4d449dc	2020-07-12 11:00:47 +1000	[diff] [blame]	1445	ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(bits);
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1446	return ret;
				1447	} while (0);
				1448
				1449	zero:
				1450	do {
				1451	uint64_t bits = h->negative ? 0x8000000000000000 : 0;
				1452
				1453	wuffs_base__result_f64 ret;
				1454	ret.status.repr = NULL;
Nigel Tao	4d449dc	2020-07-12 11:00:47 +1000	[diff] [blame]	1455	ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(bits);
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1456	return ret;
				1457	} while (0);
				1458
				1459	infinity:
				1460	do {
Nigel Tao	4d61a05	2020-07-11 12:34:40 +1000	[diff] [blame]	1461	if (options & WUFFS_BASE__PARSE_NUMBER_FXX__REJECT_INF_AND_NAN) {
				1462	wuffs_base__result_f64 ret;
				1463	ret.status.repr = wuffs_base__error__bad_argument;
				1464	ret.value = 0;
				1465	return ret;
				1466	}
				1467
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1468	uint64_t bits = h->negative ? 0xFFF0000000000000 : 0x7FF0000000000000;
				1469
				1470	wuffs_base__result_f64 ret;
				1471	ret.status.repr = NULL;
Nigel Tao	4d449dc	2020-07-12 11:00:47 +1000	[diff] [blame]	1472	ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(bits);
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1473	return ret;
				1474	} while (0);
				1475	}
				1476
				1477	static inline bool //
				1478	wuffs_base__private_implementation__is_decimal_digit(uint8_t c) {
				1479	return ('0' <= c) && (c <= '9');
				1480	}
				1481
				1482	WUFFS_BASE__MAYBE_STATIC wuffs_base__result_f64 //
				1483	wuffs_base__parse_number_f64(wuffs_base__slice_u8 s, uint32_t options) {
				1484	// In practice, almost all "dd.ddddE±xxx" numbers can be represented
				1485	// losslessly by a uint64_t mantissa "dddddd" and an int32_t base-10
				1486	// exponent, adjusting "xxx" for the position (if present) of the decimal
				1487	// separator '.' or ','.
				1488	//
				1489	// This (u64 man, i32 exp10) data structure is superficially similar to the
				1490	// "Do It Yourself Floating Point" type from Loitsch (†), but the exponent
				1491	// here is base-10, not base-2.
				1492	//
Nigel Tao	c4fa8e2	2020-07-18 17:35:13 +1000	[diff] [blame]	1493	// If s's number fits in a (man, exp10), parse that pair with the
				1494	// Eisel-Lemire algorithm. If not, or if Eisel-Lemire fails, parsing s with
				1495	// the fallback algorithm is slower but comprehensive.
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1496	//
				1497	// † "Printing Floating-Point Numbers Quickly and Accurately with Integers"
				1498	// (https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf).
				1499	// Florian Loitsch is also the primary contributor to
				1500	// https://github.com/google/double-conversion
				1501	do {
				1502	// Calculating that (man, exp10) pair needs to stay within s's bounds.
				1503	// Provided that s isn't extremely long, work on a NUL-terminated copy of
				1504	// s's contents. The NUL byte isn't a valid part of "±dd.ddddE±xxx".
				1505	//
				1506	// As the pointer p walks the contents, it's faster to repeatedly check "is
				1507	// p a valid digit" than "is p within bounds and p a valid digit".
				1508	if (s.len >= 256) {
				1509	goto fallback;
				1510	}
				1511	uint8_t z[256];
				1512	memcpy(&z[0], s.ptr, s.len);
				1513	z[s.len] = 0;
				1514	const uint8_t* p = &z[0];
				1515
				1516	// Look for a leading minus sign. Technically, we could also look for an
				1517	// optional plus sign, but the "script/process-json-numbers.c with -p"
				1518	// benchmark is noticably slower if we do. It's optional and, in practice,
				1519	// usually absent. Let the fallback catch it.
				1520	bool negative = (*p == '-');
				1521	if (negative) {
				1522	p++;
				1523	}
				1524
				1525	// After walking "dd.dddd", comparing p later with p now will produce the
				1526	// number of "d"s and "."s.
				1527	const uint8_t* const start_of_digits_ptr = p;
				1528
				1529	// Walk the "d"s before a '.', 'E', NUL byte, etc. If it starts with '0',
				1530	// it must be a single '0'. If it starts with a non-zero decimal digit, it
				1531	// can be a sequence of decimal digits.
				1532	//
				1533	// Update the man variable during the walk. It's OK if man overflows now.
				1534	// We'll detect that later.
				1535	uint64_t man;
				1536	if (*p == '0') {
				1537	man = 0;
				1538	p++;
				1539	if (wuffs_base__private_implementation__is_decimal_digit(*p)) {
				1540	goto fallback;
				1541	}
				1542	} else if (wuffs_base__private_implementation__is_decimal_digit(*p)) {
				1543	man = ((uint8_t)(*p - '0'));
				1544	p++;
				1545	for (; wuffs_base__private_implementation__is_decimal_digit(*p); p++) {
				1546	man = (10 * man) + ((uint8_t)(*p - '0'));
				1547	}
				1548	} else {
				1549	goto fallback;
				1550	}
				1551
				1552	// Walk the "d"s after the optional decimal separator ('.' or ','),
				1553	// updating the man and exp10 variables.
				1554	int32_t exp10 = 0;
Nigel Tao	e0c5de9	2020-07-11 11:48:17 +1000	[diff] [blame]	1555	if (*p ==
				1556	((options & WUFFS_BASE__PARSE_NUMBER_FXX__DECIMAL_SEPARATOR_IS_A_COMMA)
				1557	? ','
				1558	: '.')) {
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1559	p++;
				1560	const uint8_t* first_after_separator_ptr = p;
				1561	if (!wuffs_base__private_implementation__is_decimal_digit(*p)) {
				1562	goto fallback;
				1563	}
				1564	man = (10 * man) + ((uint8_t)(*p - '0'));
				1565	p++;
				1566	for (; wuffs_base__private_implementation__is_decimal_digit(*p); p++) {
				1567	man = (10 * man) + ((uint8_t)(*p - '0'));
				1568	}
				1569	exp10 = ((int32_t)(first_after_separator_ptr - p));
				1570	}
				1571
				1572	// Count the number of digits:
				1573	// - for an input of "314159", digit_count is 6.
				1574	// - for an input of "3.14159", digit_count is 7.
				1575	//
				1576	// This is off-by-one if there is a decimal separator. That's OK for now.
				1577	// We'll correct for that later. The "script/process-json-numbers.c with
				1578	// -p" benchmark is noticably slower if we try to correct for that now.
				1579	uint32_t digit_count = (uint32_t)(p - start_of_digits_ptr);
				1580
				1581	// Update exp10 for the optional exponent, starting with 'E' or 'e'.
				1582	if ((*p \| 0x20) == 'e') {
				1583	p++;
				1584	int32_t exp_sign = +1;
				1585	if (*p == '-') {
				1586	p++;
				1587	exp_sign = -1;
				1588	} else if (*p == '+') {
				1589	p++;
				1590	}
				1591	if (!wuffs_base__private_implementation__is_decimal_digit(*p)) {
				1592	goto fallback;
				1593	}
				1594	int32_t exp_num = ((uint8_t)(*p - '0'));
				1595	p++;
				1596	// The rest of the exp_num walking has a peculiar control flow but, once
				1597	// again, the "script/process-json-numbers.c with -p" benchmark is
				1598	// sensitive to alternative formulations.
				1599	if (wuffs_base__private_implementation__is_decimal_digit(*p)) {
				1600	exp_num = (10 * exp_num) + ((uint8_t)(*p - '0'));
				1601	p++;
				1602	}
				1603	if (wuffs_base__private_implementation__is_decimal_digit(*p)) {
				1604	exp_num = (10 * exp_num) + ((uint8_t)(*p - '0'));
				1605	p++;
				1606	}
				1607	while (wuffs_base__private_implementation__is_decimal_digit(*p)) {
				1608	if (exp_num > 0x1000000) {
				1609	goto fallback;
				1610	}
				1611	exp_num = (10 * exp_num) + ((uint8_t)(*p - '0'));
				1612	p++;
				1613	}
				1614	exp10 += exp_sign * exp_num;
				1615	}
				1616
				1617	// The Wuffs API is that the original slice has no trailing data. It also
				1618	// allows underscores, which we don't catch here but the fallback should.
				1619	if (p != &z[s.len]) {
				1620	goto fallback;
				1621	}
				1622
				1623	// Check that the uint64_t typed man variable has not overflowed, based on
				1624	// digit_count.
				1625	//
				1626	// For reference:
				1627	// - (1 << 63) is 9223372036854775808, which has 19 decimal digits.
				1628	// - (1 << 64) is 18446744073709551616, which has 20 decimal digits.
				1629	// - 19 nines, 9999999999999999999, is 0x8AC7230489E7FFFF, which has 64
				1630	// bits and 16 hexadecimal digits.
				1631	// - 20 nines, 99999999999999999999, is 0x56BC75E2D630FFFFF, which has 67
				1632	// bits and 17 hexadecimal digits.
				1633	if (digit_count > 19) {
				1634	// Even if we have more than 19 pseudo-digits, it's not yet definitely an
				1635	// overflow. Recall that digit_count might be off-by-one (too large) if
				1636	// there's a decimal separator. It will also over-report the number of
				1637	// meaningful digits if the input looks something like "0.000dddExxx".
				1638	//
				1639	// We adjust by the number of leading '0's and '.'s and re-compare to 19.
				1640	// Once again, technically, we could skip ','s too, but that perturbs the
				1641	// "script/process-json-numbers.c with -p" benchmark.
				1642	const uint8_t* q = start_of_digits_ptr;
				1643	for (; (q == '0') \|\| (q == '.'); q++) {
				1644	}
				1645	digit_count -= (uint32_t)(q - start_of_digits_ptr);
				1646	if (digit_count > 19) {
				1647	goto fallback;
				1648	}
				1649	}
				1650
Nigel Tao	c4fa8e2	2020-07-18 17:35:13 +1000	[diff] [blame]	1651	// The wuffs_base__private_implementation__parse_number_f64_eisel_lemire
Nigel Tao	8b45db0	2020-09-15 21:50:32 +1000	[diff] [blame]	1652	// preconditions include that exp10 is in the range [-307 ..= 288].
				1653	if ((exp10 < -307) \|\| (288 < exp10)) {
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1654	goto fallback;
				1655	}
				1656
Nigel Tao	9f22b5e	2020-09-11 09:10:08 +1000	[diff] [blame]	1657	// If both man and (10 ** exp10) are exactly representable by a double, we
				1658	// don't need to run the Eisel-Lemire algorithm.
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1659	if ((-22 <= exp10) && (exp10 <= 22) && ((man >> 53) == 0)) {
				1660	double d = (double)man;
				1661	if (exp10 >= 0) {
				1662	d *= wuffs_base__private_implementation__f64_powers_of_10[+exp10];
				1663	} else {
				1664	d /= wuffs_base__private_implementation__f64_powers_of_10[-exp10];
				1665	}
				1666	wuffs_base__result_f64 ret;
				1667	ret.status.repr = NULL;
				1668	ret.value = negative ? -d : +d;
				1669	return ret;
				1670	}
				1671
Nigel Tao	c4fa8e2	2020-07-18 17:35:13 +1000	[diff] [blame]	1672	// The wuffs_base__private_implementation__parse_number_f64_eisel_lemire
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1673	// preconditions include that man is non-zero. Parsing "0" should be caught
Nigel Tao	9f22b5e	2020-09-11 09:10:08 +1000	[diff] [blame]	1674	// by the "If both man and (10 ** exp10)" above, but "0e99" might not.
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1675	if (man == 0) {
				1676	goto fallback;
				1677	}
				1678
Nigel Tao	c4fa8e2	2020-07-18 17:35:13 +1000	[diff] [blame]	1679	// Our man and exp10 are in range. Run the Eisel-Lemire algorithm.
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1680	int64_t r =
Nigel Tao	c4fa8e2	2020-07-18 17:35:13 +1000	[diff] [blame]	1681	wuffs_base__private_implementation__parse_number_f64_eisel_lemire(
				1682	man, exp10);
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1683	if (r < 0) {
				1684	goto fallback;
				1685	}
				1686	wuffs_base__result_f64 ret;
				1687	ret.status.repr = NULL;
Nigel Tao	4d449dc	2020-07-12 11:00:47 +1000	[diff] [blame]	1688	ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1689	((uint64_t)r) \| (((uint64_t)negative) << 63));
				1690	return ret;
				1691	} while (0);
				1692
				1693	fallback:
				1694	do {
				1695	wuffs_base__private_implementation__high_prec_dec h;
				1696	wuffs_base__status status =
Nigel Tao	e0c5de9	2020-07-11 11:48:17 +1000	[diff] [blame]	1697	wuffs_base__private_implementation__high_prec_dec__parse(&h, s,
				1698	options);
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1699	if (status.repr) {
Nigel Tao	e0c5de9	2020-07-11 11:48:17 +1000	[diff] [blame]	1700	return wuffs_base__private_implementation__parse_number_f64_special(
Nigel Tao	4d61a05	2020-07-11 12:34:40 +1000	[diff] [blame]	1701	s, options);
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1702	}
Nigel Tao	4d61a05	2020-07-11 12:34:40 +1000	[diff] [blame]	1703	return wuffs_base__private_implementation__high_prec_dec__to_f64(&h,
				1704	options);
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1705	} while (0);
				1706	}
				1707
				1708	// --------
				1709
				1710	static inline size_t //
				1711	wuffs_base__private_implementation__render_inf(wuffs_base__slice_u8 dst,
				1712	bool neg,
				1713	uint32_t options) {
				1714	if (neg) {
				1715	if (dst.len < 4) {
				1716	return 0;
				1717	}
Nigel Tao	a1c22ca	2021-01-17 22:22:49 +1100	[diff] [blame]	1718	wuffs_base__poke_u32le__no_bounds_check(dst.ptr, 0x666E492D); // '-Inf'le.
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1719	return 4;
				1720	}
				1721
				1722	if (options & WUFFS_BASE__RENDER_NUMBER_XXX__LEADING_PLUS_SIGN) {
				1723	if (dst.len < 4) {
				1724	return 0;
				1725	}
Nigel Tao	a1c22ca	2021-01-17 22:22:49 +1100	[diff] [blame]	1726	wuffs_base__poke_u32le__no_bounds_check(dst.ptr, 0x666E492B); // '+Inf'le.
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1727	return 4;
				1728	}
				1729
				1730	if (dst.len < 3) {
				1731	return 0;
				1732	}
Nigel Tao	a1c22ca	2021-01-17 22:22:49 +1100	[diff] [blame]	1733	wuffs_base__poke_u24le__no_bounds_check(dst.ptr, 0x666E49); // 'Inf'le.
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1734	return 3;
				1735	}
				1736
				1737	static inline size_t //
				1738	wuffs_base__private_implementation__render_nan(wuffs_base__slice_u8 dst) {
				1739	if (dst.len < 3) {
				1740	return 0;
				1741	}
Nigel Tao	a1c22ca	2021-01-17 22:22:49 +1100	[diff] [blame]	1742	wuffs_base__poke_u24le__no_bounds_check(dst.ptr, 0x4E614E); // 'NaN'le.
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1743	return 3;
				1744	}
				1745
				1746	static size_t //
				1747	wuffs_base__private_implementation__high_prec_dec__render_exponent_absent(
				1748	wuffs_base__slice_u8 dst,
				1749	wuffs_base__private_implementation__high_prec_dec* h,
				1750	uint32_t precision,
				1751	uint32_t options) {
				1752	size_t n = (h->negative \|\|
				1753	(options & WUFFS_BASE__RENDER_NUMBER_XXX__LEADING_PLUS_SIGN))
				1754	? 1
				1755	: 0;
				1756	if (h->decimal_point <= 0) {
				1757	n += 1;
				1758	} else {
				1759	n += (size_t)(h->decimal_point);
				1760	}
				1761	if (precision > 0) {
				1762	n += precision + 1; // +1 for the '.'.
				1763	}
				1764
				1765	// Don't modify dst if the formatted number won't fit.
				1766	if (n > dst.len) {
				1767	return 0;
				1768	}
				1769
				1770	// Align-left or align-right.
				1771	uint8_t* ptr = (options & WUFFS_BASE__RENDER_NUMBER_XXX__ALIGN_RIGHT)
				1772	? &dst.ptr[dst.len - n]
				1773	: &dst.ptr[0];
				1774
				1775	// Leading "±".
				1776	if (h->negative) {
				1777	*ptr++ = '-';
				1778	} else if (options & WUFFS_BASE__RENDER_NUMBER_XXX__LEADING_PLUS_SIGN) {
				1779	*ptr++ = '+';
				1780	}
				1781
				1782	// Integral digits.
				1783	if (h->decimal_point <= 0) {
				1784	*ptr++ = '0';
				1785	} else {
				1786	uint32_t m =
				1787	wuffs_base__u32__min(h->num_digits, (uint32_t)(h->decimal_point));
				1788	uint32_t i = 0;
				1789	for (; i < m; i++) {
				1790	*ptr++ = (uint8_t)('0' \| h->digits[i]);
				1791	}
				1792	for (; i < (uint32_t)(h->decimal_point); i++) {
				1793	*ptr++ = '0';
				1794	}
				1795	}
				1796
				1797	// Separator and then fractional digits.
				1798	if (precision > 0) {
				1799	*ptr++ =
				1800	(options & WUFFS_BASE__RENDER_NUMBER_FXX__DECIMAL_SEPARATOR_IS_A_COMMA)
				1801	? ','
				1802	: '.';
				1803	uint32_t i = 0;
				1804	for (; i < precision; i++) {
				1805	uint32_t j = ((uint32_t)(h->decimal_point)) + i;
				1806	*ptr++ = (uint8_t)('0' \| ((j < h->num_digits) ? h->digits[j] : 0));
				1807	}
				1808	}
				1809
				1810	return n;
				1811	}
				1812
				1813	static size_t //
				1814	wuffs_base__private_implementation__high_prec_dec__render_exponent_present(
				1815	wuffs_base__slice_u8 dst,
				1816	wuffs_base__private_implementation__high_prec_dec* h,
				1817	uint32_t precision,
				1818	uint32_t options) {
				1819	int32_t exp = 0;
				1820	if (h->num_digits > 0) {
				1821	exp = h->decimal_point - 1;
				1822	}
				1823	bool negative_exp = exp < 0;
				1824	if (negative_exp) {
				1825	exp = -exp;
				1826	}
				1827
				1828	size_t n = (h->negative \|\|
				1829	(options & WUFFS_BASE__RENDER_NUMBER_XXX__LEADING_PLUS_SIGN))
				1830	? 4
				1831	: 3; // Mininum 3 bytes: first digit and then "e±".
				1832	if (precision > 0) {
				1833	n += precision + 1; // +1 for the '.'.
				1834	}
				1835	n += (exp < 100) ? 2 : 3;
				1836
				1837	// Don't modify dst if the formatted number won't fit.
				1838	if (n > dst.len) {
				1839	return 0;
				1840	}
				1841
				1842	// Align-left or align-right.
				1843	uint8_t* ptr = (options & WUFFS_BASE__RENDER_NUMBER_XXX__ALIGN_RIGHT)
				1844	? &dst.ptr[dst.len - n]
				1845	: &dst.ptr[0];
				1846
				1847	// Leading "±".
				1848	if (h->negative) {
				1849	*ptr++ = '-';
				1850	} else if (options & WUFFS_BASE__RENDER_NUMBER_XXX__LEADING_PLUS_SIGN) {
				1851	*ptr++ = '+';
				1852	}
				1853
				1854	// Integral digit.
				1855	if (h->num_digits > 0) {
				1856	*ptr++ = (uint8_t)('0' \| h->digits[0]);
				1857	} else {
				1858	*ptr++ = '0';
				1859	}
				1860
				1861	// Separator and then fractional digits.
				1862	if (precision > 0) {
				1863	*ptr++ =
				1864	(options & WUFFS_BASE__RENDER_NUMBER_FXX__DECIMAL_SEPARATOR_IS_A_COMMA)
				1865	? ','
				1866	: '.';
				1867	uint32_t i = 1;
				1868	uint32_t j = wuffs_base__u32__min(h->num_digits, precision + 1);
				1869	for (; i < j; i++) {
				1870	*ptr++ = (uint8_t)('0' \| h->digits[i]);
				1871	}
				1872	for (; i <= precision; i++) {
				1873	*ptr++ = '0';
				1874	}
				1875	}
				1876
				1877	// Exponent: "e±" and then 2 or 3 digits.
				1878	*ptr++ = 'e';
				1879	*ptr++ = negative_exp ? '-' : '+';
				1880	if (exp < 10) {
				1881	*ptr++ = '0';
				1882	*ptr++ = (uint8_t)('0' \| exp);
				1883	} else if (exp < 100) {
				1884	*ptr++ = (uint8_t)('0' \| (exp / 10));
				1885	*ptr++ = (uint8_t)('0' \| (exp % 10));
				1886	} else {
				1887	int32_t e = exp / 100;
				1888	exp -= e * 100;
				1889	*ptr++ = (uint8_t)('0' \| e);
				1890	*ptr++ = (uint8_t)('0' \| (exp / 10));
				1891	*ptr++ = (uint8_t)('0' \| (exp % 10));
				1892	}
				1893
				1894	return n;
				1895	}
				1896
				1897	WUFFS_BASE__MAYBE_STATIC size_t //
				1898	wuffs_base__render_number_f64(wuffs_base__slice_u8 dst,
				1899	double x,
				1900	uint32_t precision,
				1901	uint32_t options) {
				1902	// Decompose x (64 bits) into negativity (1 bit), base-2 exponent (11 bits
				1903	// with a -1023 bias) and mantissa (52 bits).
Nigel Tao	4d449dc	2020-07-12 11:00:47 +1000	[diff] [blame]	1904	uint64_t bits = wuffs_base__ieee_754_bit_representation__from_f64_to_u64(x);
Nigel Tao	2a7e1ed	2020-07-07 21:50:06 +1000	[diff] [blame]	1905	bool neg = (bits >> 63) != 0;
				1906	int32_t exp2 = ((int32_t)(bits >> 52)) & 0x7FF;
				1907	uint64_t man = bits & 0x000FFFFFFFFFFFFFul;
				1908
				1909	// Apply the exponent bias and set the implicit top bit of the mantissa,
				1910	// unless x is subnormal. Also take care of Inf and NaN.
				1911	if (exp2 == 0x7FF) {
				1912	if (man != 0) {
				1913	return wuffs_base__private_implementation__render_nan(dst);
				1914	}
				1915	return wuffs_base__private_implementation__render_inf(dst, neg, options);
				1916	} else if (exp2 == 0) {
				1917	exp2 = -1022;
				1918	} else {
				1919	exp2 -= 1023;
				1920	man \|= 0x0010000000000000ul;
				1921	}
				1922
				1923	// Ensure that precision isn't too large.
				1924	if (precision > 4095) {
				1925	precision = 4095;
				1926	}
				1927
				1928	// Convert from the (neg, exp2, man) tuple to an HPD.
				1929	wuffs_base__private_implementation__high_prec_dec h;
				1930	wuffs_base__private_implementation__high_prec_dec__assign(&h, man, neg);
				1931	if (h.num_digits > 0) {
				1932	wuffs_base__private_implementation__high_prec_dec__lshift(
				1933	&h, exp2 - 52); // 52 mantissa bits.
				1934	}
				1935
				1936	// Handle the "%e" and "%f" formats.
				1937	switch (options & (WUFFS_BASE__RENDER_NUMBER_FXX__EXPONENT_ABSENT \|
				1938	WUFFS_BASE__RENDER_NUMBER_FXX__EXPONENT_PRESENT)) {
				1939	case WUFFS_BASE__RENDER_NUMBER_FXX__EXPONENT_ABSENT: // The "%"f" format.
				1940	if (options & WUFFS_BASE__RENDER_NUMBER_FXX__JUST_ENOUGH_PRECISION) {
				1941	wuffs_base__private_implementation__high_prec_dec__round_just_enough(
				1942	&h, exp2, man);
				1943	int32_t p = ((int32_t)(h.num_digits)) - h.decimal_point;
				1944	precision = ((uint32_t)(wuffs_base__i32__max(0, p)));
				1945	} else {
				1946	wuffs_base__private_implementation__high_prec_dec__round_nearest(
				1947	&h, ((int32_t)precision) + h.decimal_point);
				1948	}
				1949	return wuffs_base__private_implementation__high_prec_dec__render_exponent_absent(
				1950	dst, &h, precision, options);
				1951
				1952	case WUFFS_BASE__RENDER_NUMBER_FXX__EXPONENT_PRESENT: // The "%e" format.
				1953	if (options & WUFFS_BASE__RENDER_NUMBER_FXX__JUST_ENOUGH_PRECISION) {
				1954	wuffs_base__private_implementation__high_prec_dec__round_just_enough(
				1955	&h, exp2, man);
				1956	precision = (h.num_digits > 0) ? (h.num_digits - 1) : 0;
				1957	} else {
				1958	wuffs_base__private_implementation__high_prec_dec__round_nearest(
				1959	&h, ((int32_t)precision) + 1);
				1960	}
				1961	return wuffs_base__private_implementation__high_prec_dec__render_exponent_present(
				1962	dst, &h, precision, options);
				1963	}
				1964
				1965	// We have the "%g" format and so precision means the number of significant
				1966	// digits, not the number of digits after the decimal separator. Perform
				1967	// rounding and determine whether to use "%e" or "%f".
				1968	int32_t e_threshold = 0;
				1969	if (options & WUFFS_BASE__RENDER_NUMBER_FXX__JUST_ENOUGH_PRECISION) {
				1970	wuffs_base__private_implementation__high_prec_dec__round_just_enough(
				1971	&h, exp2, man);
				1972	precision = h.num_digits;
				1973	e_threshold = 6;
				1974	} else {
				1975	if (precision == 0) {
				1976	precision = 1;
				1977	}
				1978	wuffs_base__private_implementation__high_prec_dec__round_nearest(
				1979	&h, ((int32_t)precision));
				1980	e_threshold = ((int32_t)precision);
				1981	int32_t nd = ((int32_t)(h.num_digits));
				1982	if ((e_threshold > nd) && (nd >= h.decimal_point)) {
				1983	e_threshold = nd;
				1984	}
				1985	}
				1986
				1987	// Use the "%e" format if the exponent is large.
				1988	int32_t e = h.decimal_point - 1;
				1989	if ((e < -4) \|\| (e_threshold <= e)) {
				1990	uint32_t p = wuffs_base__u32__min(precision, h.num_digits);
				1991	return wuffs_base__private_implementation__high_prec_dec__render_exponent_present(
				1992	dst, &h, (p > 0) ? (p - 1) : 0, options);
				1993	}
				1994
				1995	// Use the "%f" format otherwise.
				1996	int32_t p = ((int32_t)precision);
				1997	if (p > h.decimal_point) {
				1998	p = ((int32_t)(h.num_digits));
				1999	}
				2000	precision = ((uint32_t)(wuffs_base__i32__max(0, p - h.decimal_point)));
				2001	return wuffs_base__private_implementation__high_prec_dec__render_exponent_absent(
				2002	dst, &h, precision, options);
				2003	}