blob: a7ad93e91e9b11d376c703bee01a71025e74df33 [file] [log] [blame]
Nigel Taoa4f2bbb2020-07-28 14:15:24 +10001// After editing this file, run "go generate" in the ../data directory.
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10002
3// Copyright 2020 The Wuffs Authors.
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// https://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17// ---------------- IEEE 754 Floating Point
18
Nigel Tao7bf7cf22020-07-12 16:23:15 +100019WUFFS_BASE__MAYBE_STATIC wuffs_base__lossy_value_u16 //
Nigel Taoa3931d52020-07-12 21:06:44 +100020wuffs_base__ieee_754_bit_representation__from_f64_to_u16_truncate(double f) {
Nigel Tao7bf7cf22020-07-12 16:23:15 +100021 uint64_t u = 0;
22 if (sizeof(uint64_t) == sizeof(double)) {
23 memcpy(&u, &f, sizeof(uint64_t));
24 }
Nigel Tao56d90962020-07-12 21:11:49 +100025 uint16_t neg = ((uint16_t)((u >> 63) << 15));
Nigel Tao7bf7cf22020-07-12 16:23:15 +100026 u &= 0x7FFFFFFFFFFFFFFF;
27 uint64_t exp = u >> 52;
28 uint64_t man = u & 0x000FFFFFFFFFFFFF;
29
30 if (exp == 0x7FF) {
31 if (man == 0) { // Infinity.
32 wuffs_base__lossy_value_u16 ret;
33 ret.value = neg | 0x7C00;
34 ret.lossy = false;
35 return ret;
36 }
37 // NaN. Shift the 52 mantissa bits to 10 mantissa bits, keeping the most
38 // significant mantissa bit (quiet vs signaling NaNs). Also set the low 9
39 // bits of ret.value so that the 10-bit mantissa is non-zero.
40 wuffs_base__lossy_value_u16 ret;
41 ret.value = neg | 0x7DFF | ((uint16_t)(man >> 42));
42 ret.lossy = false;
43 return ret;
44
45 } else if (exp > 0x40E) { // Truncate to the largest finite f16.
46 wuffs_base__lossy_value_u16 ret;
47 ret.value = neg | 0x7BFF;
48 ret.lossy = true;
49 return ret;
50
51 } else if (exp <= 0x3E6) { // Truncate to zero.
52 wuffs_base__lossy_value_u16 ret;
53 ret.value = neg;
54 ret.lossy = (u != 0);
55 return ret;
56
57 } else if (exp <= 0x3F0) { // Normal f64, subnormal f16.
58 // Convert from a 53-bit mantissa (after realizing the implicit bit) to a
59 // 10-bit mantissa and then adjust for the exponent.
60 man |= 0x0010000000000000;
Nigel Tao56d90962020-07-12 21:11:49 +100061 uint32_t shift = ((uint32_t)(1051 - exp)); // 1051 = 0x3F0 + 53 - 10.
Nigel Tao7bf7cf22020-07-12 16:23:15 +100062 uint64_t shifted_man = man >> shift;
63 wuffs_base__lossy_value_u16 ret;
64 ret.value = neg | ((uint16_t)shifted_man);
65 ret.lossy = (shifted_man << shift) != man;
66 return ret;
67 }
68
69 // Normal f64, normal f16.
70
71 // Re-bias from 1023 to 15 and shift above f16's 10 mantissa bits.
72 exp = (exp - 1008) << 10; // 1008 = 1023 - 15 = 0x3FF - 0xF.
73
74 // Convert from a 52-bit mantissa (excluding the implicit bit) to a 10-bit
75 // mantissa (again excluding the implicit bit). We lose some information if
76 // any of the bottom 42 bits are non-zero.
77 wuffs_base__lossy_value_u16 ret;
78 ret.value = neg | ((uint16_t)exp) | ((uint16_t)(man >> 42));
79 ret.lossy = (man << 22) != 0;
80 return ret;
81}
82
83WUFFS_BASE__MAYBE_STATIC wuffs_base__lossy_value_u32 //
Nigel Taoa3931d52020-07-12 21:06:44 +100084wuffs_base__ieee_754_bit_representation__from_f64_to_u32_truncate(double f) {
Nigel Tao7bf7cf22020-07-12 16:23:15 +100085 uint64_t u = 0;
86 if (sizeof(uint64_t) == sizeof(double)) {
87 memcpy(&u, &f, sizeof(uint64_t));
88 }
89 uint32_t neg = ((uint32_t)(u >> 63)) << 31;
90 u &= 0x7FFFFFFFFFFFFFFF;
91 uint64_t exp = u >> 52;
92 uint64_t man = u & 0x000FFFFFFFFFFFFF;
93
94 if (exp == 0x7FF) {
95 if (man == 0) { // Infinity.
96 wuffs_base__lossy_value_u32 ret;
97 ret.value = neg | 0x7F800000;
98 ret.lossy = false;
99 return ret;
100 }
101 // NaN. Shift the 52 mantissa bits to 23 mantissa bits, keeping the most
102 // significant mantissa bit (quiet vs signaling NaNs). Also set the low 22
103 // bits of ret.value so that the 23-bit mantissa is non-zero.
104 wuffs_base__lossy_value_u32 ret;
105 ret.value = neg | 0x7FBFFFFF | ((uint32_t)(man >> 29));
106 ret.lossy = false;
107 return ret;
108
109 } else if (exp > 0x47E) { // Truncate to the largest finite f32.
110 wuffs_base__lossy_value_u32 ret;
111 ret.value = neg | 0x7F7FFFFF;
112 ret.lossy = true;
113 return ret;
114
115 } else if (exp <= 0x369) { // Truncate to zero.
116 wuffs_base__lossy_value_u32 ret;
117 ret.value = neg;
118 ret.lossy = (u != 0);
119 return ret;
120
121 } else if (exp <= 0x380) { // Normal f64, subnormal f32.
122 // Convert from a 53-bit mantissa (after realizing the implicit bit) to a
123 // 23-bit mantissa and then adjust for the exponent.
124 man |= 0x0010000000000000;
Nigel Tao56d90962020-07-12 21:11:49 +1000125 uint32_t shift = ((uint32_t)(926 - exp)); // 926 = 0x380 + 53 - 23.
Nigel Tao7bf7cf22020-07-12 16:23:15 +1000126 uint64_t shifted_man = man >> shift;
127 wuffs_base__lossy_value_u32 ret;
128 ret.value = neg | ((uint32_t)shifted_man);
129 ret.lossy = (shifted_man << shift) != man;
130 return ret;
131 }
132
133 // Normal f64, normal f32.
134
135 // Re-bias from 1023 to 127 and shift above f32's 23 mantissa bits.
136 exp = (exp - 896) << 23; // 896 = 1023 - 127 = 0x3FF - 0x7F.
137
138 // Convert from a 52-bit mantissa (excluding the implicit bit) to a 23-bit
139 // mantissa (again excluding the implicit bit). We lose some information if
140 // any of the bottom 29 bits are non-zero.
141 wuffs_base__lossy_value_u32 ret;
142 ret.value = neg | ((uint32_t)exp) | ((uint32_t)(man >> 29));
143 ret.lossy = (man << 35) != 0;
144 return ret;
145}
146
147// --------
148
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000149#define WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE 2047
150#define WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION 800
151
152// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL is the largest N
153// such that ((10 << N) < (1 << 64)).
154#define WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL 60
155
156// wuffs_base__private_implementation__high_prec_dec (abbreviated as HPD) is a
157// fixed precision floating point decimal number, augmented with ±infinity
158// values, but it cannot represent NaN (Not a Number).
159//
160// "High precision" means that the mantissa holds 800 decimal digits. 800 is
161// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION.
162//
163// An HPD isn't for general purpose arithmetic, only for conversions to and
164// from IEEE 754 double-precision floating point, where the largest and
165// smallest positive, finite values are approximately 1.8e+308 and 4.9e-324.
166// HPD exponents above +2047 mean infinity, below -2047 mean zero. The ±2047
167// bounds are further away from zero than ±(324 + 800), where 800 and 2047 is
168// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION and
169// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE.
170//
171// digits[.. num_digits] are the number's digits in big-endian order. The
172// uint8_t values are in the range [0 ..= 9], not ['0' ..= '9'], where e.g. '7'
173// is the ASCII value 0x37.
174//
175// decimal_point is the index (within digits) of the decimal point. It may be
176// negative or be larger than num_digits, in which case the explicit digits are
177// padded with implicit zeroes.
178//
179// For example, if num_digits is 3 and digits is "\x07\x08\x09":
180// - A decimal_point of -2 means ".00789"
181// - A decimal_point of -1 means ".0789"
182// - A decimal_point of +0 means ".789"
183// - A decimal_point of +1 means "7.89"
184// - A decimal_point of +2 means "78.9"
185// - A decimal_point of +3 means "789."
186// - A decimal_point of +4 means "7890."
187// - A decimal_point of +5 means "78900."
188//
189// As above, a decimal_point higher than +2047 means that the overall value is
190// infinity, lower than -2047 means zero.
191//
192// negative is a sign bit. An HPD can distinguish positive and negative zero.
193//
194// truncated is whether there are more than
195// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION digits, and at
196// least one of those extra digits are non-zero. The existence of long-tail
197// digits can affect rounding.
198//
199// The "all fields are zero" value is valid, and represents the number +0.
Nigel Tao4f1d24c2020-09-23 22:02:53 +1000200typedef struct wuffs_base__private_implementation__high_prec_dec__struct {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000201 uint32_t num_digits;
202 int32_t decimal_point;
203 bool negative;
204 bool truncated;
205 uint8_t digits[WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION];
206} wuffs_base__private_implementation__high_prec_dec;
207
208// wuffs_base__private_implementation__high_prec_dec__trim trims trailing
209// zeroes from the h->digits[.. h->num_digits] slice. They have no benefit,
210// since we explicitly track h->decimal_point.
211//
212// Preconditions:
213// - h is non-NULL.
214static inline void //
215wuffs_base__private_implementation__high_prec_dec__trim(
216 wuffs_base__private_implementation__high_prec_dec* h) {
217 while ((h->num_digits > 0) && (h->digits[h->num_digits - 1] == 0)) {
218 h->num_digits--;
219 }
220}
221
222// wuffs_base__private_implementation__high_prec_dec__assign sets h to
223// represent the number x.
224//
225// Preconditions:
226// - h is non-NULL.
227static void //
228wuffs_base__private_implementation__high_prec_dec__assign(
229 wuffs_base__private_implementation__high_prec_dec* h,
230 uint64_t x,
231 bool negative) {
232 uint32_t n = 0;
233
234 // Set h->digits.
235 if (x > 0) {
236 // Calculate the digits, working right-to-left. After we determine n (how
237 // many digits there are), copy from buf to h->digits.
238 //
239 // UINT64_MAX, 18446744073709551615, is 20 digits long. It can be faster to
240 // copy a constant number of bytes than a variable number (20 instead of
241 // n). Make buf large enough (and start writing to it from the middle) so
242 // that can we always copy 20 bytes: the slice buf[(20-n) .. (40-n)].
243 uint8_t buf[40] = {0};
244 uint8_t* ptr = &buf[20];
245 do {
246 uint64_t remaining = x / 10;
247 x -= remaining * 10;
248 ptr--;
249 *ptr = (uint8_t)x;
250 n++;
251 x = remaining;
252 } while (x > 0);
253 memcpy(h->digits, ptr, 20);
254 }
255
256 // Set h's other fields.
257 h->num_digits = n;
258 h->decimal_point = (int32_t)n;
259 h->negative = negative;
260 h->truncated = false;
261 wuffs_base__private_implementation__high_prec_dec__trim(h);
262}
263
264static wuffs_base__status //
265wuffs_base__private_implementation__high_prec_dec__parse(
266 wuffs_base__private_implementation__high_prec_dec* h,
Nigel Taoe0c5de92020-07-11 11:48:17 +1000267 wuffs_base__slice_u8 s,
268 uint32_t options) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000269 if (!h) {
270 return wuffs_base__make_status(wuffs_base__error__bad_receiver);
271 }
272 h->num_digits = 0;
273 h->decimal_point = 0;
274 h->negative = false;
275 h->truncated = false;
276
277 uint8_t* p = s.ptr;
278 uint8_t* q = s.ptr + s.len;
279
Nigel Taoc5c98852020-07-11 13:10:14 +1000280 if (options & WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_UNDERSCORES) {
281 for (;; p++) {
282 if (p >= q) {
283 return wuffs_base__make_status(wuffs_base__error__bad_argument);
284 } else if (*p != '_') {
285 break;
286 }
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000287 }
288 }
289
290 // Parse sign.
291 do {
292 if (*p == '+') {
293 p++;
294 } else if (*p == '-') {
295 h->negative = true;
296 p++;
297 } else {
298 break;
299 }
Nigel Taoc5c98852020-07-11 13:10:14 +1000300 if (options & WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_UNDERSCORES) {
301 for (;; p++) {
302 if (p >= q) {
303 return wuffs_base__make_status(wuffs_base__error__bad_argument);
304 } else if (*p != '_') {
305 break;
306 }
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000307 }
308 }
309 } while (0);
310
311 // Parse digits, up to (and including) a '.', 'E' or 'e'. Examples for each
312 // limb in this if-else chain:
313 // - "0.789"
314 // - "1002.789"
315 // - ".789"
316 // - Other (invalid input).
317 uint32_t nd = 0;
318 int32_t dp = 0;
319 bool no_digits_before_separator = false;
Nigel Taoe82bc8e2020-07-11 12:49:15 +1000320 if (('0' == *p) &&
321 !(options &
322 WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_MULTIPLE_LEADING_ZEROES)) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000323 p++;
324 for (;; p++) {
325 if (p >= q) {
326 goto after_all;
Nigel Taoe0c5de92020-07-11 11:48:17 +1000327 } else if (*p ==
328 ((options &
329 WUFFS_BASE__PARSE_NUMBER_FXX__DECIMAL_SEPARATOR_IS_A_COMMA)
330 ? ','
331 : '.')) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000332 p++;
333 goto after_sep;
334 } else if ((*p == 'E') || (*p == 'e')) {
335 p++;
336 goto after_exp;
Nigel Taoc5c98852020-07-11 13:10:14 +1000337 } else if ((*p != '_') ||
338 !(options & WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_UNDERSCORES)) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000339 return wuffs_base__make_status(wuffs_base__error__bad_argument);
340 }
341 }
342
Nigel Taoe82bc8e2020-07-11 12:49:15 +1000343 } else if (('0' <= *p) && (*p <= '9')) {
344 if (*p == '0') {
345 for (; (p < q) && (*p == '0'); p++) {
346 }
347 } else {
348 h->digits[nd++] = (uint8_t)(*p - '0');
349 dp = (int32_t)nd;
350 p++;
351 }
352
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000353 for (;; p++) {
354 if (p >= q) {
355 goto after_all;
356 } else if (('0' <= *p) && (*p <= '9')) {
357 if (nd < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
358 h->digits[nd++] = (uint8_t)(*p - '0');
359 dp = (int32_t)nd;
360 } else if ('0' != *p) {
361 // Long-tail non-zeroes set the truncated bit.
362 h->truncated = true;
363 }
Nigel Taoe0c5de92020-07-11 11:48:17 +1000364 } else if (*p ==
365 ((options &
366 WUFFS_BASE__PARSE_NUMBER_FXX__DECIMAL_SEPARATOR_IS_A_COMMA)
367 ? ','
368 : '.')) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000369 p++;
370 goto after_sep;
371 } else if ((*p == 'E') || (*p == 'e')) {
372 p++;
373 goto after_exp;
Nigel Taoc5c98852020-07-11 13:10:14 +1000374 } else if ((*p != '_') ||
375 !(options & WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_UNDERSCORES)) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000376 return wuffs_base__make_status(wuffs_base__error__bad_argument);
377 }
378 }
379
Nigel Taoe0c5de92020-07-11 11:48:17 +1000380 } else if (*p == ((options &
381 WUFFS_BASE__PARSE_NUMBER_FXX__DECIMAL_SEPARATOR_IS_A_COMMA)
382 ? ','
383 : '.')) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000384 p++;
385 no_digits_before_separator = true;
386
387 } else {
388 return wuffs_base__make_status(wuffs_base__error__bad_argument);
389 }
390
391after_sep:
392 for (;; p++) {
393 if (p >= q) {
394 goto after_all;
395 } else if ('0' == *p) {
396 if (nd == 0) {
397 // Track leading zeroes implicitly.
398 dp--;
399 } else if (nd <
400 WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
401 h->digits[nd++] = (uint8_t)(*p - '0');
402 }
403 } else if (('0' < *p) && (*p <= '9')) {
404 if (nd < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
405 h->digits[nd++] = (uint8_t)(*p - '0');
406 } else {
407 // Long-tail non-zeroes set the truncated bit.
408 h->truncated = true;
409 }
410 } else if ((*p == 'E') || (*p == 'e')) {
411 p++;
412 goto after_exp;
Nigel Taoc5c98852020-07-11 13:10:14 +1000413 } else if ((*p != '_') ||
414 !(options & WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_UNDERSCORES)) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000415 return wuffs_base__make_status(wuffs_base__error__bad_argument);
416 }
417 }
418
419after_exp:
420 do {
Nigel Taoc5c98852020-07-11 13:10:14 +1000421 if (options & WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_UNDERSCORES) {
422 for (;; p++) {
423 if (p >= q) {
424 return wuffs_base__make_status(wuffs_base__error__bad_argument);
425 } else if (*p != '_') {
426 break;
427 }
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000428 }
429 }
430
431 int32_t exp_sign = +1;
432 if (*p == '+') {
433 p++;
434 } else if (*p == '-') {
435 exp_sign = -1;
436 p++;
437 }
438
439 int32_t exp = 0;
440 const int32_t exp_large =
441 WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE +
442 WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION;
443 bool saw_exp_digits = false;
444 for (; p < q; p++) {
Nigel Taoc5c98852020-07-11 13:10:14 +1000445 if ((*p == '_') &&
446 (options & WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_UNDERSCORES)) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000447 // No-op.
448 } else if (('0' <= *p) && (*p <= '9')) {
449 saw_exp_digits = true;
450 if (exp < exp_large) {
451 exp = (10 * exp) + ((int32_t)(*p - '0'));
452 }
453 } else {
454 break;
455 }
456 }
457 if (!saw_exp_digits) {
458 return wuffs_base__make_status(wuffs_base__error__bad_argument);
459 }
460 dp += exp_sign * exp;
461 } while (0);
462
463after_all:
464 if (p != q) {
465 return wuffs_base__make_status(wuffs_base__error__bad_argument);
466 }
467 h->num_digits = nd;
468 if (nd == 0) {
469 if (no_digits_before_separator) {
470 return wuffs_base__make_status(wuffs_base__error__bad_argument);
471 }
472 h->decimal_point = 0;
473 } else if (dp <
474 -WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
475 h->decimal_point =
476 -WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE - 1;
477 } else if (dp >
478 +WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
479 h->decimal_point =
480 +WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE + 1;
481 } else {
482 h->decimal_point = dp;
483 }
484 wuffs_base__private_implementation__high_prec_dec__trim(h);
485 return wuffs_base__make_status(NULL);
486}
487
488// --------
489
490// wuffs_base__private_implementation__high_prec_dec__lshift_num_new_digits
491// returns the number of additional decimal digits when left-shifting by shift.
492//
493// See below for preconditions.
494static uint32_t //
495wuffs_base__private_implementation__high_prec_dec__lshift_num_new_digits(
496 wuffs_base__private_implementation__high_prec_dec* h,
497 uint32_t shift) {
498 // Masking with 0x3F should be unnecessary (assuming the preconditions) but
499 // it's cheap and ensures that we don't overflow the
500 // wuffs_base__private_implementation__hpd_left_shift array.
501 shift &= 63;
502
503 uint32_t x_a = wuffs_base__private_implementation__hpd_left_shift[shift];
504 uint32_t x_b = wuffs_base__private_implementation__hpd_left_shift[shift + 1];
505 uint32_t num_new_digits = x_a >> 11;
506 uint32_t pow5_a = 0x7FF & x_a;
507 uint32_t pow5_b = 0x7FF & x_b;
508
509 const uint8_t* pow5 =
510 &wuffs_base__private_implementation__powers_of_5[pow5_a];
511 uint32_t i = 0;
512 uint32_t n = pow5_b - pow5_a;
513 for (; i < n; i++) {
514 if (i >= h->num_digits) {
515 return num_new_digits - 1;
516 } else if (h->digits[i] == pow5[i]) {
517 continue;
518 } else if (h->digits[i] < pow5[i]) {
519 return num_new_digits - 1;
520 } else {
521 return num_new_digits;
522 }
523 }
524 return num_new_digits;
525}
526
527// --------
528
529// wuffs_base__private_implementation__high_prec_dec__rounded_integer returns
530// the integral (non-fractional) part of h, provided that it is 18 or fewer
531// decimal digits. For 19 or more digits, it returns UINT64_MAX. Note that:
532// - (1 << 53) is 9007199254740992, which has 16 decimal digits.
533// - (1 << 56) is 72057594037927936, which has 17 decimal digits.
534// - (1 << 59) is 576460752303423488, which has 18 decimal digits.
535// - (1 << 63) is 9223372036854775808, which has 19 decimal digits.
536// and that IEEE 754 double precision has 52 mantissa bits.
537//
538// That integral part is rounded-to-even: rounding 7.5 or 8.5 both give 8.
539//
540// h's negative bit is ignored: rounding -8.6 returns 9.
541//
542// See below for preconditions.
543static uint64_t //
544wuffs_base__private_implementation__high_prec_dec__rounded_integer(
545 wuffs_base__private_implementation__high_prec_dec* h) {
546 if ((h->num_digits == 0) || (h->decimal_point < 0)) {
547 return 0;
548 } else if (h->decimal_point > 18) {
549 return UINT64_MAX;
550 }
551
552 uint32_t dp = (uint32_t)(h->decimal_point);
553 uint64_t n = 0;
554 uint32_t i = 0;
555 for (; i < dp; i++) {
556 n = (10 * n) + ((i < h->num_digits) ? h->digits[i] : 0);
557 }
558
559 bool round_up = false;
560 if (dp < h->num_digits) {
561 round_up = h->digits[dp] >= 5;
562 if ((h->digits[dp] == 5) && (dp + 1 == h->num_digits)) {
563 // We are exactly halfway. If we're truncated, round up, otherwise round
564 // to even.
565 round_up = h->truncated || //
566 ((dp > 0) && (1 & h->digits[dp - 1]));
567 }
568 }
569 if (round_up) {
570 n++;
571 }
572
573 return n;
574}
575
576// wuffs_base__private_implementation__high_prec_dec__small_xshift shifts h's
577// number (where 'x' is 'l' or 'r' for left or right) by a small shift value.
578//
579// Preconditions:
580// - h is non-NULL.
581// - h->decimal_point is "not extreme".
582// - shift is non-zero.
583// - shift is "a small shift".
584//
585// "Not extreme" means within
586// ±WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE.
587//
588// "A small shift" means not more than
589// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL.
590//
591// wuffs_base__private_implementation__high_prec_dec__rounded_integer and
592// wuffs_base__private_implementation__high_prec_dec__lshift_num_new_digits
593// have the same preconditions.
594//
595// wuffs_base__private_implementation__high_prec_dec__lshift keeps the first
596// two preconditions but not the last two. Its shift argument is signed and
597// does not need to be "small": zero is a no-op, positive means left shift and
598// negative means right shift.
599
600static void //
601wuffs_base__private_implementation__high_prec_dec__small_lshift(
602 wuffs_base__private_implementation__high_prec_dec* h,
603 uint32_t shift) {
604 if (h->num_digits == 0) {
605 return;
606 }
607 uint32_t num_new_digits =
608 wuffs_base__private_implementation__high_prec_dec__lshift_num_new_digits(
609 h, shift);
610 uint32_t rx = h->num_digits - 1; // Read index.
611 uint32_t wx = h->num_digits - 1 + num_new_digits; // Write index.
612 uint64_t n = 0;
613
614 // Repeat: pick up a digit, put down a digit, right to left.
615 while (((int32_t)rx) >= 0) {
616 n += ((uint64_t)(h->digits[rx])) << shift;
617 uint64_t quo = n / 10;
618 uint64_t rem = n - (10 * quo);
619 if (wx < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
620 h->digits[wx] = (uint8_t)rem;
621 } else if (rem > 0) {
622 h->truncated = true;
623 }
624 n = quo;
625 wx--;
626 rx--;
627 }
628
629 // Put down leading digits, right to left.
630 while (n > 0) {
631 uint64_t quo = n / 10;
632 uint64_t rem = n - (10 * quo);
633 if (wx < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
634 h->digits[wx] = (uint8_t)rem;
635 } else if (rem > 0) {
636 h->truncated = true;
637 }
638 n = quo;
639 wx--;
640 }
641
642 // Finish.
643 h->num_digits += num_new_digits;
644 if (h->num_digits >
645 WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
646 h->num_digits = WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION;
647 }
648 h->decimal_point += (int32_t)num_new_digits;
649 wuffs_base__private_implementation__high_prec_dec__trim(h);
650}
651
652static void //
653wuffs_base__private_implementation__high_prec_dec__small_rshift(
654 wuffs_base__private_implementation__high_prec_dec* h,
655 uint32_t shift) {
656 uint32_t rx = 0; // Read index.
657 uint32_t wx = 0; // Write index.
658 uint64_t n = 0;
659
660 // Pick up enough leading digits to cover the first shift.
661 while ((n >> shift) == 0) {
662 if (rx < h->num_digits) {
663 // Read a digit.
664 n = (10 * n) + h->digits[rx++];
665 } else if (n == 0) {
666 // h's number used to be zero and remains zero.
667 return;
668 } else {
669 // Read sufficient implicit trailing zeroes.
670 while ((n >> shift) == 0) {
671 n = 10 * n;
672 rx++;
673 }
674 break;
675 }
676 }
677 h->decimal_point -= ((int32_t)(rx - 1));
678 if (h->decimal_point <
679 -WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
680 // After the shift, h's number is effectively zero.
681 h->num_digits = 0;
682 h->decimal_point = 0;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000683 h->truncated = false;
684 return;
685 }
686
687 // Repeat: pick up a digit, put down a digit, left to right.
688 uint64_t mask = (((uint64_t)(1)) << shift) - 1;
689 while (rx < h->num_digits) {
690 uint8_t new_digit = ((uint8_t)(n >> shift));
691 n = (10 * (n & mask)) + h->digits[rx++];
692 h->digits[wx++] = new_digit;
693 }
694
695 // Put down trailing digits, left to right.
696 while (n > 0) {
697 uint8_t new_digit = ((uint8_t)(n >> shift));
698 n = 10 * (n & mask);
699 if (wx < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
700 h->digits[wx++] = new_digit;
701 } else if (new_digit > 0) {
702 h->truncated = true;
703 }
704 }
705
706 // Finish.
707 h->num_digits = wx;
708 wuffs_base__private_implementation__high_prec_dec__trim(h);
709}
710
711static void //
712wuffs_base__private_implementation__high_prec_dec__lshift(
713 wuffs_base__private_implementation__high_prec_dec* h,
714 int32_t shift) {
715 if (shift > 0) {
716 while (shift > +WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL) {
717 wuffs_base__private_implementation__high_prec_dec__small_lshift(
718 h, WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL);
719 shift -= WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;
720 }
721 wuffs_base__private_implementation__high_prec_dec__small_lshift(
722 h, ((uint32_t)(+shift)));
723 } else if (shift < 0) {
724 while (shift < -WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL) {
725 wuffs_base__private_implementation__high_prec_dec__small_rshift(
726 h, WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL);
727 shift += WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;
728 }
729 wuffs_base__private_implementation__high_prec_dec__small_rshift(
730 h, ((uint32_t)(-shift)));
731 }
732}
733
734// --------
735
736// wuffs_base__private_implementation__high_prec_dec__round_etc rounds h's
737// number. For those functions that take an n argument, rounding produces at
738// most n digits (which is not necessarily at most n decimal places). Negative
739// n values are ignored, as well as any n greater than or equal to h's number
740// of digits. The etc__round_just_enough function implicitly chooses an n to
741// implement WUFFS_BASE__RENDER_NUMBER_FXX__JUST_ENOUGH_PRECISION.
742//
743// Preconditions:
744// - h is non-NULL.
745// - h->decimal_point is "not extreme".
746//
747// "Not extreme" means within
748// ±WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE.
749
750static void //
751wuffs_base__private_implementation__high_prec_dec__round_down(
752 wuffs_base__private_implementation__high_prec_dec* h,
753 int32_t n) {
754 if ((n < 0) || (h->num_digits <= (uint32_t)n)) {
755 return;
756 }
757 h->num_digits = (uint32_t)(n);
758 wuffs_base__private_implementation__high_prec_dec__trim(h);
759}
760
761static void //
762wuffs_base__private_implementation__high_prec_dec__round_up(
763 wuffs_base__private_implementation__high_prec_dec* h,
764 int32_t n) {
765 if ((n < 0) || (h->num_digits <= (uint32_t)n)) {
766 return;
767 }
768
769 for (n--; n >= 0; n--) {
770 if (h->digits[n] < 9) {
771 h->digits[n]++;
772 h->num_digits = (uint32_t)(n + 1);
773 return;
774 }
775 }
776
777 // The number is all 9s. Change to a single 1 and adjust the decimal point.
778 h->digits[0] = 1;
779 h->num_digits = 1;
780 h->decimal_point++;
781}
782
783static void //
784wuffs_base__private_implementation__high_prec_dec__round_nearest(
785 wuffs_base__private_implementation__high_prec_dec* h,
786 int32_t n) {
787 if ((n < 0) || (h->num_digits <= (uint32_t)n)) {
788 return;
789 }
790 bool up = h->digits[n] >= 5;
791 if ((h->digits[n] == 5) && ((n + 1) == ((int32_t)(h->num_digits)))) {
792 up = h->truncated || //
793 ((n > 0) && ((h->digits[n - 1] & 1) != 0));
794 }
795
796 if (up) {
797 wuffs_base__private_implementation__high_prec_dec__round_up(h, n);
798 } else {
799 wuffs_base__private_implementation__high_prec_dec__round_down(h, n);
800 }
801}
802
803static void //
804wuffs_base__private_implementation__high_prec_dec__round_just_enough(
805 wuffs_base__private_implementation__high_prec_dec* h,
806 int32_t exp2,
807 uint64_t mantissa) {
808 // The magic numbers 52 and 53 in this function are because IEEE 754 double
809 // precision has 52 mantissa bits.
810 //
811 // Let f be the floating point number represented by exp2 and mantissa (and
812 // also the number in h): the number (mantissa * (2 ** (exp2 - 52))).
813 //
814 // If f is zero or a small integer, we can return early.
815 if ((mantissa == 0) ||
816 ((exp2 < 53) && (h->decimal_point >= ((int32_t)(h->num_digits))))) {
817 return;
818 }
819
820 // The smallest normal f has an exp2 of -1022 and a mantissa of (1 << 52).
821 // Subnormal numbers have the same exp2 but a smaller mantissa.
822 static const int32_t min_incl_normal_exp2 = -1022;
823 static const uint64_t min_incl_normal_mantissa = 0x0010000000000000ul;
824
825 // Compute lower and upper bounds such that any number between them (possibly
826 // inclusive) will round to f. First, the lower bound. Our number f is:
827 // ((mantissa + 0) * (2 ** ( exp2 - 52)))
828 //
829 // The next lowest floating point number is:
830 // ((mantissa - 1) * (2 ** ( exp2 - 52)))
831 // unless (mantissa - 1) drops the (1 << 52) bit and exp2 is not the
832 // min_incl_normal_exp2. Either way, call it:
833 // ((l_mantissa) * (2 ** (l_exp2 - 52)))
834 //
835 // The lower bound is halfway between them (noting that 52 became 53):
836 // (((2 * l_mantissa) + 1) * (2 ** (l_exp2 - 53)))
837 int32_t l_exp2 = exp2;
838 uint64_t l_mantissa = mantissa - 1;
839 if ((exp2 > min_incl_normal_exp2) && (mantissa <= min_incl_normal_mantissa)) {
840 l_exp2 = exp2 - 1;
841 l_mantissa = (2 * mantissa) - 1;
842 }
843 wuffs_base__private_implementation__high_prec_dec lower;
844 wuffs_base__private_implementation__high_prec_dec__assign(
845 &lower, (2 * l_mantissa) + 1, false);
846 wuffs_base__private_implementation__high_prec_dec__lshift(&lower,
847 l_exp2 - 53);
848
849 // Next, the upper bound. Our number f is:
850 // ((mantissa + 0) * (2 ** (exp2 - 52)))
851 //
852 // The next highest floating point number is:
853 // ((mantissa + 1) * (2 ** (exp2 - 52)))
854 //
855 // The upper bound is halfway between them (noting that 52 became 53):
856 // (((2 * mantissa) + 1) * (2 ** (exp2 - 53)))
857 wuffs_base__private_implementation__high_prec_dec upper;
858 wuffs_base__private_implementation__high_prec_dec__assign(
859 &upper, (2 * mantissa) + 1, false);
860 wuffs_base__private_implementation__high_prec_dec__lshift(&upper, exp2 - 53);
861
862 // The lower and upper bounds are possible outputs only if the original
863 // mantissa is even, so that IEEE round-to-even would round to the original
864 // mantissa and not its neighbors.
865 bool inclusive = (mantissa & 1) == 0;
866
867 // As we walk the digits, we want to know whether rounding up would fall
868 // within the upper bound. This is tracked by upper_delta:
869 // - When -1, the digits of h and upper are the same so far.
870 // - When +0, we saw a difference of 1 between h and upper on a previous
871 // digit and subsequently only 9s for h and 0s for upper. Thus, rounding
872 // up may fall outside of the bound if !inclusive.
873 // - When +1, the difference is greater than 1 and we know that rounding up
874 // falls within the bound.
875 //
876 // This is a state machine with three states. The numerical value for each
877 // state (-1, +0 or +1) isn't important, other than their order.
878 int upper_delta = -1;
879
880 // We can now figure out the shortest number of digits required. Walk the
881 // digits until h has distinguished itself from lower or upper.
882 //
883 // The zi and zd variables are indexes and digits, for z in l (lower), h (the
884 // number) and u (upper).
885 //
886 // The lower, h and upper numbers may have their decimal points at different
887 // places. In this case, upper is the longest, so we iterate ui starting from
888 // 0 and iterate li and hi starting from either 0 or -1.
889 int32_t ui = 0;
890 for (;; ui++) {
891 // Calculate hd, the middle number's digit.
892 int32_t hi = ui - upper.decimal_point + h->decimal_point;
893 if (hi >= ((int32_t)(h->num_digits))) {
894 break;
895 }
896 uint8_t hd = (((uint32_t)hi) < h->num_digits) ? h->digits[hi] : 0;
897
898 // Calculate ld, the lower bound's digit.
899 int32_t li = ui - upper.decimal_point + lower.decimal_point;
900 uint8_t ld = (((uint32_t)li) < lower.num_digits) ? lower.digits[li] : 0;
901
902 // We can round down (truncate) if lower has a different digit than h or if
903 // lower is inclusive and is exactly the result of rounding down (i.e. we
904 // have reached the final digit of lower).
905 bool can_round_down =
906 (ld != hd) || //
907 (inclusive && ((li + 1) == ((int32_t)(lower.num_digits))));
908
909 // Calculate ud, the upper bound's digit, and update upper_delta.
910 uint8_t ud = (((uint32_t)ui) < upper.num_digits) ? upper.digits[ui] : 0;
911 if (upper_delta < 0) {
912 if ((hd + 1) < ud) {
913 // For example:
914 // h = 12345???
915 // upper = 12347???
916 upper_delta = +1;
917 } else if (hd != ud) {
918 // For example:
919 // h = 12345???
920 // upper = 12346???
921 upper_delta = +0;
922 }
923 } else if (upper_delta == 0) {
924 if ((hd != 9) || (ud != 0)) {
925 // For example:
926 // h = 1234598?
927 // upper = 1234600?
928 upper_delta = +1;
929 }
930 }
931
932 // We can round up if upper has a different digit than h and either upper
933 // is inclusive or upper is bigger than the result of rounding up.
934 bool can_round_up =
935 (upper_delta > 0) || //
936 ((upper_delta == 0) && //
937 (inclusive || ((ui + 1) < ((int32_t)(upper.num_digits)))));
938
939 // If we can round either way, round to nearest. If we can round only one
940 // way, do it. If we can't round, continue the loop.
941 if (can_round_down) {
942 if (can_round_up) {
943 wuffs_base__private_implementation__high_prec_dec__round_nearest(
944 h, hi + 1);
945 return;
946 } else {
947 wuffs_base__private_implementation__high_prec_dec__round_down(h,
948 hi + 1);
949 return;
950 }
951 } else {
952 if (can_round_up) {
953 wuffs_base__private_implementation__high_prec_dec__round_up(h, hi + 1);
954 return;
955 }
956 }
957 }
958}
959
960// --------
961
Nigel Taoc4fa8e22020-07-18 17:35:13 +1000962// wuffs_base__private_implementation__parse_number_f64_eisel_lemire produces
963// the IEEE 754 double-precision value for an exact mantissa and base-10
964// exponent. For example:
Nigel Taob15a0fc2020-07-08 10:50:14 +1000965// - when parsing "12345.678e+02", man is 12345678 and exp10 is -1.
966// - when parsing "-12", man is 12 and exp10 is 0. Processing the leading
967// minus sign is the responsibility of the caller, not this function.
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000968//
969// On success, it returns a non-negative int64_t such that the low 63 bits hold
970// the 11-bit exponent and 52-bit mantissa.
971//
972// On failure, it returns a negative value.
973//
Nigel Taoc4fa8e22020-07-18 17:35:13 +1000974// The algorithm is based on an original idea by Michael Eisel that was refined
975// by Daniel Lemire. See
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000976// https://lemire.me/blog/2020/03/10/fast-float-parsing-in-practice/
Nigel Tao1d8d18f2020-10-07 22:13:51 +1100977// and
978// https://nigeltao.github.io/blog/2020/eisel-lemire.html
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000979//
980// Preconditions:
981// - man is non-zero.
Nigel Tao09872962020-09-15 22:22:51 +1000982// - exp10 is in the range [-307 ..= 288], the same range of the
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000983// wuffs_base__private_implementation__powers_of_10 array.
Nigel Tao8b45db02020-09-15 21:50:32 +1000984//
985// The exp10 range (and the fact that man is in the range [1 ..= UINT64_MAX],
986// approximately [1 ..= 1.85e+19]) means that (man * (10 ** exp10)) is in the
987// range [1e-307 ..= 1.85e+307]. This is entirely within the range of normal
988// (neither subnormal nor non-finite) f64 values: DBL_MIN and DBL_MAX are
989// approximately 2.23e–308 and 1.80e+308.
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000990static int64_t //
Nigel Taoc4fa8e22020-07-18 17:35:13 +1000991wuffs_base__private_implementation__parse_number_f64_eisel_lemire(
992 uint64_t man,
993 int32_t exp10) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000994 // Look up the (possibly truncated) base-2 representation of (10 ** exp10).
995 // The look-up table was constructed so that it is already normalized: the
996 // table entry's mantissa's MSB (most significant bit) is on.
Nigel Taoafe7f272020-09-23 15:52:13 +1000997 const uint64_t* po10 =
998 &wuffs_base__private_implementation__powers_of_10[exp10 + 307][0];
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000999
1000 // Normalize the man argument. The (man != 0) precondition means that a
1001 // non-zero bit exists.
1002 uint32_t clz = wuffs_base__count_leading_zeroes_u64(man);
1003 man <<= clz;
1004
1005 // Calculate the return value's base-2 exponent. We might tweak it by ±1
Nigel Taob6d85522020-09-23 15:21:47 +10001006 // later, but its initial value comes from a linear scaling of exp10,
1007 // converting from power-of-10 to power-of-2, and adjusting by clz.
1008 //
1009 // The magic constants are:
1010 // - 1087 = 1023 + 64. The 1023 is the f64 exponent bias. The 64 is because
1011 // the look-up table uses 64-bit mantissas.
1012 // - 217706 is such that the ratio 217706 / 65536 ≈ 3.321930 is close enough
1013 // (over the practical range of exp10) to log(10) / log(2) ≈ 3.321928.
1014 // - 65536 = 1<<16 is arbitrary but a power of 2, so division is a shift.
1015 //
1016 // Equality of the linearly-scaled value and the actual power-of-2, over the
1017 // range of exp10 arguments that this function accepts, is confirmed by
1018 // script/print-mpb-powers-of-10.go
1019 uint64_t ret_exp2 =
1020 ((uint64_t)(((217706 * exp10) >> 16) + 1087)) - ((uint64_t)clz);
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001021
1022 // Multiply the two mantissas. Normalization means that both mantissas are at
1023 // least (1<<63), so the 128-bit product must be at least (1<<126). The high
Nigel Tao74d4af62020-07-10 11:27:17 +10001024 // 64 bits of the product, x_hi, must therefore be at least (1<<62).
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001025 //
Nigel Tao74d4af62020-07-10 11:27:17 +10001026 // As a consequence, x_hi has either 0 or 1 leading zeroes. Shifting x_hi
1027 // right by either 9 or 10 bits (depending on x_hi's MSB) will therefore
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001028 // leave the top 10 MSBs (bits 54 ..= 63) off and the 11th MSB (bit 53) on.
Nigel Taoafe7f272020-09-23 15:52:13 +10001029 wuffs_base__multiply_u64__output x = wuffs_base__multiply_u64(man, po10[1]);
Nigel Tao74d4af62020-07-10 11:27:17 +10001030 uint64_t x_hi = x.hi;
1031 uint64_t x_lo = x.lo;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001032
1033 // Before we shift right by at least 9 bits, recall that the look-up table
1034 // entry was possibly truncated. We have so far only calculated a lower bound
1035 // for the product (man * e), where e is (10 ** exp10). The upper bound would
1036 // add a further (man * 1) to the 128-bit product, which overflows the lower
Nigel Tao74d4af62020-07-10 11:27:17 +10001037 // 64-bit limb if ((x_lo + man) < man).
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001038 //
Nigel Tao74d4af62020-07-10 11:27:17 +10001039 // If overflow occurs, that adds 1 to x_hi. Since we're about to shift right
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001040 // by at least 9 bits, that carried 1 can be ignored unless the higher 64-bit
1041 // limb's low 9 bits are all on.
Nigel Taoba3818c2020-09-28 12:51:45 +10001042 //
1043 // For example, parsing "9999999999999999999" will take the if-true branch
1044 // here, since:
1045 // - x_hi = 0x4563918244F3FFFF
1046 // - x_lo = 0x8000000000000000
1047 // - man = 0x8AC7230489E7FFFF
Nigel Tao74d4af62020-07-10 11:27:17 +10001048 if (((x_hi & 0x1FF) == 0x1FF) && ((x_lo + man) < man)) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001049 // Refine our calculation of (man * e). Before, our approximation of e used
1050 // a "low resolution" 64-bit mantissa. Now use a "high resolution" 128-bit
1051 // mantissa. We've already calculated x = (man * bits_0_to_63_incl_of_e).
1052 // Now calculate y = (man * bits_64_to_127_incl_of_e).
Nigel Taoafe7f272020-09-23 15:52:13 +10001053 wuffs_base__multiply_u64__output y = wuffs_base__multiply_u64(man, po10[0]);
Nigel Tao74d4af62020-07-10 11:27:17 +10001054 uint64_t y_hi = y.hi;
1055 uint64_t y_lo = y.lo;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001056
1057 // Merge the 128-bit x and 128-bit y, which overlap by 64 bits, to
1058 // calculate the 192-bit product of the 64-bit man by the 128-bit e.
1059 // As we exit this if-block, we only care about the high 128 bits
1060 // (merged_hi and merged_lo) of that 192-bit product.
Nigel Taoba3818c2020-09-28 12:51:45 +10001061 //
1062 // For example, parsing "1.234e-45" will take the if-true branch here,
1063 // since:
1064 // - x_hi = 0x70B7E3696DB29FFF
1065 // - x_lo = 0xE040000000000000
1066 // - y_hi = 0x33718BBEAB0E0D7A
1067 // - y_lo = 0xA880000000000000
Nigel Tao74d4af62020-07-10 11:27:17 +10001068 uint64_t merged_hi = x_hi;
1069 uint64_t merged_lo = x_lo + y_hi;
1070 if (merged_lo < x_lo) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001071 merged_hi++; // Carry the overflow bit.
1072 }
1073
1074 // The "high resolution" approximation of e is still a lower bound. Once
1075 // again, see if the upper bound is large enough to produce a different
1076 // result. This time, if it does, give up instead of reaching for an even
1077 // more precise approximation to e.
1078 //
1079 // This three-part check is similar to the two-part check that guarded the
1080 // if block that we're now in, but it has an extra term for the middle 64
1081 // bits (checking that adding 1 to merged_lo would overflow).
Nigel Taoba3818c2020-09-28 12:51:45 +10001082 //
1083 // For example, parsing "5.9604644775390625e-8" will take the if-true
1084 // branch here, since:
1085 // - merged_hi = 0x7FFFFFFFFFFFFFFF
1086 // - merged_lo = 0xFFFFFFFFFFFFFFFF
1087 // - y_lo = 0x4DB3FFC120988200
1088 // - man = 0xD3C21BCECCEDA100
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001089 if (((merged_hi & 0x1FF) == 0x1FF) && ((merged_lo + 1) == 0) &&
Nigel Tao74d4af62020-07-10 11:27:17 +10001090 (y_lo + man < man)) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001091 return -1;
1092 }
1093
1094 // Replace the 128-bit x with merged.
Nigel Tao74d4af62020-07-10 11:27:17 +10001095 x_hi = merged_hi;
1096 x_lo = merged_lo;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001097 }
1098
Nigel Tao74d4af62020-07-10 11:27:17 +10001099 // As mentioned above, shifting x_hi right by either 9 or 10 bits will leave
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001100 // the top 10 MSBs (bits 54 ..= 63) off and the 11th MSB (bit 53) on. If the
1101 // MSB (before shifting) was on, adjust ret_exp2 for the larger shift.
1102 //
1103 // Having bit 53 on (and higher bits off) means that ret_mantissa is a 54-bit
1104 // number.
Nigel Tao74d4af62020-07-10 11:27:17 +10001105 uint64_t msb = x_hi >> 63;
1106 uint64_t ret_mantissa = x_hi >> (msb + 9);
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001107 ret_exp2 -= 1 ^ msb;
1108
1109 // IEEE 754 rounds to-nearest with ties rounded to-even. Rounding to-even can
1110 // be tricky. If we're half-way between two exactly representable numbers
1111 // (x's low 73 bits are zero and the next 2 bits that matter are "01"), give
1112 // up instead of trying to pick the winner.
1113 //
1114 // Technically, we could tighten the condition by changing "73" to "73 or 74,
1115 // depending on msb", but a flat "73" is simpler.
Nigel Taoba3818c2020-09-28 12:51:45 +10001116 //
1117 // For example, parsing "1e+23" will take the if-true branch here, since:
1118 // - x_hi = 0x54B40B1F852BDA00
1119 // - ret_mantissa = 0x002A5A058FC295ED
Nigel Tao74d4af62020-07-10 11:27:17 +10001120 if ((x_lo == 0) && ((x_hi & 0x1FF) == 0) && ((ret_mantissa & 3) == 1)) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001121 return -1;
1122 }
1123
1124 // If we're not halfway then it's rounding to-nearest. Starting with a 54-bit
1125 // number, carry the lowest bit (bit 0) up if it's on. Regardless of whether
1126 // it was on or off, shifting right by one then produces a 53-bit number. If
1127 // carrying up overflowed, shift again.
1128 ret_mantissa += ret_mantissa & 1;
1129 ret_mantissa >>= 1;
Nigel Tao8b45db02020-09-15 21:50:32 +10001130 // This if block is equivalent to (but benchmarks slightly faster than) the
1131 // following branchless form:
1132 // uint64_t overflow_adjustment = ret_mantissa >> 53;
1133 // ret_mantissa >>= overflow_adjustment;
1134 // ret_exp2 += overflow_adjustment;
Nigel Taoba3818c2020-09-28 12:51:45 +10001135 //
1136 // For example, parsing "7.2057594037927933e+16" will take the if-true
1137 // branch here, since:
1138 // - x_hi = 0x7FFFFFFFFFFFFE80
1139 // - ret_mantissa = 0x0020000000000000
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001140 if ((ret_mantissa >> 53) > 0) {
1141 ret_mantissa >>= 1;
1142 ret_exp2++;
1143 }
1144
1145 // Starting with a 53-bit number, IEEE 754 double-precision normal numbers
1146 // have an implicit mantissa bit. Mask that away and keep the low 52 bits.
1147 ret_mantissa &= 0x000FFFFFFFFFFFFF;
1148
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001149 // Pack the bits and return.
1150 return ((int64_t)(ret_mantissa | (ret_exp2 << 52)));
1151}
1152
1153// --------
1154
1155static wuffs_base__result_f64 //
Nigel Taoe0c5de92020-07-11 11:48:17 +10001156wuffs_base__private_implementation__parse_number_f64_special(
1157 wuffs_base__slice_u8 s,
Nigel Tao4d61a052020-07-11 12:34:40 +10001158 uint32_t options) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001159 do {
Nigel Tao4d61a052020-07-11 12:34:40 +10001160 if (options & WUFFS_BASE__PARSE_NUMBER_FXX__REJECT_INF_AND_NAN) {
1161 goto fail;
1162 }
1163
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001164 uint8_t* p = s.ptr;
1165 uint8_t* q = s.ptr + s.len;
1166
1167 for (; (p < q) && (*p == '_'); p++) {
1168 }
1169 if (p >= q) {
Nigel Tao4d61a052020-07-11 12:34:40 +10001170 goto fail;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001171 }
1172
1173 // Parse sign.
1174 bool negative = false;
1175 do {
1176 if (*p == '+') {
1177 p++;
1178 } else if (*p == '-') {
1179 negative = true;
1180 p++;
1181 } else {
1182 break;
1183 }
1184 for (; (p < q) && (*p == '_'); p++) {
1185 }
1186 } while (0);
1187 if (p >= q) {
Nigel Tao4d61a052020-07-11 12:34:40 +10001188 goto fail;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001189 }
1190
1191 bool nan = false;
1192 switch (p[0]) {
1193 case 'I':
1194 case 'i':
1195 if (((q - p) < 3) || //
1196 ((p[1] != 'N') && (p[1] != 'n')) || //
1197 ((p[2] != 'F') && (p[2] != 'f'))) {
Nigel Tao4d61a052020-07-11 12:34:40 +10001198 goto fail;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001199 }
1200 p += 3;
1201
1202 if ((p >= q) || (*p == '_')) {
1203 break;
1204 } else if (((q - p) < 5) || //
1205 ((p[0] != 'I') && (p[0] != 'i')) || //
1206 ((p[1] != 'N') && (p[1] != 'n')) || //
1207 ((p[2] != 'I') && (p[2] != 'i')) || //
1208 ((p[3] != 'T') && (p[3] != 't')) || //
1209 ((p[4] != 'Y') && (p[4] != 'y'))) {
Nigel Tao4d61a052020-07-11 12:34:40 +10001210 goto fail;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001211 }
1212 p += 5;
1213
1214 if ((p >= q) || (*p == '_')) {
1215 break;
1216 }
Nigel Tao4d61a052020-07-11 12:34:40 +10001217 goto fail;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001218
1219 case 'N':
1220 case 'n':
1221 if (((q - p) < 3) || //
1222 ((p[1] != 'A') && (p[1] != 'a')) || //
1223 ((p[2] != 'N') && (p[2] != 'n'))) {
Nigel Tao4d61a052020-07-11 12:34:40 +10001224 goto fail;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001225 }
1226 p += 3;
1227
1228 if ((p >= q) || (*p == '_')) {
1229 nan = true;
1230 break;
1231 }
Nigel Tao4d61a052020-07-11 12:34:40 +10001232 goto fail;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001233
1234 default:
Nigel Tao4d61a052020-07-11 12:34:40 +10001235 goto fail;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001236 }
1237
1238 // Finish.
1239 for (; (p < q) && (*p == '_'); p++) {
1240 }
1241 if (p != q) {
Nigel Tao4d61a052020-07-11 12:34:40 +10001242 goto fail;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001243 }
1244 wuffs_base__result_f64 ret;
1245 ret.status.repr = NULL;
Nigel Tao4d449dc2020-07-12 11:00:47 +10001246 ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001247 (nan ? 0x7FFFFFFFFFFFFFFF : 0x7FF0000000000000) |
1248 (negative ? 0x8000000000000000 : 0));
1249 return ret;
1250 } while (0);
1251
Nigel Tao4d61a052020-07-11 12:34:40 +10001252fail:
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001253 do {
1254 wuffs_base__result_f64 ret;
Nigel Tao4d61a052020-07-11 12:34:40 +10001255 ret.status.repr = wuffs_base__error__bad_argument;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001256 ret.value = 0;
1257 return ret;
1258 } while (0);
1259}
1260
1261WUFFS_BASE__MAYBE_STATIC wuffs_base__result_f64 //
Nigel Taoe0c5de92020-07-11 11:48:17 +10001262wuffs_base__private_implementation__high_prec_dec__to_f64(
Nigel Tao4d61a052020-07-11 12:34:40 +10001263 wuffs_base__private_implementation__high_prec_dec* h,
1264 uint32_t options) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001265 do {
1266 // powers converts decimal powers of 10 to binary powers of 2. For example,
1267 // (10000 >> 13) is 1. It stops before the elements exceed 60, also known
1268 // as WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL.
1269 static const uint32_t num_powers = 19;
1270 static const uint8_t powers[19] = {
1271 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, //
1272 33, 36, 39, 43, 46, 49, 53, 56, 59, //
1273 };
1274
1275 // Handle zero and obvious extremes. The largest and smallest positive
1276 // finite f64 values are approximately 1.8e+308 and 4.9e-324.
1277 if ((h->num_digits == 0) || (h->decimal_point < -326)) {
1278 goto zero;
1279 } else if (h->decimal_point > 310) {
1280 goto infinity;
1281 }
1282
Nigel Taoc4fa8e22020-07-18 17:35:13 +10001283 // Try the fast Eisel-Lemire algorithm again. Calculating the (man, exp10)
1284 // pair from the high_prec_dec h is more correct but slower than the
1285 // approach taken in wuffs_base__parse_number_f64. The latter is optimized
1286 // for the common cases (e.g. assuming no underscores or a leading '+'
1287 // sign) rather than the full set of cases allowed by the Wuffs API.
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001288 if (h->num_digits <= 19) {
1289 uint64_t man = 0;
1290 uint32_t i;
1291 for (i = 0; i < h->num_digits; i++) {
1292 man = (10 * man) + h->digits[i];
1293 }
1294 int32_t exp10 = h->decimal_point - ((int32_t)(h->num_digits));
Nigel Tao8b45db02020-09-15 21:50:32 +10001295 if ((man != 0) && (-307 <= exp10) && (exp10 <= 288)) {
Nigel Taoc4fa8e22020-07-18 17:35:13 +10001296 int64_t r =
1297 wuffs_base__private_implementation__parse_number_f64_eisel_lemire(
1298 man, exp10);
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001299 if (r >= 0) {
1300 wuffs_base__result_f64 ret;
1301 ret.status.repr = NULL;
Nigel Tao4d449dc2020-07-12 11:00:47 +10001302 ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001303 ((uint64_t)r) | (((uint64_t)(h->negative)) << 63));
1304 return ret;
1305 }
1306 }
1307 }
1308
Nigel Taoce685a62020-11-03 15:24:02 +11001309 // When Eisel-Lemire fails, fall back to Simple Decimal Conversion. See
1310 // https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html
1311 //
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001312 // Scale by powers of 2 until we're in the range [½ .. 1], which gives us
1313 // our exponent (in base-2). First we shift right, possibly a little too
1314 // far, ending with a value certainly below 1 and possibly below ½...
1315 const int32_t f64_bias = -1023;
1316 int32_t exp2 = 0;
1317 while (h->decimal_point > 0) {
1318 uint32_t n = (uint32_t)(+h->decimal_point);
1319 uint32_t shift =
1320 (n < num_powers)
1321 ? powers[n]
1322 : WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;
1323
1324 wuffs_base__private_implementation__high_prec_dec__small_rshift(h, shift);
1325 if (h->decimal_point <
1326 -WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
1327 goto zero;
1328 }
1329 exp2 += (int32_t)shift;
1330 }
1331 // ...then we shift left, putting us in [½ .. 1].
1332 while (h->decimal_point <= 0) {
1333 uint32_t shift;
1334 if (h->decimal_point == 0) {
1335 if (h->digits[0] >= 5) {
1336 break;
1337 }
Nigel Tao57d47c62020-09-08 16:43:31 +10001338 shift = (h->digits[0] < 2) ? 2 : 1;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001339 } else {
1340 uint32_t n = (uint32_t)(-h->decimal_point);
1341 shift = (n < num_powers)
1342 ? powers[n]
1343 : WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;
1344 }
1345
1346 wuffs_base__private_implementation__high_prec_dec__small_lshift(h, shift);
1347 if (h->decimal_point >
1348 +WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
1349 goto infinity;
1350 }
1351 exp2 -= (int32_t)shift;
1352 }
1353
1354 // We're in the range [½ .. 1] but f64 uses [1 .. 2].
1355 exp2--;
1356
1357 // The minimum normal exponent is (f64_bias + 1).
1358 while ((f64_bias + 1) > exp2) {
1359 uint32_t n = (uint32_t)((f64_bias + 1) - exp2);
1360 if (n > WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL) {
1361 n = WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;
1362 }
1363 wuffs_base__private_implementation__high_prec_dec__small_rshift(h, n);
1364 exp2 += (int32_t)n;
1365 }
1366
1367 // Check for overflow.
1368 if ((exp2 - f64_bias) >= 0x07FF) { // (1 << 11) - 1.
1369 goto infinity;
1370 }
1371
1372 // Extract 53 bits for the mantissa (in base-2).
1373 wuffs_base__private_implementation__high_prec_dec__small_lshift(h, 53);
1374 uint64_t man2 =
1375 wuffs_base__private_implementation__high_prec_dec__rounded_integer(h);
1376
1377 // Rounding might have added one bit. If so, shift and re-check overflow.
1378 if ((man2 >> 53) != 0) {
1379 man2 >>= 1;
1380 exp2++;
1381 if ((exp2 - f64_bias) >= 0x07FF) { // (1 << 11) - 1.
1382 goto infinity;
1383 }
1384 }
1385
1386 // Handle subnormal numbers.
1387 if ((man2 >> 52) == 0) {
1388 exp2 = f64_bias;
1389 }
1390
1391 // Pack the bits and return.
1392 uint64_t exp2_bits =
1393 (uint64_t)((exp2 - f64_bias) & 0x07FF); // (1 << 11) - 1.
1394 uint64_t bits = (man2 & 0x000FFFFFFFFFFFFF) | // (1 << 52) - 1.
1395 (exp2_bits << 52) | //
1396 (h->negative ? 0x8000000000000000 : 0); // (1 << 63).
1397
1398 wuffs_base__result_f64 ret;
1399 ret.status.repr = NULL;
Nigel Tao4d449dc2020-07-12 11:00:47 +10001400 ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(bits);
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001401 return ret;
1402 } while (0);
1403
1404zero:
1405 do {
1406 uint64_t bits = h->negative ? 0x8000000000000000 : 0;
1407
1408 wuffs_base__result_f64 ret;
1409 ret.status.repr = NULL;
Nigel Tao4d449dc2020-07-12 11:00:47 +10001410 ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(bits);
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001411 return ret;
1412 } while (0);
1413
1414infinity:
1415 do {
Nigel Tao4d61a052020-07-11 12:34:40 +10001416 if (options & WUFFS_BASE__PARSE_NUMBER_FXX__REJECT_INF_AND_NAN) {
1417 wuffs_base__result_f64 ret;
1418 ret.status.repr = wuffs_base__error__bad_argument;
1419 ret.value = 0;
1420 return ret;
1421 }
1422
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001423 uint64_t bits = h->negative ? 0xFFF0000000000000 : 0x7FF0000000000000;
1424
1425 wuffs_base__result_f64 ret;
1426 ret.status.repr = NULL;
Nigel Tao4d449dc2020-07-12 11:00:47 +10001427 ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(bits);
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001428 return ret;
1429 } while (0);
1430}
1431
1432static inline bool //
1433wuffs_base__private_implementation__is_decimal_digit(uint8_t c) {
1434 return ('0' <= c) && (c <= '9');
1435}
1436
1437WUFFS_BASE__MAYBE_STATIC wuffs_base__result_f64 //
1438wuffs_base__parse_number_f64(wuffs_base__slice_u8 s, uint32_t options) {
1439 // In practice, almost all "dd.ddddE±xxx" numbers can be represented
1440 // losslessly by a uint64_t mantissa "dddddd" and an int32_t base-10
1441 // exponent, adjusting "xxx" for the position (if present) of the decimal
1442 // separator '.' or ','.
1443 //
1444 // This (u64 man, i32 exp10) data structure is superficially similar to the
1445 // "Do It Yourself Floating Point" type from Loitsch (†), but the exponent
1446 // here is base-10, not base-2.
1447 //
Nigel Taoc4fa8e22020-07-18 17:35:13 +10001448 // If s's number fits in a (man, exp10), parse that pair with the
1449 // Eisel-Lemire algorithm. If not, or if Eisel-Lemire fails, parsing s with
1450 // the fallback algorithm is slower but comprehensive.
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001451 //
1452 // † "Printing Floating-Point Numbers Quickly and Accurately with Integers"
1453 // (https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf).
1454 // Florian Loitsch is also the primary contributor to
1455 // https://github.com/google/double-conversion
1456 do {
1457 // Calculating that (man, exp10) pair needs to stay within s's bounds.
1458 // Provided that s isn't extremely long, work on a NUL-terminated copy of
1459 // s's contents. The NUL byte isn't a valid part of "±dd.ddddE±xxx".
1460 //
1461 // As the pointer p walks the contents, it's faster to repeatedly check "is
1462 // *p a valid digit" than "is p within bounds and *p a valid digit".
1463 if (s.len >= 256) {
1464 goto fallback;
1465 }
1466 uint8_t z[256];
1467 memcpy(&z[0], s.ptr, s.len);
1468 z[s.len] = 0;
1469 const uint8_t* p = &z[0];
1470
1471 // Look for a leading minus sign. Technically, we could also look for an
1472 // optional plus sign, but the "script/process-json-numbers.c with -p"
1473 // benchmark is noticably slower if we do. It's optional and, in practice,
1474 // usually absent. Let the fallback catch it.
1475 bool negative = (*p == '-');
1476 if (negative) {
1477 p++;
1478 }
1479
1480 // After walking "dd.dddd", comparing p later with p now will produce the
1481 // number of "d"s and "."s.
1482 const uint8_t* const start_of_digits_ptr = p;
1483
1484 // Walk the "d"s before a '.', 'E', NUL byte, etc. If it starts with '0',
1485 // it must be a single '0'. If it starts with a non-zero decimal digit, it
1486 // can be a sequence of decimal digits.
1487 //
1488 // Update the man variable during the walk. It's OK if man overflows now.
1489 // We'll detect that later.
1490 uint64_t man;
1491 if (*p == '0') {
1492 man = 0;
1493 p++;
1494 if (wuffs_base__private_implementation__is_decimal_digit(*p)) {
1495 goto fallback;
1496 }
1497 } else if (wuffs_base__private_implementation__is_decimal_digit(*p)) {
1498 man = ((uint8_t)(*p - '0'));
1499 p++;
1500 for (; wuffs_base__private_implementation__is_decimal_digit(*p); p++) {
1501 man = (10 * man) + ((uint8_t)(*p - '0'));
1502 }
1503 } else {
1504 goto fallback;
1505 }
1506
1507 // Walk the "d"s after the optional decimal separator ('.' or ','),
1508 // updating the man and exp10 variables.
1509 int32_t exp10 = 0;
Nigel Taoe0c5de92020-07-11 11:48:17 +10001510 if (*p ==
1511 ((options & WUFFS_BASE__PARSE_NUMBER_FXX__DECIMAL_SEPARATOR_IS_A_COMMA)
1512 ? ','
1513 : '.')) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001514 p++;
1515 const uint8_t* first_after_separator_ptr = p;
1516 if (!wuffs_base__private_implementation__is_decimal_digit(*p)) {
1517 goto fallback;
1518 }
1519 man = (10 * man) + ((uint8_t)(*p - '0'));
1520 p++;
1521 for (; wuffs_base__private_implementation__is_decimal_digit(*p); p++) {
1522 man = (10 * man) + ((uint8_t)(*p - '0'));
1523 }
1524 exp10 = ((int32_t)(first_after_separator_ptr - p));
1525 }
1526
1527 // Count the number of digits:
1528 // - for an input of "314159", digit_count is 6.
1529 // - for an input of "3.14159", digit_count is 7.
1530 //
1531 // This is off-by-one if there is a decimal separator. That's OK for now.
1532 // We'll correct for that later. The "script/process-json-numbers.c with
1533 // -p" benchmark is noticably slower if we try to correct for that now.
1534 uint32_t digit_count = (uint32_t)(p - start_of_digits_ptr);
1535
1536 // Update exp10 for the optional exponent, starting with 'E' or 'e'.
1537 if ((*p | 0x20) == 'e') {
1538 p++;
1539 int32_t exp_sign = +1;
1540 if (*p == '-') {
1541 p++;
1542 exp_sign = -1;
1543 } else if (*p == '+') {
1544 p++;
1545 }
1546 if (!wuffs_base__private_implementation__is_decimal_digit(*p)) {
1547 goto fallback;
1548 }
1549 int32_t exp_num = ((uint8_t)(*p - '0'));
1550 p++;
1551 // The rest of the exp_num walking has a peculiar control flow but, once
1552 // again, the "script/process-json-numbers.c with -p" benchmark is
1553 // sensitive to alternative formulations.
1554 if (wuffs_base__private_implementation__is_decimal_digit(*p)) {
1555 exp_num = (10 * exp_num) + ((uint8_t)(*p - '0'));
1556 p++;
1557 }
1558 if (wuffs_base__private_implementation__is_decimal_digit(*p)) {
1559 exp_num = (10 * exp_num) + ((uint8_t)(*p - '0'));
1560 p++;
1561 }
1562 while (wuffs_base__private_implementation__is_decimal_digit(*p)) {
1563 if (exp_num > 0x1000000) {
1564 goto fallback;
1565 }
1566 exp_num = (10 * exp_num) + ((uint8_t)(*p - '0'));
1567 p++;
1568 }
1569 exp10 += exp_sign * exp_num;
1570 }
1571
1572 // The Wuffs API is that the original slice has no trailing data. It also
1573 // allows underscores, which we don't catch here but the fallback should.
1574 if (p != &z[s.len]) {
1575 goto fallback;
1576 }
1577
1578 // Check that the uint64_t typed man variable has not overflowed, based on
1579 // digit_count.
1580 //
1581 // For reference:
1582 // - (1 << 63) is 9223372036854775808, which has 19 decimal digits.
1583 // - (1 << 64) is 18446744073709551616, which has 20 decimal digits.
1584 // - 19 nines, 9999999999999999999, is 0x8AC7230489E7FFFF, which has 64
1585 // bits and 16 hexadecimal digits.
1586 // - 20 nines, 99999999999999999999, is 0x56BC75E2D630FFFFF, which has 67
1587 // bits and 17 hexadecimal digits.
1588 if (digit_count > 19) {
1589 // Even if we have more than 19 pseudo-digits, it's not yet definitely an
1590 // overflow. Recall that digit_count might be off-by-one (too large) if
1591 // there's a decimal separator. It will also over-report the number of
1592 // meaningful digits if the input looks something like "0.000dddExxx".
1593 //
1594 // We adjust by the number of leading '0's and '.'s and re-compare to 19.
1595 // Once again, technically, we could skip ','s too, but that perturbs the
1596 // "script/process-json-numbers.c with -p" benchmark.
1597 const uint8_t* q = start_of_digits_ptr;
1598 for (; (*q == '0') || (*q == '.'); q++) {
1599 }
1600 digit_count -= (uint32_t)(q - start_of_digits_ptr);
1601 if (digit_count > 19) {
1602 goto fallback;
1603 }
1604 }
1605
Nigel Taoc4fa8e22020-07-18 17:35:13 +10001606 // The wuffs_base__private_implementation__parse_number_f64_eisel_lemire
Nigel Tao8b45db02020-09-15 21:50:32 +10001607 // preconditions include that exp10 is in the range [-307 ..= 288].
1608 if ((exp10 < -307) || (288 < exp10)) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001609 goto fallback;
1610 }
1611
Nigel Tao9f22b5e2020-09-11 09:10:08 +10001612 // If both man and (10 ** exp10) are exactly representable by a double, we
1613 // don't need to run the Eisel-Lemire algorithm.
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001614 if ((-22 <= exp10) && (exp10 <= 22) && ((man >> 53) == 0)) {
1615 double d = (double)man;
1616 if (exp10 >= 0) {
1617 d *= wuffs_base__private_implementation__f64_powers_of_10[+exp10];
1618 } else {
1619 d /= wuffs_base__private_implementation__f64_powers_of_10[-exp10];
1620 }
1621 wuffs_base__result_f64 ret;
1622 ret.status.repr = NULL;
1623 ret.value = negative ? -d : +d;
1624 return ret;
1625 }
1626
Nigel Taoc4fa8e22020-07-18 17:35:13 +10001627 // The wuffs_base__private_implementation__parse_number_f64_eisel_lemire
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001628 // preconditions include that man is non-zero. Parsing "0" should be caught
Nigel Tao9f22b5e2020-09-11 09:10:08 +10001629 // by the "If both man and (10 ** exp10)" above, but "0e99" might not.
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001630 if (man == 0) {
1631 goto fallback;
1632 }
1633
Nigel Taoc4fa8e22020-07-18 17:35:13 +10001634 // Our man and exp10 are in range. Run the Eisel-Lemire algorithm.
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001635 int64_t r =
Nigel Taoc4fa8e22020-07-18 17:35:13 +10001636 wuffs_base__private_implementation__parse_number_f64_eisel_lemire(
1637 man, exp10);
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001638 if (r < 0) {
1639 goto fallback;
1640 }
1641 wuffs_base__result_f64 ret;
1642 ret.status.repr = NULL;
Nigel Tao4d449dc2020-07-12 11:00:47 +10001643 ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001644 ((uint64_t)r) | (((uint64_t)negative) << 63));
1645 return ret;
1646 } while (0);
1647
1648fallback:
1649 do {
1650 wuffs_base__private_implementation__high_prec_dec h;
1651 wuffs_base__status status =
Nigel Taoe0c5de92020-07-11 11:48:17 +10001652 wuffs_base__private_implementation__high_prec_dec__parse(&h, s,
1653 options);
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001654 if (status.repr) {
Nigel Taoe0c5de92020-07-11 11:48:17 +10001655 return wuffs_base__private_implementation__parse_number_f64_special(
Nigel Tao4d61a052020-07-11 12:34:40 +10001656 s, options);
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001657 }
Nigel Tao4d61a052020-07-11 12:34:40 +10001658 return wuffs_base__private_implementation__high_prec_dec__to_f64(&h,
1659 options);
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001660 } while (0);
1661}
1662
1663// --------
1664
1665static inline size_t //
1666wuffs_base__private_implementation__render_inf(wuffs_base__slice_u8 dst,
1667 bool neg,
1668 uint32_t options) {
1669 if (neg) {
1670 if (dst.len < 4) {
1671 return 0;
1672 }
1673 wuffs_base__store_u32le__no_bounds_check(dst.ptr, 0x666E492D); // '-Inf'le.
1674 return 4;
1675 }
1676
1677 if (options & WUFFS_BASE__RENDER_NUMBER_XXX__LEADING_PLUS_SIGN) {
1678 if (dst.len < 4) {
1679 return 0;
1680 }
1681 wuffs_base__store_u32le__no_bounds_check(dst.ptr, 0x666E492B); // '+Inf'le.
1682 return 4;
1683 }
1684
1685 if (dst.len < 3) {
1686 return 0;
1687 }
1688 wuffs_base__store_u24le__no_bounds_check(dst.ptr, 0x666E49); // 'Inf'le.
1689 return 3;
1690}
1691
1692static inline size_t //
1693wuffs_base__private_implementation__render_nan(wuffs_base__slice_u8 dst) {
1694 if (dst.len < 3) {
1695 return 0;
1696 }
1697 wuffs_base__store_u24le__no_bounds_check(dst.ptr, 0x4E614E); // 'NaN'le.
1698 return 3;
1699}
1700
1701static size_t //
1702wuffs_base__private_implementation__high_prec_dec__render_exponent_absent(
1703 wuffs_base__slice_u8 dst,
1704 wuffs_base__private_implementation__high_prec_dec* h,
1705 uint32_t precision,
1706 uint32_t options) {
1707 size_t n = (h->negative ||
1708 (options & WUFFS_BASE__RENDER_NUMBER_XXX__LEADING_PLUS_SIGN))
1709 ? 1
1710 : 0;
1711 if (h->decimal_point <= 0) {
1712 n += 1;
1713 } else {
1714 n += (size_t)(h->decimal_point);
1715 }
1716 if (precision > 0) {
1717 n += precision + 1; // +1 for the '.'.
1718 }
1719
1720 // Don't modify dst if the formatted number won't fit.
1721 if (n > dst.len) {
1722 return 0;
1723 }
1724
1725 // Align-left or align-right.
1726 uint8_t* ptr = (options & WUFFS_BASE__RENDER_NUMBER_XXX__ALIGN_RIGHT)
1727 ? &dst.ptr[dst.len - n]
1728 : &dst.ptr[0];
1729
1730 // Leading "±".
1731 if (h->negative) {
1732 *ptr++ = '-';
1733 } else if (options & WUFFS_BASE__RENDER_NUMBER_XXX__LEADING_PLUS_SIGN) {
1734 *ptr++ = '+';
1735 }
1736
1737 // Integral digits.
1738 if (h->decimal_point <= 0) {
1739 *ptr++ = '0';
1740 } else {
1741 uint32_t m =
1742 wuffs_base__u32__min(h->num_digits, (uint32_t)(h->decimal_point));
1743 uint32_t i = 0;
1744 for (; i < m; i++) {
1745 *ptr++ = (uint8_t)('0' | h->digits[i]);
1746 }
1747 for (; i < (uint32_t)(h->decimal_point); i++) {
1748 *ptr++ = '0';
1749 }
1750 }
1751
1752 // Separator and then fractional digits.
1753 if (precision > 0) {
1754 *ptr++ =
1755 (options & WUFFS_BASE__RENDER_NUMBER_FXX__DECIMAL_SEPARATOR_IS_A_COMMA)
1756 ? ','
1757 : '.';
1758 uint32_t i = 0;
1759 for (; i < precision; i++) {
1760 uint32_t j = ((uint32_t)(h->decimal_point)) + i;
1761 *ptr++ = (uint8_t)('0' | ((j < h->num_digits) ? h->digits[j] : 0));
1762 }
1763 }
1764
1765 return n;
1766}
1767
1768static size_t //
1769wuffs_base__private_implementation__high_prec_dec__render_exponent_present(
1770 wuffs_base__slice_u8 dst,
1771 wuffs_base__private_implementation__high_prec_dec* h,
1772 uint32_t precision,
1773 uint32_t options) {
1774 int32_t exp = 0;
1775 if (h->num_digits > 0) {
1776 exp = h->decimal_point - 1;
1777 }
1778 bool negative_exp = exp < 0;
1779 if (negative_exp) {
1780 exp = -exp;
1781 }
1782
1783 size_t n = (h->negative ||
1784 (options & WUFFS_BASE__RENDER_NUMBER_XXX__LEADING_PLUS_SIGN))
1785 ? 4
1786 : 3; // Mininum 3 bytes: first digit and then "e±".
1787 if (precision > 0) {
1788 n += precision + 1; // +1 for the '.'.
1789 }
1790 n += (exp < 100) ? 2 : 3;
1791
1792 // Don't modify dst if the formatted number won't fit.
1793 if (n > dst.len) {
1794 return 0;
1795 }
1796
1797 // Align-left or align-right.
1798 uint8_t* ptr = (options & WUFFS_BASE__RENDER_NUMBER_XXX__ALIGN_RIGHT)
1799 ? &dst.ptr[dst.len - n]
1800 : &dst.ptr[0];
1801
1802 // Leading "±".
1803 if (h->negative) {
1804 *ptr++ = '-';
1805 } else if (options & WUFFS_BASE__RENDER_NUMBER_XXX__LEADING_PLUS_SIGN) {
1806 *ptr++ = '+';
1807 }
1808
1809 // Integral digit.
1810 if (h->num_digits > 0) {
1811 *ptr++ = (uint8_t)('0' | h->digits[0]);
1812 } else {
1813 *ptr++ = '0';
1814 }
1815
1816 // Separator and then fractional digits.
1817 if (precision > 0) {
1818 *ptr++ =
1819 (options & WUFFS_BASE__RENDER_NUMBER_FXX__DECIMAL_SEPARATOR_IS_A_COMMA)
1820 ? ','
1821 : '.';
1822 uint32_t i = 1;
1823 uint32_t j = wuffs_base__u32__min(h->num_digits, precision + 1);
1824 for (; i < j; i++) {
1825 *ptr++ = (uint8_t)('0' | h->digits[i]);
1826 }
1827 for (; i <= precision; i++) {
1828 *ptr++ = '0';
1829 }
1830 }
1831
1832 // Exponent: "e±" and then 2 or 3 digits.
1833 *ptr++ = 'e';
1834 *ptr++ = negative_exp ? '-' : '+';
1835 if (exp < 10) {
1836 *ptr++ = '0';
1837 *ptr++ = (uint8_t)('0' | exp);
1838 } else if (exp < 100) {
1839 *ptr++ = (uint8_t)('0' | (exp / 10));
1840 *ptr++ = (uint8_t)('0' | (exp % 10));
1841 } else {
1842 int32_t e = exp / 100;
1843 exp -= e * 100;
1844 *ptr++ = (uint8_t)('0' | e);
1845 *ptr++ = (uint8_t)('0' | (exp / 10));
1846 *ptr++ = (uint8_t)('0' | (exp % 10));
1847 }
1848
1849 return n;
1850}
1851
1852WUFFS_BASE__MAYBE_STATIC size_t //
1853wuffs_base__render_number_f64(wuffs_base__slice_u8 dst,
1854 double x,
1855 uint32_t precision,
1856 uint32_t options) {
1857 // Decompose x (64 bits) into negativity (1 bit), base-2 exponent (11 bits
1858 // with a -1023 bias) and mantissa (52 bits).
Nigel Tao4d449dc2020-07-12 11:00:47 +10001859 uint64_t bits = wuffs_base__ieee_754_bit_representation__from_f64_to_u64(x);
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001860 bool neg = (bits >> 63) != 0;
1861 int32_t exp2 = ((int32_t)(bits >> 52)) & 0x7FF;
1862 uint64_t man = bits & 0x000FFFFFFFFFFFFFul;
1863
1864 // Apply the exponent bias and set the implicit top bit of the mantissa,
1865 // unless x is subnormal. Also take care of Inf and NaN.
1866 if (exp2 == 0x7FF) {
1867 if (man != 0) {
1868 return wuffs_base__private_implementation__render_nan(dst);
1869 }
1870 return wuffs_base__private_implementation__render_inf(dst, neg, options);
1871 } else if (exp2 == 0) {
1872 exp2 = -1022;
1873 } else {
1874 exp2 -= 1023;
1875 man |= 0x0010000000000000ul;
1876 }
1877
1878 // Ensure that precision isn't too large.
1879 if (precision > 4095) {
1880 precision = 4095;
1881 }
1882
1883 // Convert from the (neg, exp2, man) tuple to an HPD.
1884 wuffs_base__private_implementation__high_prec_dec h;
1885 wuffs_base__private_implementation__high_prec_dec__assign(&h, man, neg);
1886 if (h.num_digits > 0) {
1887 wuffs_base__private_implementation__high_prec_dec__lshift(
1888 &h, exp2 - 52); // 52 mantissa bits.
1889 }
1890
1891 // Handle the "%e" and "%f" formats.
1892 switch (options & (WUFFS_BASE__RENDER_NUMBER_FXX__EXPONENT_ABSENT |
1893 WUFFS_BASE__RENDER_NUMBER_FXX__EXPONENT_PRESENT)) {
1894 case WUFFS_BASE__RENDER_NUMBER_FXX__EXPONENT_ABSENT: // The "%"f" format.
1895 if (options & WUFFS_BASE__RENDER_NUMBER_FXX__JUST_ENOUGH_PRECISION) {
1896 wuffs_base__private_implementation__high_prec_dec__round_just_enough(
1897 &h, exp2, man);
1898 int32_t p = ((int32_t)(h.num_digits)) - h.decimal_point;
1899 precision = ((uint32_t)(wuffs_base__i32__max(0, p)));
1900 } else {
1901 wuffs_base__private_implementation__high_prec_dec__round_nearest(
1902 &h, ((int32_t)precision) + h.decimal_point);
1903 }
1904 return wuffs_base__private_implementation__high_prec_dec__render_exponent_absent(
1905 dst, &h, precision, options);
1906
1907 case WUFFS_BASE__RENDER_NUMBER_FXX__EXPONENT_PRESENT: // The "%e" format.
1908 if (options & WUFFS_BASE__RENDER_NUMBER_FXX__JUST_ENOUGH_PRECISION) {
1909 wuffs_base__private_implementation__high_prec_dec__round_just_enough(
1910 &h, exp2, man);
1911 precision = (h.num_digits > 0) ? (h.num_digits - 1) : 0;
1912 } else {
1913 wuffs_base__private_implementation__high_prec_dec__round_nearest(
1914 &h, ((int32_t)precision) + 1);
1915 }
1916 return wuffs_base__private_implementation__high_prec_dec__render_exponent_present(
1917 dst, &h, precision, options);
1918 }
1919
1920 // We have the "%g" format and so precision means the number of significant
1921 // digits, not the number of digits after the decimal separator. Perform
1922 // rounding and determine whether to use "%e" or "%f".
1923 int32_t e_threshold = 0;
1924 if (options & WUFFS_BASE__RENDER_NUMBER_FXX__JUST_ENOUGH_PRECISION) {
1925 wuffs_base__private_implementation__high_prec_dec__round_just_enough(
1926 &h, exp2, man);
1927 precision = h.num_digits;
1928 e_threshold = 6;
1929 } else {
1930 if (precision == 0) {
1931 precision = 1;
1932 }
1933 wuffs_base__private_implementation__high_prec_dec__round_nearest(
1934 &h, ((int32_t)precision));
1935 e_threshold = ((int32_t)precision);
1936 int32_t nd = ((int32_t)(h.num_digits));
1937 if ((e_threshold > nd) && (nd >= h.decimal_point)) {
1938 e_threshold = nd;
1939 }
1940 }
1941
1942 // Use the "%e" format if the exponent is large.
1943 int32_t e = h.decimal_point - 1;
1944 if ((e < -4) || (e_threshold <= e)) {
1945 uint32_t p = wuffs_base__u32__min(precision, h.num_digits);
1946 return wuffs_base__private_implementation__high_prec_dec__render_exponent_present(
1947 dst, &h, (p > 0) ? (p - 1) : 0, options);
1948 }
1949
1950 // Use the "%f" format otherwise.
1951 int32_t p = ((int32_t)precision);
1952 if (p > h.decimal_point) {
1953 p = ((int32_t)(h.num_digits));
1954 }
1955 precision = ((uint32_t)(wuffs_base__i32__max(0, p - h.decimal_point)));
1956 return wuffs_base__private_implementation__high_prec_dec__render_exponent_absent(
1957 dst, &h, precision, options);
1958}