blob: 0885066deb19a215a05085545b0cc4e44b913cda [file] [log] [blame]
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001// After editing this file, run "go generate" in the parent directory.
2
3// Copyright 2020 The Wuffs Authors.
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// https://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17// ---------------- IEEE 754 Floating Point
18
19#define WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE 2047
20#define WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION 800
21
22// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL is the largest N
23// such that ((10 << N) < (1 << 64)).
24#define WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL 60
25
26// wuffs_base__private_implementation__high_prec_dec (abbreviated as HPD) is a
27// fixed precision floating point decimal number, augmented with ±infinity
28// values, but it cannot represent NaN (Not a Number).
29//
30// "High precision" means that the mantissa holds 800 decimal digits. 800 is
31// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION.
32//
33// An HPD isn't for general purpose arithmetic, only for conversions to and
34// from IEEE 754 double-precision floating point, where the largest and
35// smallest positive, finite values are approximately 1.8e+308 and 4.9e-324.
36// HPD exponents above +2047 mean infinity, below -2047 mean zero. The ±2047
37// bounds are further away from zero than ±(324 + 800), where 800 and 2047 is
38// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION and
39// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE.
40//
41// digits[.. num_digits] are the number's digits in big-endian order. The
42// uint8_t values are in the range [0 ..= 9], not ['0' ..= '9'], where e.g. '7'
43// is the ASCII value 0x37.
44//
45// decimal_point is the index (within digits) of the decimal point. It may be
46// negative or be larger than num_digits, in which case the explicit digits are
47// padded with implicit zeroes.
48//
49// For example, if num_digits is 3 and digits is "\x07\x08\x09":
50// - A decimal_point of -2 means ".00789"
51// - A decimal_point of -1 means ".0789"
52// - A decimal_point of +0 means ".789"
53// - A decimal_point of +1 means "7.89"
54// - A decimal_point of +2 means "78.9"
55// - A decimal_point of +3 means "789."
56// - A decimal_point of +4 means "7890."
57// - A decimal_point of +5 means "78900."
58//
59// As above, a decimal_point higher than +2047 means that the overall value is
60// infinity, lower than -2047 means zero.
61//
62// negative is a sign bit. An HPD can distinguish positive and negative zero.
63//
64// truncated is whether there are more than
65// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION digits, and at
66// least one of those extra digits are non-zero. The existence of long-tail
67// digits can affect rounding.
68//
69// The "all fields are zero" value is valid, and represents the number +0.
70typedef struct {
71 uint32_t num_digits;
72 int32_t decimal_point;
73 bool negative;
74 bool truncated;
75 uint8_t digits[WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION];
76} wuffs_base__private_implementation__high_prec_dec;
77
78// wuffs_base__private_implementation__high_prec_dec__trim trims trailing
79// zeroes from the h->digits[.. h->num_digits] slice. They have no benefit,
80// since we explicitly track h->decimal_point.
81//
82// Preconditions:
83// - h is non-NULL.
84static inline void //
85wuffs_base__private_implementation__high_prec_dec__trim(
86 wuffs_base__private_implementation__high_prec_dec* h) {
87 while ((h->num_digits > 0) && (h->digits[h->num_digits - 1] == 0)) {
88 h->num_digits--;
89 }
90}
91
92// wuffs_base__private_implementation__high_prec_dec__assign sets h to
93// represent the number x.
94//
95// Preconditions:
96// - h is non-NULL.
97static void //
98wuffs_base__private_implementation__high_prec_dec__assign(
99 wuffs_base__private_implementation__high_prec_dec* h,
100 uint64_t x,
101 bool negative) {
102 uint32_t n = 0;
103
104 // Set h->digits.
105 if (x > 0) {
106 // Calculate the digits, working right-to-left. After we determine n (how
107 // many digits there are), copy from buf to h->digits.
108 //
109 // UINT64_MAX, 18446744073709551615, is 20 digits long. It can be faster to
110 // copy a constant number of bytes than a variable number (20 instead of
111 // n). Make buf large enough (and start writing to it from the middle) so
112 // that can we always copy 20 bytes: the slice buf[(20-n) .. (40-n)].
113 uint8_t buf[40] = {0};
114 uint8_t* ptr = &buf[20];
115 do {
116 uint64_t remaining = x / 10;
117 x -= remaining * 10;
118 ptr--;
119 *ptr = (uint8_t)x;
120 n++;
121 x = remaining;
122 } while (x > 0);
123 memcpy(h->digits, ptr, 20);
124 }
125
126 // Set h's other fields.
127 h->num_digits = n;
128 h->decimal_point = (int32_t)n;
129 h->negative = negative;
130 h->truncated = false;
131 wuffs_base__private_implementation__high_prec_dec__trim(h);
132}
133
134static wuffs_base__status //
135wuffs_base__private_implementation__high_prec_dec__parse(
136 wuffs_base__private_implementation__high_prec_dec* h,
Nigel Taoe0c5de92020-07-11 11:48:17 +1000137 wuffs_base__slice_u8 s,
138 uint32_t options) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000139 if (!h) {
140 return wuffs_base__make_status(wuffs_base__error__bad_receiver);
141 }
142 h->num_digits = 0;
143 h->decimal_point = 0;
144 h->negative = false;
145 h->truncated = false;
146
147 uint8_t* p = s.ptr;
148 uint8_t* q = s.ptr + s.len;
149
Nigel Taoc5c98852020-07-11 13:10:14 +1000150 if (options & WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_UNDERSCORES) {
151 for (;; p++) {
152 if (p >= q) {
153 return wuffs_base__make_status(wuffs_base__error__bad_argument);
154 } else if (*p != '_') {
155 break;
156 }
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000157 }
158 }
159
160 // Parse sign.
161 do {
162 if (*p == '+') {
163 p++;
164 } else if (*p == '-') {
165 h->negative = true;
166 p++;
167 } else {
168 break;
169 }
Nigel Taoc5c98852020-07-11 13:10:14 +1000170 if (options & WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_UNDERSCORES) {
171 for (;; p++) {
172 if (p >= q) {
173 return wuffs_base__make_status(wuffs_base__error__bad_argument);
174 } else if (*p != '_') {
175 break;
176 }
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000177 }
178 }
179 } while (0);
180
181 // Parse digits, up to (and including) a '.', 'E' or 'e'. Examples for each
182 // limb in this if-else chain:
183 // - "0.789"
184 // - "1002.789"
185 // - ".789"
186 // - Other (invalid input).
187 uint32_t nd = 0;
188 int32_t dp = 0;
189 bool no_digits_before_separator = false;
Nigel Taoe82bc8e2020-07-11 12:49:15 +1000190 if (('0' == *p) &&
191 !(options &
192 WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_MULTIPLE_LEADING_ZEROES)) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000193 p++;
194 for (;; p++) {
195 if (p >= q) {
196 goto after_all;
Nigel Taoe0c5de92020-07-11 11:48:17 +1000197 } else if (*p ==
198 ((options &
199 WUFFS_BASE__PARSE_NUMBER_FXX__DECIMAL_SEPARATOR_IS_A_COMMA)
200 ? ','
201 : '.')) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000202 p++;
203 goto after_sep;
204 } else if ((*p == 'E') || (*p == 'e')) {
205 p++;
206 goto after_exp;
Nigel Taoc5c98852020-07-11 13:10:14 +1000207 } else if ((*p != '_') ||
208 !(options & WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_UNDERSCORES)) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000209 return wuffs_base__make_status(wuffs_base__error__bad_argument);
210 }
211 }
212
Nigel Taoe82bc8e2020-07-11 12:49:15 +1000213 } else if (('0' <= *p) && (*p <= '9')) {
214 if (*p == '0') {
215 for (; (p < q) && (*p == '0'); p++) {
216 }
217 } else {
218 h->digits[nd++] = (uint8_t)(*p - '0');
219 dp = (int32_t)nd;
220 p++;
221 }
222
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000223 for (;; p++) {
224 if (p >= q) {
225 goto after_all;
226 } else if (('0' <= *p) && (*p <= '9')) {
227 if (nd < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
228 h->digits[nd++] = (uint8_t)(*p - '0');
229 dp = (int32_t)nd;
230 } else if ('0' != *p) {
231 // Long-tail non-zeroes set the truncated bit.
232 h->truncated = true;
233 }
Nigel Taoe0c5de92020-07-11 11:48:17 +1000234 } else if (*p ==
235 ((options &
236 WUFFS_BASE__PARSE_NUMBER_FXX__DECIMAL_SEPARATOR_IS_A_COMMA)
237 ? ','
238 : '.')) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000239 p++;
240 goto after_sep;
241 } else if ((*p == 'E') || (*p == 'e')) {
242 p++;
243 goto after_exp;
Nigel Taoc5c98852020-07-11 13:10:14 +1000244 } else if ((*p != '_') ||
245 !(options & WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_UNDERSCORES)) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000246 return wuffs_base__make_status(wuffs_base__error__bad_argument);
247 }
248 }
249
Nigel Taoe0c5de92020-07-11 11:48:17 +1000250 } else if (*p == ((options &
251 WUFFS_BASE__PARSE_NUMBER_FXX__DECIMAL_SEPARATOR_IS_A_COMMA)
252 ? ','
253 : '.')) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000254 p++;
255 no_digits_before_separator = true;
256
257 } else {
258 return wuffs_base__make_status(wuffs_base__error__bad_argument);
259 }
260
261after_sep:
262 for (;; p++) {
263 if (p >= q) {
264 goto after_all;
265 } else if ('0' == *p) {
266 if (nd == 0) {
267 // Track leading zeroes implicitly.
268 dp--;
269 } else if (nd <
270 WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
271 h->digits[nd++] = (uint8_t)(*p - '0');
272 }
273 } else if (('0' < *p) && (*p <= '9')) {
274 if (nd < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
275 h->digits[nd++] = (uint8_t)(*p - '0');
276 } else {
277 // Long-tail non-zeroes set the truncated bit.
278 h->truncated = true;
279 }
280 } else if ((*p == 'E') || (*p == 'e')) {
281 p++;
282 goto after_exp;
Nigel Taoc5c98852020-07-11 13:10:14 +1000283 } else if ((*p != '_') ||
284 !(options & WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_UNDERSCORES)) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000285 return wuffs_base__make_status(wuffs_base__error__bad_argument);
286 }
287 }
288
289after_exp:
290 do {
Nigel Taoc5c98852020-07-11 13:10:14 +1000291 if (options & WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_UNDERSCORES) {
292 for (;; p++) {
293 if (p >= q) {
294 return wuffs_base__make_status(wuffs_base__error__bad_argument);
295 } else if (*p != '_') {
296 break;
297 }
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000298 }
299 }
300
301 int32_t exp_sign = +1;
302 if (*p == '+') {
303 p++;
304 } else if (*p == '-') {
305 exp_sign = -1;
306 p++;
307 }
308
309 int32_t exp = 0;
310 const int32_t exp_large =
311 WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE +
312 WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION;
313 bool saw_exp_digits = false;
314 for (; p < q; p++) {
Nigel Taoc5c98852020-07-11 13:10:14 +1000315 if ((*p == '_') &&
316 (options & WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_UNDERSCORES)) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000317 // No-op.
318 } else if (('0' <= *p) && (*p <= '9')) {
319 saw_exp_digits = true;
320 if (exp < exp_large) {
321 exp = (10 * exp) + ((int32_t)(*p - '0'));
322 }
323 } else {
324 break;
325 }
326 }
327 if (!saw_exp_digits) {
328 return wuffs_base__make_status(wuffs_base__error__bad_argument);
329 }
330 dp += exp_sign * exp;
331 } while (0);
332
333after_all:
334 if (p != q) {
335 return wuffs_base__make_status(wuffs_base__error__bad_argument);
336 }
337 h->num_digits = nd;
338 if (nd == 0) {
339 if (no_digits_before_separator) {
340 return wuffs_base__make_status(wuffs_base__error__bad_argument);
341 }
342 h->decimal_point = 0;
343 } else if (dp <
344 -WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
345 h->decimal_point =
346 -WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE - 1;
347 } else if (dp >
348 +WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
349 h->decimal_point =
350 +WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE + 1;
351 } else {
352 h->decimal_point = dp;
353 }
354 wuffs_base__private_implementation__high_prec_dec__trim(h);
355 return wuffs_base__make_status(NULL);
356}
357
358// --------
359
360// wuffs_base__private_implementation__high_prec_dec__lshift_num_new_digits
361// returns the number of additional decimal digits when left-shifting by shift.
362//
363// See below for preconditions.
364static uint32_t //
365wuffs_base__private_implementation__high_prec_dec__lshift_num_new_digits(
366 wuffs_base__private_implementation__high_prec_dec* h,
367 uint32_t shift) {
368 // Masking with 0x3F should be unnecessary (assuming the preconditions) but
369 // it's cheap and ensures that we don't overflow the
370 // wuffs_base__private_implementation__hpd_left_shift array.
371 shift &= 63;
372
373 uint32_t x_a = wuffs_base__private_implementation__hpd_left_shift[shift];
374 uint32_t x_b = wuffs_base__private_implementation__hpd_left_shift[shift + 1];
375 uint32_t num_new_digits = x_a >> 11;
376 uint32_t pow5_a = 0x7FF & x_a;
377 uint32_t pow5_b = 0x7FF & x_b;
378
379 const uint8_t* pow5 =
380 &wuffs_base__private_implementation__powers_of_5[pow5_a];
381 uint32_t i = 0;
382 uint32_t n = pow5_b - pow5_a;
383 for (; i < n; i++) {
384 if (i >= h->num_digits) {
385 return num_new_digits - 1;
386 } else if (h->digits[i] == pow5[i]) {
387 continue;
388 } else if (h->digits[i] < pow5[i]) {
389 return num_new_digits - 1;
390 } else {
391 return num_new_digits;
392 }
393 }
394 return num_new_digits;
395}
396
397// --------
398
399// wuffs_base__private_implementation__high_prec_dec__rounded_integer returns
400// the integral (non-fractional) part of h, provided that it is 18 or fewer
401// decimal digits. For 19 or more digits, it returns UINT64_MAX. Note that:
402// - (1 << 53) is 9007199254740992, which has 16 decimal digits.
403// - (1 << 56) is 72057594037927936, which has 17 decimal digits.
404// - (1 << 59) is 576460752303423488, which has 18 decimal digits.
405// - (1 << 63) is 9223372036854775808, which has 19 decimal digits.
406// and that IEEE 754 double precision has 52 mantissa bits.
407//
408// That integral part is rounded-to-even: rounding 7.5 or 8.5 both give 8.
409//
410// h's negative bit is ignored: rounding -8.6 returns 9.
411//
412// See below for preconditions.
413static uint64_t //
414wuffs_base__private_implementation__high_prec_dec__rounded_integer(
415 wuffs_base__private_implementation__high_prec_dec* h) {
416 if ((h->num_digits == 0) || (h->decimal_point < 0)) {
417 return 0;
418 } else if (h->decimal_point > 18) {
419 return UINT64_MAX;
420 }
421
422 uint32_t dp = (uint32_t)(h->decimal_point);
423 uint64_t n = 0;
424 uint32_t i = 0;
425 for (; i < dp; i++) {
426 n = (10 * n) + ((i < h->num_digits) ? h->digits[i] : 0);
427 }
428
429 bool round_up = false;
430 if (dp < h->num_digits) {
431 round_up = h->digits[dp] >= 5;
432 if ((h->digits[dp] == 5) && (dp + 1 == h->num_digits)) {
433 // We are exactly halfway. If we're truncated, round up, otherwise round
434 // to even.
435 round_up = h->truncated || //
436 ((dp > 0) && (1 & h->digits[dp - 1]));
437 }
438 }
439 if (round_up) {
440 n++;
441 }
442
443 return n;
444}
445
446// wuffs_base__private_implementation__high_prec_dec__small_xshift shifts h's
447// number (where 'x' is 'l' or 'r' for left or right) by a small shift value.
448//
449// Preconditions:
450// - h is non-NULL.
451// - h->decimal_point is "not extreme".
452// - shift is non-zero.
453// - shift is "a small shift".
454//
455// "Not extreme" means within
456// ±WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE.
457//
458// "A small shift" means not more than
459// WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL.
460//
461// wuffs_base__private_implementation__high_prec_dec__rounded_integer and
462// wuffs_base__private_implementation__high_prec_dec__lshift_num_new_digits
463// have the same preconditions.
464//
465// wuffs_base__private_implementation__high_prec_dec__lshift keeps the first
466// two preconditions but not the last two. Its shift argument is signed and
467// does not need to be "small": zero is a no-op, positive means left shift and
468// negative means right shift.
469
470static void //
471wuffs_base__private_implementation__high_prec_dec__small_lshift(
472 wuffs_base__private_implementation__high_prec_dec* h,
473 uint32_t shift) {
474 if (h->num_digits == 0) {
475 return;
476 }
477 uint32_t num_new_digits =
478 wuffs_base__private_implementation__high_prec_dec__lshift_num_new_digits(
479 h, shift);
480 uint32_t rx = h->num_digits - 1; // Read index.
481 uint32_t wx = h->num_digits - 1 + num_new_digits; // Write index.
482 uint64_t n = 0;
483
484 // Repeat: pick up a digit, put down a digit, right to left.
485 while (((int32_t)rx) >= 0) {
486 n += ((uint64_t)(h->digits[rx])) << shift;
487 uint64_t quo = n / 10;
488 uint64_t rem = n - (10 * quo);
489 if (wx < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
490 h->digits[wx] = (uint8_t)rem;
491 } else if (rem > 0) {
492 h->truncated = true;
493 }
494 n = quo;
495 wx--;
496 rx--;
497 }
498
499 // Put down leading digits, right to left.
500 while (n > 0) {
501 uint64_t quo = n / 10;
502 uint64_t rem = n - (10 * quo);
503 if (wx < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
504 h->digits[wx] = (uint8_t)rem;
505 } else if (rem > 0) {
506 h->truncated = true;
507 }
508 n = quo;
509 wx--;
510 }
511
512 // Finish.
513 h->num_digits += num_new_digits;
514 if (h->num_digits >
515 WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
516 h->num_digits = WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION;
517 }
518 h->decimal_point += (int32_t)num_new_digits;
519 wuffs_base__private_implementation__high_prec_dec__trim(h);
520}
521
522static void //
523wuffs_base__private_implementation__high_prec_dec__small_rshift(
524 wuffs_base__private_implementation__high_prec_dec* h,
525 uint32_t shift) {
526 uint32_t rx = 0; // Read index.
527 uint32_t wx = 0; // Write index.
528 uint64_t n = 0;
529
530 // Pick up enough leading digits to cover the first shift.
531 while ((n >> shift) == 0) {
532 if (rx < h->num_digits) {
533 // Read a digit.
534 n = (10 * n) + h->digits[rx++];
535 } else if (n == 0) {
536 // h's number used to be zero and remains zero.
537 return;
538 } else {
539 // Read sufficient implicit trailing zeroes.
540 while ((n >> shift) == 0) {
541 n = 10 * n;
542 rx++;
543 }
544 break;
545 }
546 }
547 h->decimal_point -= ((int32_t)(rx - 1));
548 if (h->decimal_point <
549 -WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
550 // After the shift, h's number is effectively zero.
551 h->num_digits = 0;
552 h->decimal_point = 0;
553 h->negative = false;
554 h->truncated = false;
555 return;
556 }
557
558 // Repeat: pick up a digit, put down a digit, left to right.
559 uint64_t mask = (((uint64_t)(1)) << shift) - 1;
560 while (rx < h->num_digits) {
561 uint8_t new_digit = ((uint8_t)(n >> shift));
562 n = (10 * (n & mask)) + h->digits[rx++];
563 h->digits[wx++] = new_digit;
564 }
565
566 // Put down trailing digits, left to right.
567 while (n > 0) {
568 uint8_t new_digit = ((uint8_t)(n >> shift));
569 n = 10 * (n & mask);
570 if (wx < WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DIGITS_PRECISION) {
571 h->digits[wx++] = new_digit;
572 } else if (new_digit > 0) {
573 h->truncated = true;
574 }
575 }
576
577 // Finish.
578 h->num_digits = wx;
579 wuffs_base__private_implementation__high_prec_dec__trim(h);
580}
581
582static void //
583wuffs_base__private_implementation__high_prec_dec__lshift(
584 wuffs_base__private_implementation__high_prec_dec* h,
585 int32_t shift) {
586 if (shift > 0) {
587 while (shift > +WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL) {
588 wuffs_base__private_implementation__high_prec_dec__small_lshift(
589 h, WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL);
590 shift -= WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;
591 }
592 wuffs_base__private_implementation__high_prec_dec__small_lshift(
593 h, ((uint32_t)(+shift)));
594 } else if (shift < 0) {
595 while (shift < -WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL) {
596 wuffs_base__private_implementation__high_prec_dec__small_rshift(
597 h, WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL);
598 shift += WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;
599 }
600 wuffs_base__private_implementation__high_prec_dec__small_rshift(
601 h, ((uint32_t)(-shift)));
602 }
603}
604
605// --------
606
607// wuffs_base__private_implementation__high_prec_dec__round_etc rounds h's
608// number. For those functions that take an n argument, rounding produces at
609// most n digits (which is not necessarily at most n decimal places). Negative
610// n values are ignored, as well as any n greater than or equal to h's number
611// of digits. The etc__round_just_enough function implicitly chooses an n to
612// implement WUFFS_BASE__RENDER_NUMBER_FXX__JUST_ENOUGH_PRECISION.
613//
614// Preconditions:
615// - h is non-NULL.
616// - h->decimal_point is "not extreme".
617//
618// "Not extreme" means within
619// ±WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE.
620
621static void //
622wuffs_base__private_implementation__high_prec_dec__round_down(
623 wuffs_base__private_implementation__high_prec_dec* h,
624 int32_t n) {
625 if ((n < 0) || (h->num_digits <= (uint32_t)n)) {
626 return;
627 }
628 h->num_digits = (uint32_t)(n);
629 wuffs_base__private_implementation__high_prec_dec__trim(h);
630}
631
632static void //
633wuffs_base__private_implementation__high_prec_dec__round_up(
634 wuffs_base__private_implementation__high_prec_dec* h,
635 int32_t n) {
636 if ((n < 0) || (h->num_digits <= (uint32_t)n)) {
637 return;
638 }
639
640 for (n--; n >= 0; n--) {
641 if (h->digits[n] < 9) {
642 h->digits[n]++;
643 h->num_digits = (uint32_t)(n + 1);
644 return;
645 }
646 }
647
648 // The number is all 9s. Change to a single 1 and adjust the decimal point.
649 h->digits[0] = 1;
650 h->num_digits = 1;
651 h->decimal_point++;
652}
653
654static void //
655wuffs_base__private_implementation__high_prec_dec__round_nearest(
656 wuffs_base__private_implementation__high_prec_dec* h,
657 int32_t n) {
658 if ((n < 0) || (h->num_digits <= (uint32_t)n)) {
659 return;
660 }
661 bool up = h->digits[n] >= 5;
662 if ((h->digits[n] == 5) && ((n + 1) == ((int32_t)(h->num_digits)))) {
663 up = h->truncated || //
664 ((n > 0) && ((h->digits[n - 1] & 1) != 0));
665 }
666
667 if (up) {
668 wuffs_base__private_implementation__high_prec_dec__round_up(h, n);
669 } else {
670 wuffs_base__private_implementation__high_prec_dec__round_down(h, n);
671 }
672}
673
674static void //
675wuffs_base__private_implementation__high_prec_dec__round_just_enough(
676 wuffs_base__private_implementation__high_prec_dec* h,
677 int32_t exp2,
678 uint64_t mantissa) {
679 // The magic numbers 52 and 53 in this function are because IEEE 754 double
680 // precision has 52 mantissa bits.
681 //
682 // Let f be the floating point number represented by exp2 and mantissa (and
683 // also the number in h): the number (mantissa * (2 ** (exp2 - 52))).
684 //
685 // If f is zero or a small integer, we can return early.
686 if ((mantissa == 0) ||
687 ((exp2 < 53) && (h->decimal_point >= ((int32_t)(h->num_digits))))) {
688 return;
689 }
690
691 // The smallest normal f has an exp2 of -1022 and a mantissa of (1 << 52).
692 // Subnormal numbers have the same exp2 but a smaller mantissa.
693 static const int32_t min_incl_normal_exp2 = -1022;
694 static const uint64_t min_incl_normal_mantissa = 0x0010000000000000ul;
695
696 // Compute lower and upper bounds such that any number between them (possibly
697 // inclusive) will round to f. First, the lower bound. Our number f is:
698 // ((mantissa + 0) * (2 ** ( exp2 - 52)))
699 //
700 // The next lowest floating point number is:
701 // ((mantissa - 1) * (2 ** ( exp2 - 52)))
702 // unless (mantissa - 1) drops the (1 << 52) bit and exp2 is not the
703 // min_incl_normal_exp2. Either way, call it:
704 // ((l_mantissa) * (2 ** (l_exp2 - 52)))
705 //
706 // The lower bound is halfway between them (noting that 52 became 53):
707 // (((2 * l_mantissa) + 1) * (2 ** (l_exp2 - 53)))
708 int32_t l_exp2 = exp2;
709 uint64_t l_mantissa = mantissa - 1;
710 if ((exp2 > min_incl_normal_exp2) && (mantissa <= min_incl_normal_mantissa)) {
711 l_exp2 = exp2 - 1;
712 l_mantissa = (2 * mantissa) - 1;
713 }
714 wuffs_base__private_implementation__high_prec_dec lower;
715 wuffs_base__private_implementation__high_prec_dec__assign(
716 &lower, (2 * l_mantissa) + 1, false);
717 wuffs_base__private_implementation__high_prec_dec__lshift(&lower,
718 l_exp2 - 53);
719
720 // Next, the upper bound. Our number f is:
721 // ((mantissa + 0) * (2 ** (exp2 - 52)))
722 //
723 // The next highest floating point number is:
724 // ((mantissa + 1) * (2 ** (exp2 - 52)))
725 //
726 // The upper bound is halfway between them (noting that 52 became 53):
727 // (((2 * mantissa) + 1) * (2 ** (exp2 - 53)))
728 wuffs_base__private_implementation__high_prec_dec upper;
729 wuffs_base__private_implementation__high_prec_dec__assign(
730 &upper, (2 * mantissa) + 1, false);
731 wuffs_base__private_implementation__high_prec_dec__lshift(&upper, exp2 - 53);
732
733 // The lower and upper bounds are possible outputs only if the original
734 // mantissa is even, so that IEEE round-to-even would round to the original
735 // mantissa and not its neighbors.
736 bool inclusive = (mantissa & 1) == 0;
737
738 // As we walk the digits, we want to know whether rounding up would fall
739 // within the upper bound. This is tracked by upper_delta:
740 // - When -1, the digits of h and upper are the same so far.
741 // - When +0, we saw a difference of 1 between h and upper on a previous
742 // digit and subsequently only 9s for h and 0s for upper. Thus, rounding
743 // up may fall outside of the bound if !inclusive.
744 // - When +1, the difference is greater than 1 and we know that rounding up
745 // falls within the bound.
746 //
747 // This is a state machine with three states. The numerical value for each
748 // state (-1, +0 or +1) isn't important, other than their order.
749 int upper_delta = -1;
750
751 // We can now figure out the shortest number of digits required. Walk the
752 // digits until h has distinguished itself from lower or upper.
753 //
754 // The zi and zd variables are indexes and digits, for z in l (lower), h (the
755 // number) and u (upper).
756 //
757 // The lower, h and upper numbers may have their decimal points at different
758 // places. In this case, upper is the longest, so we iterate ui starting from
759 // 0 and iterate li and hi starting from either 0 or -1.
760 int32_t ui = 0;
761 for (;; ui++) {
762 // Calculate hd, the middle number's digit.
763 int32_t hi = ui - upper.decimal_point + h->decimal_point;
764 if (hi >= ((int32_t)(h->num_digits))) {
765 break;
766 }
767 uint8_t hd = (((uint32_t)hi) < h->num_digits) ? h->digits[hi] : 0;
768
769 // Calculate ld, the lower bound's digit.
770 int32_t li = ui - upper.decimal_point + lower.decimal_point;
771 uint8_t ld = (((uint32_t)li) < lower.num_digits) ? lower.digits[li] : 0;
772
773 // We can round down (truncate) if lower has a different digit than h or if
774 // lower is inclusive and is exactly the result of rounding down (i.e. we
775 // have reached the final digit of lower).
776 bool can_round_down =
777 (ld != hd) || //
778 (inclusive && ((li + 1) == ((int32_t)(lower.num_digits))));
779
780 // Calculate ud, the upper bound's digit, and update upper_delta.
781 uint8_t ud = (((uint32_t)ui) < upper.num_digits) ? upper.digits[ui] : 0;
782 if (upper_delta < 0) {
783 if ((hd + 1) < ud) {
784 // For example:
785 // h = 12345???
786 // upper = 12347???
787 upper_delta = +1;
788 } else if (hd != ud) {
789 // For example:
790 // h = 12345???
791 // upper = 12346???
792 upper_delta = +0;
793 }
794 } else if (upper_delta == 0) {
795 if ((hd != 9) || (ud != 0)) {
796 // For example:
797 // h = 1234598?
798 // upper = 1234600?
799 upper_delta = +1;
800 }
801 }
802
803 // We can round up if upper has a different digit than h and either upper
804 // is inclusive or upper is bigger than the result of rounding up.
805 bool can_round_up =
806 (upper_delta > 0) || //
807 ((upper_delta == 0) && //
808 (inclusive || ((ui + 1) < ((int32_t)(upper.num_digits)))));
809
810 // If we can round either way, round to nearest. If we can round only one
811 // way, do it. If we can't round, continue the loop.
812 if (can_round_down) {
813 if (can_round_up) {
814 wuffs_base__private_implementation__high_prec_dec__round_nearest(
815 h, hi + 1);
816 return;
817 } else {
818 wuffs_base__private_implementation__high_prec_dec__round_down(h,
819 hi + 1);
820 return;
821 }
822 } else {
823 if (can_round_up) {
824 wuffs_base__private_implementation__high_prec_dec__round_up(h, hi + 1);
825 return;
826 }
827 }
828 }
829}
830
831// --------
832
833// wuffs_base__private_implementation__parse_number_f64_eisel produces the IEEE
Nigel Taob15a0fc2020-07-08 10:50:14 +1000834// 754 double-precision value for an exact mantissa and base-10 exponent. For
835// example:
836// - when parsing "12345.678e+02", man is 12345678 and exp10 is -1.
837// - when parsing "-12", man is 12 and exp10 is 0. Processing the leading
838// minus sign is the responsibility of the caller, not this function.
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000839//
840// On success, it returns a non-negative int64_t such that the low 63 bits hold
841// the 11-bit exponent and 52-bit mantissa.
842//
843// On failure, it returns a negative value.
844//
845// The algorithm is based on an original idea by Michael Eisel. See
846// https://lemire.me/blog/2020/03/10/fast-float-parsing-in-practice/
847//
848// Preconditions:
849// - man is non-zero.
850// - exp10 is in the range -326 ..= 310, the same range of the
851// wuffs_base__private_implementation__powers_of_10 array.
852static int64_t //
853wuffs_base__private_implementation__parse_number_f64_eisel(uint64_t man,
854 int32_t exp10) {
855 // Look up the (possibly truncated) base-2 representation of (10 ** exp10).
856 // The look-up table was constructed so that it is already normalized: the
857 // table entry's mantissa's MSB (most significant bit) is on.
858 const uint32_t* po10 =
859 &wuffs_base__private_implementation__powers_of_10[5 * (exp10 + 326)];
860
861 // Normalize the man argument. The (man != 0) precondition means that a
862 // non-zero bit exists.
863 uint32_t clz = wuffs_base__count_leading_zeroes_u64(man);
864 man <<= clz;
865
866 // Calculate the return value's base-2 exponent. We might tweak it by ±1
867 // later, but its initial value comes from the look-up table and clz.
868 uint64_t ret_exp2 = ((uint64_t)po10[4]) - ((uint64_t)clz);
869
870 // Multiply the two mantissas. Normalization means that both mantissas are at
871 // least (1<<63), so the 128-bit product must be at least (1<<126). The high
Nigel Tao74d4af62020-07-10 11:27:17 +1000872 // 64 bits of the product, x_hi, must therefore be at least (1<<62).
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000873 //
Nigel Tao74d4af62020-07-10 11:27:17 +1000874 // As a consequence, x_hi has either 0 or 1 leading zeroes. Shifting x_hi
875 // right by either 9 or 10 bits (depending on x_hi's MSB) will therefore
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000876 // leave the top 10 MSBs (bits 54 ..= 63) off and the 11th MSB (bit 53) on.
Nigel Taoe722bb02020-07-10 20:33:09 +1000877#if defined(__SIZEOF_INT128__)
Nigel Taobff01d72020-07-10 12:15:09 +1000878 // See commit 18449ad75d582dd015c236abc85a16f333b796f3 "Optimize 128-bit muls
879 // in parse_number_f64_eisel" for benchmark numbers.
Nigel Tao18449ad2020-07-10 11:48:52 +1000880 __uint128_t x =
881 ((__uint128_t)man) * (((uint64_t)po10[2]) | (((uint64_t)po10[3]) << 32));
882 uint64_t x_hi = ((uint64_t)(x >> 64));
883 uint64_t x_lo = ((uint64_t)(x));
884#else
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000885 wuffs_base__multiply_u64__output x = wuffs_base__multiply_u64(
886 man, ((uint64_t)po10[2]) | (((uint64_t)po10[3]) << 32));
Nigel Tao74d4af62020-07-10 11:27:17 +1000887 uint64_t x_hi = x.hi;
888 uint64_t x_lo = x.lo;
Nigel Tao18449ad2020-07-10 11:48:52 +1000889#endif
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000890
891 // Before we shift right by at least 9 bits, recall that the look-up table
892 // entry was possibly truncated. We have so far only calculated a lower bound
893 // for the product (man * e), where e is (10 ** exp10). The upper bound would
894 // add a further (man * 1) to the 128-bit product, which overflows the lower
Nigel Tao74d4af62020-07-10 11:27:17 +1000895 // 64-bit limb if ((x_lo + man) < man).
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000896 //
Nigel Tao74d4af62020-07-10 11:27:17 +1000897 // If overflow occurs, that adds 1 to x_hi. Since we're about to shift right
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000898 // by at least 9 bits, that carried 1 can be ignored unless the higher 64-bit
899 // limb's low 9 bits are all on.
Nigel Tao74d4af62020-07-10 11:27:17 +1000900 if (((x_hi & 0x1FF) == 0x1FF) && ((x_lo + man) < man)) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000901 // Refine our calculation of (man * e). Before, our approximation of e used
902 // a "low resolution" 64-bit mantissa. Now use a "high resolution" 128-bit
903 // mantissa. We've already calculated x = (man * bits_0_to_63_incl_of_e).
904 // Now calculate y = (man * bits_64_to_127_incl_of_e).
Nigel Taoe722bb02020-07-10 20:33:09 +1000905#if defined(__SIZEOF_INT128__)
Nigel Taobff01d72020-07-10 12:15:09 +1000906 // See commit 18449ad75d582dd015c236abc85a16f333b796f3 "Optimize 128-bit
907 // muls in parse_number_f64_eisel" for benchmark numbers.
Nigel Tao18449ad2020-07-10 11:48:52 +1000908 __uint128_t y = ((__uint128_t)man) *
909 (((uint64_t)po10[0]) | (((uint64_t)po10[1]) << 32));
910 uint64_t y_hi = ((uint64_t)(y >> 64));
911 uint64_t y_lo = ((uint64_t)(y));
912#else
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000913 wuffs_base__multiply_u64__output y = wuffs_base__multiply_u64(
914 man, ((uint64_t)po10[0]) | (((uint64_t)po10[1]) << 32));
Nigel Tao74d4af62020-07-10 11:27:17 +1000915 uint64_t y_hi = y.hi;
916 uint64_t y_lo = y.lo;
Nigel Tao18449ad2020-07-10 11:48:52 +1000917#endif
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000918
919 // Merge the 128-bit x and 128-bit y, which overlap by 64 bits, to
920 // calculate the 192-bit product of the 64-bit man by the 128-bit e.
921 // As we exit this if-block, we only care about the high 128 bits
922 // (merged_hi and merged_lo) of that 192-bit product.
Nigel Tao74d4af62020-07-10 11:27:17 +1000923 uint64_t merged_hi = x_hi;
924 uint64_t merged_lo = x_lo + y_hi;
925 if (merged_lo < x_lo) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000926 merged_hi++; // Carry the overflow bit.
927 }
928
929 // The "high resolution" approximation of e is still a lower bound. Once
930 // again, see if the upper bound is large enough to produce a different
931 // result. This time, if it does, give up instead of reaching for an even
932 // more precise approximation to e.
933 //
934 // This three-part check is similar to the two-part check that guarded the
935 // if block that we're now in, but it has an extra term for the middle 64
936 // bits (checking that adding 1 to merged_lo would overflow).
937 if (((merged_hi & 0x1FF) == 0x1FF) && ((merged_lo + 1) == 0) &&
Nigel Tao74d4af62020-07-10 11:27:17 +1000938 (y_lo + man < man)) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000939 return -1;
940 }
941
942 // Replace the 128-bit x with merged.
Nigel Tao74d4af62020-07-10 11:27:17 +1000943 x_hi = merged_hi;
944 x_lo = merged_lo;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000945 }
946
Nigel Tao74d4af62020-07-10 11:27:17 +1000947 // As mentioned above, shifting x_hi right by either 9 or 10 bits will leave
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000948 // the top 10 MSBs (bits 54 ..= 63) off and the 11th MSB (bit 53) on. If the
949 // MSB (before shifting) was on, adjust ret_exp2 for the larger shift.
950 //
951 // Having bit 53 on (and higher bits off) means that ret_mantissa is a 54-bit
952 // number.
Nigel Tao74d4af62020-07-10 11:27:17 +1000953 uint64_t msb = x_hi >> 63;
954 uint64_t ret_mantissa = x_hi >> (msb + 9);
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000955 ret_exp2 -= 1 ^ msb;
956
957 // IEEE 754 rounds to-nearest with ties rounded to-even. Rounding to-even can
958 // be tricky. If we're half-way between two exactly representable numbers
959 // (x's low 73 bits are zero and the next 2 bits that matter are "01"), give
960 // up instead of trying to pick the winner.
961 //
962 // Technically, we could tighten the condition by changing "73" to "73 or 74,
963 // depending on msb", but a flat "73" is simpler.
Nigel Tao74d4af62020-07-10 11:27:17 +1000964 if ((x_lo == 0) && ((x_hi & 0x1FF) == 0) && ((ret_mantissa & 3) == 1)) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000965 return -1;
966 }
967
968 // If we're not halfway then it's rounding to-nearest. Starting with a 54-bit
969 // number, carry the lowest bit (bit 0) up if it's on. Regardless of whether
970 // it was on or off, shifting right by one then produces a 53-bit number. If
971 // carrying up overflowed, shift again.
972 ret_mantissa += ret_mantissa & 1;
973 ret_mantissa >>= 1;
974 if ((ret_mantissa >> 53) > 0) {
975 ret_mantissa >>= 1;
976 ret_exp2++;
977 }
978
979 // Starting with a 53-bit number, IEEE 754 double-precision normal numbers
980 // have an implicit mantissa bit. Mask that away and keep the low 52 bits.
981 ret_mantissa &= 0x000FFFFFFFFFFFFF;
982
983 // IEEE 754 double-precision floating point has 11 exponent bits. All off (0)
984 // means subnormal numbers. All on (2047) means infinity or NaN.
985 if ((ret_exp2 <= 0) || (2047 <= ret_exp2)) {
986 return -1;
987 }
988
989 // Pack the bits and return.
990 return ((int64_t)(ret_mantissa | (ret_exp2 << 52)));
991}
992
993// --------
994
995static wuffs_base__result_f64 //
Nigel Taoe0c5de92020-07-11 11:48:17 +1000996wuffs_base__private_implementation__parse_number_f64_special(
997 wuffs_base__slice_u8 s,
Nigel Tao4d61a052020-07-11 12:34:40 +1000998 uint32_t options) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +1000999 do {
Nigel Tao4d61a052020-07-11 12:34:40 +10001000 if (options & WUFFS_BASE__PARSE_NUMBER_FXX__REJECT_INF_AND_NAN) {
1001 goto fail;
1002 }
1003
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001004 uint8_t* p = s.ptr;
1005 uint8_t* q = s.ptr + s.len;
1006
1007 for (; (p < q) && (*p == '_'); p++) {
1008 }
1009 if (p >= q) {
Nigel Tao4d61a052020-07-11 12:34:40 +10001010 goto fail;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001011 }
1012
1013 // Parse sign.
1014 bool negative = false;
1015 do {
1016 if (*p == '+') {
1017 p++;
1018 } else if (*p == '-') {
1019 negative = true;
1020 p++;
1021 } else {
1022 break;
1023 }
1024 for (; (p < q) && (*p == '_'); p++) {
1025 }
1026 } while (0);
1027 if (p >= q) {
Nigel Tao4d61a052020-07-11 12:34:40 +10001028 goto fail;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001029 }
1030
1031 bool nan = false;
1032 switch (p[0]) {
1033 case 'I':
1034 case 'i':
1035 if (((q - p) < 3) || //
1036 ((p[1] != 'N') && (p[1] != 'n')) || //
1037 ((p[2] != 'F') && (p[2] != 'f'))) {
Nigel Tao4d61a052020-07-11 12:34:40 +10001038 goto fail;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001039 }
1040 p += 3;
1041
1042 if ((p >= q) || (*p == '_')) {
1043 break;
1044 } else if (((q - p) < 5) || //
1045 ((p[0] != 'I') && (p[0] != 'i')) || //
1046 ((p[1] != 'N') && (p[1] != 'n')) || //
1047 ((p[2] != 'I') && (p[2] != 'i')) || //
1048 ((p[3] != 'T') && (p[3] != 't')) || //
1049 ((p[4] != 'Y') && (p[4] != 'y'))) {
Nigel Tao4d61a052020-07-11 12:34:40 +10001050 goto fail;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001051 }
1052 p += 5;
1053
1054 if ((p >= q) || (*p == '_')) {
1055 break;
1056 }
Nigel Tao4d61a052020-07-11 12:34:40 +10001057 goto fail;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001058
1059 case 'N':
1060 case 'n':
1061 if (((q - p) < 3) || //
1062 ((p[1] != 'A') && (p[1] != 'a')) || //
1063 ((p[2] != 'N') && (p[2] != 'n'))) {
Nigel Tao4d61a052020-07-11 12:34:40 +10001064 goto fail;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001065 }
1066 p += 3;
1067
1068 if ((p >= q) || (*p == '_')) {
1069 nan = true;
1070 break;
1071 }
Nigel Tao4d61a052020-07-11 12:34:40 +10001072 goto fail;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001073
1074 default:
Nigel Tao4d61a052020-07-11 12:34:40 +10001075 goto fail;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001076 }
1077
1078 // Finish.
1079 for (; (p < q) && (*p == '_'); p++) {
1080 }
1081 if (p != q) {
Nigel Tao4d61a052020-07-11 12:34:40 +10001082 goto fail;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001083 }
1084 wuffs_base__result_f64 ret;
1085 ret.status.repr = NULL;
Nigel Tao4d449dc2020-07-12 11:00:47 +10001086 ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001087 (nan ? 0x7FFFFFFFFFFFFFFF : 0x7FF0000000000000) |
1088 (negative ? 0x8000000000000000 : 0));
1089 return ret;
1090 } while (0);
1091
Nigel Tao4d61a052020-07-11 12:34:40 +10001092fail:
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001093 do {
1094 wuffs_base__result_f64 ret;
Nigel Tao4d61a052020-07-11 12:34:40 +10001095 ret.status.repr = wuffs_base__error__bad_argument;
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001096 ret.value = 0;
1097 return ret;
1098 } while (0);
1099}
1100
1101WUFFS_BASE__MAYBE_STATIC wuffs_base__result_f64 //
Nigel Taoe0c5de92020-07-11 11:48:17 +10001102wuffs_base__private_implementation__high_prec_dec__to_f64(
Nigel Tao4d61a052020-07-11 12:34:40 +10001103 wuffs_base__private_implementation__high_prec_dec* h,
1104 uint32_t options) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001105 do {
1106 // powers converts decimal powers of 10 to binary powers of 2. For example,
1107 // (10000 >> 13) is 1. It stops before the elements exceed 60, also known
1108 // as WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL.
1109 static const uint32_t num_powers = 19;
1110 static const uint8_t powers[19] = {
1111 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, //
1112 33, 36, 39, 43, 46, 49, 53, 56, 59, //
1113 };
1114
1115 // Handle zero and obvious extremes. The largest and smallest positive
1116 // finite f64 values are approximately 1.8e+308 and 4.9e-324.
1117 if ((h->num_digits == 0) || (h->decimal_point < -326)) {
1118 goto zero;
1119 } else if (h->decimal_point > 310) {
1120 goto infinity;
1121 }
1122
1123 // Try the fast Eisel algorithm again. Calculating the (man, exp10) pair
1124 // from the high_prec_dec h is more correct but slower than the approach
1125 // taken in wuffs_base__parse_number_f64. The latter is optimized for the
1126 // common cases (e.g. assuming no underscores or a leading '+' sign) rather
1127 // than the full set of cases allowed by the Wuffs API.
1128 if (h->num_digits <= 19) {
1129 uint64_t man = 0;
1130 uint32_t i;
1131 for (i = 0; i < h->num_digits; i++) {
1132 man = (10 * man) + h->digits[i];
1133 }
1134 int32_t exp10 = h->decimal_point - ((int32_t)(h->num_digits));
1135 if ((man != 0) && (-326 <= exp10) && (exp10 <= 310)) {
1136 int64_t r = wuffs_base__private_implementation__parse_number_f64_eisel(
1137 man, exp10);
1138 if (r >= 0) {
1139 wuffs_base__result_f64 ret;
1140 ret.status.repr = NULL;
Nigel Tao4d449dc2020-07-12 11:00:47 +10001141 ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001142 ((uint64_t)r) | (((uint64_t)(h->negative)) << 63));
1143 return ret;
1144 }
1145 }
1146 }
1147
1148 // Scale by powers of 2 until we're in the range [½ .. 1], which gives us
1149 // our exponent (in base-2). First we shift right, possibly a little too
1150 // far, ending with a value certainly below 1 and possibly below ½...
1151 const int32_t f64_bias = -1023;
1152 int32_t exp2 = 0;
1153 while (h->decimal_point > 0) {
1154 uint32_t n = (uint32_t)(+h->decimal_point);
1155 uint32_t shift =
1156 (n < num_powers)
1157 ? powers[n]
1158 : WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;
1159
1160 wuffs_base__private_implementation__high_prec_dec__small_rshift(h, shift);
1161 if (h->decimal_point <
1162 -WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
1163 goto zero;
1164 }
1165 exp2 += (int32_t)shift;
1166 }
1167 // ...then we shift left, putting us in [½ .. 1].
1168 while (h->decimal_point <= 0) {
1169 uint32_t shift;
1170 if (h->decimal_point == 0) {
1171 if (h->digits[0] >= 5) {
1172 break;
1173 }
1174 shift = (h->digits[0] <= 2) ? 2 : 1;
1175 } else {
1176 uint32_t n = (uint32_t)(-h->decimal_point);
1177 shift = (n < num_powers)
1178 ? powers[n]
1179 : WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;
1180 }
1181
1182 wuffs_base__private_implementation__high_prec_dec__small_lshift(h, shift);
1183 if (h->decimal_point >
1184 +WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__DECIMAL_POINT__RANGE) {
1185 goto infinity;
1186 }
1187 exp2 -= (int32_t)shift;
1188 }
1189
1190 // We're in the range [½ .. 1] but f64 uses [1 .. 2].
1191 exp2--;
1192
1193 // The minimum normal exponent is (f64_bias + 1).
1194 while ((f64_bias + 1) > exp2) {
1195 uint32_t n = (uint32_t)((f64_bias + 1) - exp2);
1196 if (n > WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL) {
1197 n = WUFFS_BASE__PRIVATE_IMPLEMENTATION__HPD__SHIFT__MAX_INCL;
1198 }
1199 wuffs_base__private_implementation__high_prec_dec__small_rshift(h, n);
1200 exp2 += (int32_t)n;
1201 }
1202
1203 // Check for overflow.
1204 if ((exp2 - f64_bias) >= 0x07FF) { // (1 << 11) - 1.
1205 goto infinity;
1206 }
1207
1208 // Extract 53 bits for the mantissa (in base-2).
1209 wuffs_base__private_implementation__high_prec_dec__small_lshift(h, 53);
1210 uint64_t man2 =
1211 wuffs_base__private_implementation__high_prec_dec__rounded_integer(h);
1212
1213 // Rounding might have added one bit. If so, shift and re-check overflow.
1214 if ((man2 >> 53) != 0) {
1215 man2 >>= 1;
1216 exp2++;
1217 if ((exp2 - f64_bias) >= 0x07FF) { // (1 << 11) - 1.
1218 goto infinity;
1219 }
1220 }
1221
1222 // Handle subnormal numbers.
1223 if ((man2 >> 52) == 0) {
1224 exp2 = f64_bias;
1225 }
1226
1227 // Pack the bits and return.
1228 uint64_t exp2_bits =
1229 (uint64_t)((exp2 - f64_bias) & 0x07FF); // (1 << 11) - 1.
1230 uint64_t bits = (man2 & 0x000FFFFFFFFFFFFF) | // (1 << 52) - 1.
1231 (exp2_bits << 52) | //
1232 (h->negative ? 0x8000000000000000 : 0); // (1 << 63).
1233
1234 wuffs_base__result_f64 ret;
1235 ret.status.repr = NULL;
Nigel Tao4d449dc2020-07-12 11:00:47 +10001236 ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(bits);
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001237 return ret;
1238 } while (0);
1239
1240zero:
1241 do {
1242 uint64_t bits = h->negative ? 0x8000000000000000 : 0;
1243
1244 wuffs_base__result_f64 ret;
1245 ret.status.repr = NULL;
Nigel Tao4d449dc2020-07-12 11:00:47 +10001246 ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(bits);
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001247 return ret;
1248 } while (0);
1249
1250infinity:
1251 do {
Nigel Tao4d61a052020-07-11 12:34:40 +10001252 if (options & WUFFS_BASE__PARSE_NUMBER_FXX__REJECT_INF_AND_NAN) {
1253 wuffs_base__result_f64 ret;
1254 ret.status.repr = wuffs_base__error__bad_argument;
1255 ret.value = 0;
1256 return ret;
1257 }
1258
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001259 uint64_t bits = h->negative ? 0xFFF0000000000000 : 0x7FF0000000000000;
1260
1261 wuffs_base__result_f64 ret;
1262 ret.status.repr = NULL;
Nigel Tao4d449dc2020-07-12 11:00:47 +10001263 ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(bits);
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001264 return ret;
1265 } while (0);
1266}
1267
1268static inline bool //
1269wuffs_base__private_implementation__is_decimal_digit(uint8_t c) {
1270 return ('0' <= c) && (c <= '9');
1271}
1272
1273WUFFS_BASE__MAYBE_STATIC wuffs_base__result_f64 //
1274wuffs_base__parse_number_f64(wuffs_base__slice_u8 s, uint32_t options) {
1275 // In practice, almost all "dd.ddddE±xxx" numbers can be represented
1276 // losslessly by a uint64_t mantissa "dddddd" and an int32_t base-10
1277 // exponent, adjusting "xxx" for the position (if present) of the decimal
1278 // separator '.' or ','.
1279 //
1280 // This (u64 man, i32 exp10) data structure is superficially similar to the
1281 // "Do It Yourself Floating Point" type from Loitsch (†), but the exponent
1282 // here is base-10, not base-2.
1283 //
1284 // If s's number fits in a (man, exp10), parse that pair with the Eisel
1285 // algorithm. If not, or if Eisel fails, parsing s with the fallback
1286 // algorithm is slower but comprehensive.
1287 //
1288 // † "Printing Floating-Point Numbers Quickly and Accurately with Integers"
1289 // (https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf).
1290 // Florian Loitsch is also the primary contributor to
1291 // https://github.com/google/double-conversion
1292 do {
1293 // Calculating that (man, exp10) pair needs to stay within s's bounds.
1294 // Provided that s isn't extremely long, work on a NUL-terminated copy of
1295 // s's contents. The NUL byte isn't a valid part of "±dd.ddddE±xxx".
1296 //
1297 // As the pointer p walks the contents, it's faster to repeatedly check "is
1298 // *p a valid digit" than "is p within bounds and *p a valid digit".
1299 if (s.len >= 256) {
1300 goto fallback;
1301 }
1302 uint8_t z[256];
1303 memcpy(&z[0], s.ptr, s.len);
1304 z[s.len] = 0;
1305 const uint8_t* p = &z[0];
1306
1307 // Look for a leading minus sign. Technically, we could also look for an
1308 // optional plus sign, but the "script/process-json-numbers.c with -p"
1309 // benchmark is noticably slower if we do. It's optional and, in practice,
1310 // usually absent. Let the fallback catch it.
1311 bool negative = (*p == '-');
1312 if (negative) {
1313 p++;
1314 }
1315
1316 // After walking "dd.dddd", comparing p later with p now will produce the
1317 // number of "d"s and "."s.
1318 const uint8_t* const start_of_digits_ptr = p;
1319
1320 // Walk the "d"s before a '.', 'E', NUL byte, etc. If it starts with '0',
1321 // it must be a single '0'. If it starts with a non-zero decimal digit, it
1322 // can be a sequence of decimal digits.
1323 //
1324 // Update the man variable during the walk. It's OK if man overflows now.
1325 // We'll detect that later.
1326 uint64_t man;
1327 if (*p == '0') {
1328 man = 0;
1329 p++;
1330 if (wuffs_base__private_implementation__is_decimal_digit(*p)) {
1331 goto fallback;
1332 }
1333 } else if (wuffs_base__private_implementation__is_decimal_digit(*p)) {
1334 man = ((uint8_t)(*p - '0'));
1335 p++;
1336 for (; wuffs_base__private_implementation__is_decimal_digit(*p); p++) {
1337 man = (10 * man) + ((uint8_t)(*p - '0'));
1338 }
1339 } else {
1340 goto fallback;
1341 }
1342
1343 // Walk the "d"s after the optional decimal separator ('.' or ','),
1344 // updating the man and exp10 variables.
1345 int32_t exp10 = 0;
Nigel Taoe0c5de92020-07-11 11:48:17 +10001346 if (*p ==
1347 ((options & WUFFS_BASE__PARSE_NUMBER_FXX__DECIMAL_SEPARATOR_IS_A_COMMA)
1348 ? ','
1349 : '.')) {
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001350 p++;
1351 const uint8_t* first_after_separator_ptr = p;
1352 if (!wuffs_base__private_implementation__is_decimal_digit(*p)) {
1353 goto fallback;
1354 }
1355 man = (10 * man) + ((uint8_t)(*p - '0'));
1356 p++;
1357 for (; wuffs_base__private_implementation__is_decimal_digit(*p); p++) {
1358 man = (10 * man) + ((uint8_t)(*p - '0'));
1359 }
1360 exp10 = ((int32_t)(first_after_separator_ptr - p));
1361 }
1362
1363 // Count the number of digits:
1364 // - for an input of "314159", digit_count is 6.
1365 // - for an input of "3.14159", digit_count is 7.
1366 //
1367 // This is off-by-one if there is a decimal separator. That's OK for now.
1368 // We'll correct for that later. The "script/process-json-numbers.c with
1369 // -p" benchmark is noticably slower if we try to correct for that now.
1370 uint32_t digit_count = (uint32_t)(p - start_of_digits_ptr);
1371
1372 // Update exp10 for the optional exponent, starting with 'E' or 'e'.
1373 if ((*p | 0x20) == 'e') {
1374 p++;
1375 int32_t exp_sign = +1;
1376 if (*p == '-') {
1377 p++;
1378 exp_sign = -1;
1379 } else if (*p == '+') {
1380 p++;
1381 }
1382 if (!wuffs_base__private_implementation__is_decimal_digit(*p)) {
1383 goto fallback;
1384 }
1385 int32_t exp_num = ((uint8_t)(*p - '0'));
1386 p++;
1387 // The rest of the exp_num walking has a peculiar control flow but, once
1388 // again, the "script/process-json-numbers.c with -p" benchmark is
1389 // sensitive to alternative formulations.
1390 if (wuffs_base__private_implementation__is_decimal_digit(*p)) {
1391 exp_num = (10 * exp_num) + ((uint8_t)(*p - '0'));
1392 p++;
1393 }
1394 if (wuffs_base__private_implementation__is_decimal_digit(*p)) {
1395 exp_num = (10 * exp_num) + ((uint8_t)(*p - '0'));
1396 p++;
1397 }
1398 while (wuffs_base__private_implementation__is_decimal_digit(*p)) {
1399 if (exp_num > 0x1000000) {
1400 goto fallback;
1401 }
1402 exp_num = (10 * exp_num) + ((uint8_t)(*p - '0'));
1403 p++;
1404 }
1405 exp10 += exp_sign * exp_num;
1406 }
1407
1408 // The Wuffs API is that the original slice has no trailing data. It also
1409 // allows underscores, which we don't catch here but the fallback should.
1410 if (p != &z[s.len]) {
1411 goto fallback;
1412 }
1413
1414 // Check that the uint64_t typed man variable has not overflowed, based on
1415 // digit_count.
1416 //
1417 // For reference:
1418 // - (1 << 63) is 9223372036854775808, which has 19 decimal digits.
1419 // - (1 << 64) is 18446744073709551616, which has 20 decimal digits.
1420 // - 19 nines, 9999999999999999999, is 0x8AC7230489E7FFFF, which has 64
1421 // bits and 16 hexadecimal digits.
1422 // - 20 nines, 99999999999999999999, is 0x56BC75E2D630FFFFF, which has 67
1423 // bits and 17 hexadecimal digits.
1424 if (digit_count > 19) {
1425 // Even if we have more than 19 pseudo-digits, it's not yet definitely an
1426 // overflow. Recall that digit_count might be off-by-one (too large) if
1427 // there's a decimal separator. It will also over-report the number of
1428 // meaningful digits if the input looks something like "0.000dddExxx".
1429 //
1430 // We adjust by the number of leading '0's and '.'s and re-compare to 19.
1431 // Once again, technically, we could skip ','s too, but that perturbs the
1432 // "script/process-json-numbers.c with -p" benchmark.
1433 const uint8_t* q = start_of_digits_ptr;
1434 for (; (*q == '0') || (*q == '.'); q++) {
1435 }
1436 digit_count -= (uint32_t)(q - start_of_digits_ptr);
1437 if (digit_count > 19) {
1438 goto fallback;
1439 }
1440 }
1441
1442 // The wuffs_base__private_implementation__parse_number_f64_eisel
1443 // preconditions include that exp10 is in the range -326 ..= 310.
1444 if ((exp10 < -326) || (310 < exp10)) {
1445 goto fallback;
1446 }
1447
1448 // If man and exp10 are small enough, all three of (man), (10 ** exp10) and
1449 // (man ** (10 ** exp10)) are exactly representable by a double. We don't
1450 // need to run the Eisel algorithm.
1451 if ((-22 <= exp10) && (exp10 <= 22) && ((man >> 53) == 0)) {
1452 double d = (double)man;
1453 if (exp10 >= 0) {
1454 d *= wuffs_base__private_implementation__f64_powers_of_10[+exp10];
1455 } else {
1456 d /= wuffs_base__private_implementation__f64_powers_of_10[-exp10];
1457 }
1458 wuffs_base__result_f64 ret;
1459 ret.status.repr = NULL;
1460 ret.value = negative ? -d : +d;
1461 return ret;
1462 }
1463
1464 // The wuffs_base__private_implementation__parse_number_f64_eisel
1465 // preconditions include that man is non-zero. Parsing "0" should be caught
1466 // by the "If man and exp10 are small enough" above, but "0e99" might not.
1467 if (man == 0) {
1468 goto fallback;
1469 }
1470
1471 // Our man and exp10 are in range. Run the Eisel algorithm.
1472 int64_t r =
1473 wuffs_base__private_implementation__parse_number_f64_eisel(man, exp10);
1474 if (r < 0) {
1475 goto fallback;
1476 }
1477 wuffs_base__result_f64 ret;
1478 ret.status.repr = NULL;
Nigel Tao4d449dc2020-07-12 11:00:47 +10001479 ret.value = wuffs_base__ieee_754_bit_representation__from_u64_to_f64(
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001480 ((uint64_t)r) | (((uint64_t)negative) << 63));
1481 return ret;
1482 } while (0);
1483
1484fallback:
1485 do {
1486 wuffs_base__private_implementation__high_prec_dec h;
1487 wuffs_base__status status =
Nigel Taoe0c5de92020-07-11 11:48:17 +10001488 wuffs_base__private_implementation__high_prec_dec__parse(&h, s,
1489 options);
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001490 if (status.repr) {
Nigel Taoe0c5de92020-07-11 11:48:17 +10001491 return wuffs_base__private_implementation__parse_number_f64_special(
Nigel Tao4d61a052020-07-11 12:34:40 +10001492 s, options);
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001493 }
Nigel Tao4d61a052020-07-11 12:34:40 +10001494 return wuffs_base__private_implementation__high_prec_dec__to_f64(&h,
1495 options);
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001496 } while (0);
1497}
1498
1499// --------
1500
1501static inline size_t //
1502wuffs_base__private_implementation__render_inf(wuffs_base__slice_u8 dst,
1503 bool neg,
1504 uint32_t options) {
1505 if (neg) {
1506 if (dst.len < 4) {
1507 return 0;
1508 }
1509 wuffs_base__store_u32le__no_bounds_check(dst.ptr, 0x666E492D); // '-Inf'le.
1510 return 4;
1511 }
1512
1513 if (options & WUFFS_BASE__RENDER_NUMBER_XXX__LEADING_PLUS_SIGN) {
1514 if (dst.len < 4) {
1515 return 0;
1516 }
1517 wuffs_base__store_u32le__no_bounds_check(dst.ptr, 0x666E492B); // '+Inf'le.
1518 return 4;
1519 }
1520
1521 if (dst.len < 3) {
1522 return 0;
1523 }
1524 wuffs_base__store_u24le__no_bounds_check(dst.ptr, 0x666E49); // 'Inf'le.
1525 return 3;
1526}
1527
1528static inline size_t //
1529wuffs_base__private_implementation__render_nan(wuffs_base__slice_u8 dst) {
1530 if (dst.len < 3) {
1531 return 0;
1532 }
1533 wuffs_base__store_u24le__no_bounds_check(dst.ptr, 0x4E614E); // 'NaN'le.
1534 return 3;
1535}
1536
1537static size_t //
1538wuffs_base__private_implementation__high_prec_dec__render_exponent_absent(
1539 wuffs_base__slice_u8 dst,
1540 wuffs_base__private_implementation__high_prec_dec* h,
1541 uint32_t precision,
1542 uint32_t options) {
1543 size_t n = (h->negative ||
1544 (options & WUFFS_BASE__RENDER_NUMBER_XXX__LEADING_PLUS_SIGN))
1545 ? 1
1546 : 0;
1547 if (h->decimal_point <= 0) {
1548 n += 1;
1549 } else {
1550 n += (size_t)(h->decimal_point);
1551 }
1552 if (precision > 0) {
1553 n += precision + 1; // +1 for the '.'.
1554 }
1555
1556 // Don't modify dst if the formatted number won't fit.
1557 if (n > dst.len) {
1558 return 0;
1559 }
1560
1561 // Align-left or align-right.
1562 uint8_t* ptr = (options & WUFFS_BASE__RENDER_NUMBER_XXX__ALIGN_RIGHT)
1563 ? &dst.ptr[dst.len - n]
1564 : &dst.ptr[0];
1565
1566 // Leading "±".
1567 if (h->negative) {
1568 *ptr++ = '-';
1569 } else if (options & WUFFS_BASE__RENDER_NUMBER_XXX__LEADING_PLUS_SIGN) {
1570 *ptr++ = '+';
1571 }
1572
1573 // Integral digits.
1574 if (h->decimal_point <= 0) {
1575 *ptr++ = '0';
1576 } else {
1577 uint32_t m =
1578 wuffs_base__u32__min(h->num_digits, (uint32_t)(h->decimal_point));
1579 uint32_t i = 0;
1580 for (; i < m; i++) {
1581 *ptr++ = (uint8_t)('0' | h->digits[i]);
1582 }
1583 for (; i < (uint32_t)(h->decimal_point); i++) {
1584 *ptr++ = '0';
1585 }
1586 }
1587
1588 // Separator and then fractional digits.
1589 if (precision > 0) {
1590 *ptr++ =
1591 (options & WUFFS_BASE__RENDER_NUMBER_FXX__DECIMAL_SEPARATOR_IS_A_COMMA)
1592 ? ','
1593 : '.';
1594 uint32_t i = 0;
1595 for (; i < precision; i++) {
1596 uint32_t j = ((uint32_t)(h->decimal_point)) + i;
1597 *ptr++ = (uint8_t)('0' | ((j < h->num_digits) ? h->digits[j] : 0));
1598 }
1599 }
1600
1601 return n;
1602}
1603
1604static size_t //
1605wuffs_base__private_implementation__high_prec_dec__render_exponent_present(
1606 wuffs_base__slice_u8 dst,
1607 wuffs_base__private_implementation__high_prec_dec* h,
1608 uint32_t precision,
1609 uint32_t options) {
1610 int32_t exp = 0;
1611 if (h->num_digits > 0) {
1612 exp = h->decimal_point - 1;
1613 }
1614 bool negative_exp = exp < 0;
1615 if (negative_exp) {
1616 exp = -exp;
1617 }
1618
1619 size_t n = (h->negative ||
1620 (options & WUFFS_BASE__RENDER_NUMBER_XXX__LEADING_PLUS_SIGN))
1621 ? 4
1622 : 3; // Mininum 3 bytes: first digit and then "e±".
1623 if (precision > 0) {
1624 n += precision + 1; // +1 for the '.'.
1625 }
1626 n += (exp < 100) ? 2 : 3;
1627
1628 // Don't modify dst if the formatted number won't fit.
1629 if (n > dst.len) {
1630 return 0;
1631 }
1632
1633 // Align-left or align-right.
1634 uint8_t* ptr = (options & WUFFS_BASE__RENDER_NUMBER_XXX__ALIGN_RIGHT)
1635 ? &dst.ptr[dst.len - n]
1636 : &dst.ptr[0];
1637
1638 // Leading "±".
1639 if (h->negative) {
1640 *ptr++ = '-';
1641 } else if (options & WUFFS_BASE__RENDER_NUMBER_XXX__LEADING_PLUS_SIGN) {
1642 *ptr++ = '+';
1643 }
1644
1645 // Integral digit.
1646 if (h->num_digits > 0) {
1647 *ptr++ = (uint8_t)('0' | h->digits[0]);
1648 } else {
1649 *ptr++ = '0';
1650 }
1651
1652 // Separator and then fractional digits.
1653 if (precision > 0) {
1654 *ptr++ =
1655 (options & WUFFS_BASE__RENDER_NUMBER_FXX__DECIMAL_SEPARATOR_IS_A_COMMA)
1656 ? ','
1657 : '.';
1658 uint32_t i = 1;
1659 uint32_t j = wuffs_base__u32__min(h->num_digits, precision + 1);
1660 for (; i < j; i++) {
1661 *ptr++ = (uint8_t)('0' | h->digits[i]);
1662 }
1663 for (; i <= precision; i++) {
1664 *ptr++ = '0';
1665 }
1666 }
1667
1668 // Exponent: "e±" and then 2 or 3 digits.
1669 *ptr++ = 'e';
1670 *ptr++ = negative_exp ? '-' : '+';
1671 if (exp < 10) {
1672 *ptr++ = '0';
1673 *ptr++ = (uint8_t)('0' | exp);
1674 } else if (exp < 100) {
1675 *ptr++ = (uint8_t)('0' | (exp / 10));
1676 *ptr++ = (uint8_t)('0' | (exp % 10));
1677 } else {
1678 int32_t e = exp / 100;
1679 exp -= e * 100;
1680 *ptr++ = (uint8_t)('0' | e);
1681 *ptr++ = (uint8_t)('0' | (exp / 10));
1682 *ptr++ = (uint8_t)('0' | (exp % 10));
1683 }
1684
1685 return n;
1686}
1687
1688WUFFS_BASE__MAYBE_STATIC size_t //
1689wuffs_base__render_number_f64(wuffs_base__slice_u8 dst,
1690 double x,
1691 uint32_t precision,
1692 uint32_t options) {
1693 // Decompose x (64 bits) into negativity (1 bit), base-2 exponent (11 bits
1694 // with a -1023 bias) and mantissa (52 bits).
Nigel Tao4d449dc2020-07-12 11:00:47 +10001695 uint64_t bits = wuffs_base__ieee_754_bit_representation__from_f64_to_u64(x);
Nigel Tao2a7e1ed2020-07-07 21:50:06 +10001696 bool neg = (bits >> 63) != 0;
1697 int32_t exp2 = ((int32_t)(bits >> 52)) & 0x7FF;
1698 uint64_t man = bits & 0x000FFFFFFFFFFFFFul;
1699
1700 // Apply the exponent bias and set the implicit top bit of the mantissa,
1701 // unless x is subnormal. Also take care of Inf and NaN.
1702 if (exp2 == 0x7FF) {
1703 if (man != 0) {
1704 return wuffs_base__private_implementation__render_nan(dst);
1705 }
1706 return wuffs_base__private_implementation__render_inf(dst, neg, options);
1707 } else if (exp2 == 0) {
1708 exp2 = -1022;
1709 } else {
1710 exp2 -= 1023;
1711 man |= 0x0010000000000000ul;
1712 }
1713
1714 // Ensure that precision isn't too large.
1715 if (precision > 4095) {
1716 precision = 4095;
1717 }
1718
1719 // Convert from the (neg, exp2, man) tuple to an HPD.
1720 wuffs_base__private_implementation__high_prec_dec h;
1721 wuffs_base__private_implementation__high_prec_dec__assign(&h, man, neg);
1722 if (h.num_digits > 0) {
1723 wuffs_base__private_implementation__high_prec_dec__lshift(
1724 &h, exp2 - 52); // 52 mantissa bits.
1725 }
1726
1727 // Handle the "%e" and "%f" formats.
1728 switch (options & (WUFFS_BASE__RENDER_NUMBER_FXX__EXPONENT_ABSENT |
1729 WUFFS_BASE__RENDER_NUMBER_FXX__EXPONENT_PRESENT)) {
1730 case WUFFS_BASE__RENDER_NUMBER_FXX__EXPONENT_ABSENT: // The "%"f" format.
1731 if (options & WUFFS_BASE__RENDER_NUMBER_FXX__JUST_ENOUGH_PRECISION) {
1732 wuffs_base__private_implementation__high_prec_dec__round_just_enough(
1733 &h, exp2, man);
1734 int32_t p = ((int32_t)(h.num_digits)) - h.decimal_point;
1735 precision = ((uint32_t)(wuffs_base__i32__max(0, p)));
1736 } else {
1737 wuffs_base__private_implementation__high_prec_dec__round_nearest(
1738 &h, ((int32_t)precision) + h.decimal_point);
1739 }
1740 return wuffs_base__private_implementation__high_prec_dec__render_exponent_absent(
1741 dst, &h, precision, options);
1742
1743 case WUFFS_BASE__RENDER_NUMBER_FXX__EXPONENT_PRESENT: // The "%e" format.
1744 if (options & WUFFS_BASE__RENDER_NUMBER_FXX__JUST_ENOUGH_PRECISION) {
1745 wuffs_base__private_implementation__high_prec_dec__round_just_enough(
1746 &h, exp2, man);
1747 precision = (h.num_digits > 0) ? (h.num_digits - 1) : 0;
1748 } else {
1749 wuffs_base__private_implementation__high_prec_dec__round_nearest(
1750 &h, ((int32_t)precision) + 1);
1751 }
1752 return wuffs_base__private_implementation__high_prec_dec__render_exponent_present(
1753 dst, &h, precision, options);
1754 }
1755
1756 // We have the "%g" format and so precision means the number of significant
1757 // digits, not the number of digits after the decimal separator. Perform
1758 // rounding and determine whether to use "%e" or "%f".
1759 int32_t e_threshold = 0;
1760 if (options & WUFFS_BASE__RENDER_NUMBER_FXX__JUST_ENOUGH_PRECISION) {
1761 wuffs_base__private_implementation__high_prec_dec__round_just_enough(
1762 &h, exp2, man);
1763 precision = h.num_digits;
1764 e_threshold = 6;
1765 } else {
1766 if (precision == 0) {
1767 precision = 1;
1768 }
1769 wuffs_base__private_implementation__high_prec_dec__round_nearest(
1770 &h, ((int32_t)precision));
1771 e_threshold = ((int32_t)precision);
1772 int32_t nd = ((int32_t)(h.num_digits));
1773 if ((e_threshold > nd) && (nd >= h.decimal_point)) {
1774 e_threshold = nd;
1775 }
1776 }
1777
1778 // Use the "%e" format if the exponent is large.
1779 int32_t e = h.decimal_point - 1;
1780 if ((e < -4) || (e_threshold <= e)) {
1781 uint32_t p = wuffs_base__u32__min(precision, h.num_digits);
1782 return wuffs_base__private_implementation__high_prec_dec__render_exponent_present(
1783 dst, &h, (p > 0) ? (p - 1) : 0, options);
1784 }
1785
1786 // Use the "%f" format otherwise.
1787 int32_t p = ((int32_t)precision);
1788 if (p > h.decimal_point) {
1789 p = ((int32_t)(h.num_digits));
1790 }
1791 precision = ((uint32_t)(wuffs_base__i32__max(0, p - h.decimal_point)));
1792 return wuffs_base__private_implementation__high_prec_dec__render_exponent_absent(
1793 dst, &h, precision, options);
1794}