Blame - src/ryu/f2s.cpp - chromium.googlesource.com/external/github.com/llvm/llvm-project/libcxx

blob: 7e10b498367efdd1f76fea4e04d06b3a6f10f43f [file] [log] [blame]

Mark de Wever	fa36ec7	2021-02-09 17:52:41 +0100	[diff] [blame^]	1	//===----------------------------------------------------------------------===//
				2	//
				3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	// See https://llvm.org/LICENSE.txt for license information.
				5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				6	//
				7	//===----------------------------------------------------------------------===//
				8
				9	// Copyright (c) Microsoft Corporation.
				10	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				11
				12	// Copyright 2018 Ulf Adams
				13	// Copyright (c) Microsoft Corporation. All rights reserved.
				14
				15	// Boost Software License - Version 1.0 - August 17th, 2003
				16
				17	// Permission is hereby granted, free of charge, to any person or organization
				18	// obtaining a copy of the software and accompanying documentation covered by
				19	// this license (the "Software") to use, reproduce, display, distribute,
				20	// execute, and transmit the Software, and to prepare derivative works of the
				21	// Software, and to permit third-parties to whom the Software is furnished to
				22	// do so, all subject to the following:
				23
				24	// The copyright notices in the Software and this entire statement, including
				25	// the above license grant, this restriction and the following disclaimer,
				26	// must be included in all copies of the Software, in whole or in part, and
				27	// all derivative works of the Software, unless such copies or derivative
				28	// works are solely in the form of machine-executable object code generated by
				29	// a source language processor.
				30
				31	// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				32	// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				33	// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
				34	// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
				35	// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
				36	// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
				37	// DEALINGS IN THE SOFTWARE.
				38
				39	// Avoid formatting to keep the changes with the original code minimal.
				40	// clang-format off
				41
				42	#include "__config"
				43	#include "charconv"
				44
				45	#include "include/ryu/common.h"
				46	#include "include/ryu/d2fixed.h"
				47	#include "include/ryu/d2s_intrinsics.h"
				48	#include "include/ryu/digit_table.h"
				49	#include "include/ryu/f2s.h"
				50	#include "include/ryu/ryu.h"
				51
				52	_LIBCPP_BEGIN_NAMESPACE_STD
				53
				// Layout parameters of the 32-bit float encoding decoded by this file.
				inline constexpr int __FLOAT_MANTISSA_BITS = 23;
				inline constexpr int __FLOAT_EXPONENT_BITS = 8;
				inline constexpr int __FLOAT_BIAS = 127;

				// 64-bit multipliers read by __mulPow5InvDivPow2(), indexed by __q.
				// Entry [0] is 2^59 + 1; the remaining entries are presumably the matching
				// fixed-point reciprocals of 5^__q (NOTE(review): confirm against the Ryu table generator).
				inline constexpr int __FLOAT_POW5_INV_BITCOUNT = 59;
				inline constexpr uint64_t __FLOAT_POW5_INV_SPLIT[31] = {
				  576460752303423489u, // [0]
				  461168601842738791u, // [1]
				  368934881474191033u, // [2]
				  295147905179352826u, // [3]
				  472236648286964522u, // [4]
				  377789318629571618u, // [5]
				  302231454903657294u, // [6]
				  483570327845851670u, // [7]
				  386856262276681336u, // [8]
				  309485009821345069u, // [9]
				  495176015714152110u, // [10]
				  396140812571321688u, // [11]
				  316912650057057351u, // [12]
				  507060240091291761u, // [13]
				  405648192073033409u, // [14]
				  324518553658426727u, // [15]
				  519229685853482763u, // [16]
				  415383748682786211u, // [17]
				  332306998946228969u, // [18]
				  531691198313966350u, // [19]
				  425352958651173080u, // [20]
				  340282366920938464u, // [21]
				  544451787073501542u, // [22]
				  435561429658801234u, // [23]
				  348449143727040987u, // [24]
				  557518629963265579u, // [25]
				  446014903970612463u, // [26]
				  356811923176489971u, // [27]
				  570899077082383953u, // [28]
				  456719261665907162u, // [29]
				  365375409332725730u  // [30]
				};

				// 64-bit multipliers read by __mulPow5divPow2(), indexed by __i.
				// Entry [0] is 2^60 and entry [26] is exactly 5^26, so entry [i] looks like 5^i
				// scaled to 61 significant bits (NOTE(review): confirm against the Ryu table generator).
				inline constexpr int __FLOAT_POW5_BITCOUNT = 61;
				inline constexpr uint64_t __FLOAT_POW5_SPLIT[47] = {
				  1152921504606846976u, // [0]
				  1441151880758558720u, // [1]
				  1801439850948198400u, // [2]
				  2251799813685248000u, // [3]
				  1407374883553280000u, // [4]
				  1759218604441600000u, // [5]
				  2199023255552000000u, // [6]
				  1374389534720000000u, // [7]
				  1717986918400000000u, // [8]
				  2147483648000000000u, // [9]
				  1342177280000000000u, // [10]
				  1677721600000000000u, // [11]
				  2097152000000000000u, // [12]
				  1310720000000000000u, // [13]
				  1638400000000000000u, // [14]
				  2048000000000000000u, // [15]
				  1280000000000000000u, // [16]
				  1600000000000000000u, // [17]
				  2000000000000000000u, // [18]
				  1250000000000000000u, // [19]
				  1562500000000000000u, // [20]
				  1953125000000000000u, // [21]
				  1220703125000000000u, // [22]
				  1525878906250000000u, // [23]
				  1907348632812500000u, // [24]
				  1192092895507812500u, // [25]
				  1490116119384765625u, // [26]
				  1862645149230957031u, // [27]
				  1164153218269348144u, // [28]
				  1455191522836685180u, // [29]
				  1818989403545856475u, // [30]
				  2273736754432320594u, // [31]
				  1421085471520200371u, // [32]
				  1776356839400250464u, // [33]
				  2220446049250313080u, // [34]
				  1387778780781445675u, // [35]
				  1734723475976807094u, // [36]
				  2168404344971008868u, // [37]
				  1355252715606880542u, // [38]
				  1694065894508600678u, // [39]
				  2117582368135750847u, // [40]
				  1323488980084844279u, // [41]
				  1654361225106055349u, // [42]
				  2067951531382569187u, // [43]
				  1292469707114105741u, // [44]
				  1615587133892632177u, // [45]
				  2019483917365790221u  // [46]
				};
				84
				85	[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __pow5Factor(uint32_t __value) {
				86	uint32_t __count = 0;
				87	for (;;) {
				88	_LIBCPP_ASSERT(__value != 0, "");
				89	const uint32_t __q = __value / 5;
				90	const uint32_t __r = __value % 5;
				91	if (__r != 0) {
				92	break;
				93	}
				94	__value = __q;
				95	++__count;
				96	}
				97	return __count;
				98	}
				99
				100	// Returns true if __value is divisible by 5^__p.
				101	[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf5(const uint32_t __value, const uint32_t __p) {
				102	return __pow5Factor(__value) >= __p;
				103	}
				104
				105	// Returns true if __value is divisible by 2^__p.
				106	[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint32_t __value, const uint32_t __p) {
				107	_LIBCPP_ASSERT(__value != 0, "");
				108	_LIBCPP_ASSERT(__p < 32, "");
				109	// __builtin_ctz doesn't appear to be faster here.
				110	return (__value & ((1u << __p) - 1)) == 0;
				111	}
				112
				113	[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mulShift(const uint32_t __m, const uint64_t __factor, const int32_t __shift) {
				114	_LIBCPP_ASSERT(__shift > 32, "");
				115
				116	// The casts here help MSVC to avoid calls to the __allmul library
				117	// function.
				118	const uint32_t __factorLo = static_cast<uint32_t>(__factor);
				119	const uint32_t __factorHi = static_cast<uint32_t>(__factor >> 32);
				120	const uint64_t __bits0 = static_cast<uint64_t>(__m) * __factorLo;
				121	const uint64_t __bits1 = static_cast<uint64_t>(__m) * __factorHi;
				122
				123	#ifndef _LIBCPP_64_BIT
				124	// On 32-bit platforms we can avoid a 64-bit shift-right since we only
				125	// need the upper 32 bits of the result and the shift value is > 32.
				126	const uint32_t __bits0Hi = static_cast<uint32_t>(__bits0 >> 32);
				127	uint32_t __bits1Lo = static_cast<uint32_t>(__bits1);
				128	uint32_t __bits1Hi = static_cast<uint32_t>(__bits1 >> 32);
				129	__bits1Lo += __bits0Hi;
				130	__bits1Hi += (__bits1Lo < __bits0Hi);
				131	const int32_t __s = __shift - 32;
				132	return (__bits1Hi << (32 - __s)) \| (__bits1Lo >> __s);
				133	#else // ^^^ 32-bit ^^^ / vvv 64-bit vvv
				134	const uint64_t __sum = (__bits0 >> 32) + __bits1;
				135	const uint64_t __shiftedSum = __sum >> (__shift - 32);
				136	_LIBCPP_ASSERT(__shiftedSum <= UINT32_MAX, "");
				137	return static_cast<uint32_t>(__shiftedSum);
				138	#endif // ^^^ 64-bit ^^^
				139	}
				140
				141	[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mulPow5InvDivPow2(const uint32_t __m, const uint32_t __q, const int32_t __j) {
				142	return __mulShift(__m, __FLOAT_POW5_INV_SPLIT[__q], __j);
				143	}
				144
				145	[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mulPow5divPow2(const uint32_t __m, const uint32_t __i, const int32_t __j) {
				146	return __mulShift(__m, __FLOAT_POW5_SPLIT[__i], __j);
				147	}
				148
				149	// A floating decimal representing m * 10^e, as produced by __f2d() and consumed by __to_chars().
				150	struct __floating_decimal_32 {
				151	uint32_t __mantissa; // m: the decimal digits as an integer
				152	int32_t __exponent; // e: the power of ten that scales __mantissa
				153	};
				154
				155	[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline __floating_decimal_32 __f2d(const uint32_t __ieeeMantissa, const uint32_t __ieeeExponent) {
				156	int32_t __e2;
				157	uint32_t __m2;
				158	if (__ieeeExponent == 0) {
				159	// We subtract 2 so that the bounds computation has 2 additional bits.
				160	__e2 = 1 - __FLOAT_BIAS - __FLOAT_MANTISSA_BITS - 2;
				161	__m2 = __ieeeMantissa;
				162	} else {
				163	__e2 = static_cast<int32_t>(__ieeeExponent) - __FLOAT_BIAS - __FLOAT_MANTISSA_BITS - 2;
				164	__m2 = (1u << __FLOAT_MANTISSA_BITS) \| __ieeeMantissa;
				165	}
				166	const bool __even = (__m2 & 1) == 0;
				167	const bool __acceptBounds = __even;
				168
				169	// Step 2: Determine the interval of valid decimal representations.
				170	const uint32_t __mv = 4 * __m2;
				171	const uint32_t __mp = 4 * __m2 + 2;
				172	// Implicit bool -> int conversion. True is 1, false is 0.
				173	const uint32_t __mmShift = __ieeeMantissa != 0 \|\| __ieeeExponent <= 1;
				174	const uint32_t __mm = 4 * __m2 - 1 - __mmShift;
				175
				176	// Step 3: Convert to a decimal power base using 64-bit arithmetic.
				177	uint32_t __vr, __vp, __vm;
				178	int32_t __e10;
				179	bool __vmIsTrailingZeros = false;
				180	bool __vrIsTrailingZeros = false;
				181	uint8_t __lastRemovedDigit = 0;
				182	if (__e2 >= 0) {
				183	const uint32_t __q = __log10Pow2(__e2);
				184	__e10 = static_cast<int32_t>(__q);
				185	const int32_t __k = __FLOAT_POW5_INV_BITCOUNT + __pow5bits(static_cast<int32_t>(__q)) - 1;
				186	const int32_t __i = -__e2 + static_cast<int32_t>(__q) + __k;
				187	__vr = __mulPow5InvDivPow2(__mv, __q, __i);
				188	__vp = __mulPow5InvDivPow2(__mp, __q, __i);
				189	__vm = __mulPow5InvDivPow2(__mm, __q, __i);
				190	if (__q != 0 && (__vp - 1) / 10 <= __vm / 10) {
				191	// We need to know one removed digit even if we are not going to loop below. We could use
				192	// __q = X - 1 above, except that would require 33 bits for the result, and we've found that
				193	// 32-bit arithmetic is faster even on 64-bit machines.
				194	const int32_t __l = __FLOAT_POW5_INV_BITCOUNT + __pow5bits(static_cast<int32_t>(__q - 1)) - 1;
				195	__lastRemovedDigit = static_cast<uint8_t>(__mulPow5InvDivPow2(__mv, __q - 1,
				196	-__e2 + static_cast<int32_t>(__q) - 1 + __l) % 10);
				197	}
				198	if (__q <= 9) {
				199	// The largest power of 5 that fits in 24 bits is 5^10, but __q <= 9 seems to be safe as well.
				200	// Only one of __mp, __mv, and __mm can be a multiple of 5, if any.
				201	if (__mv % 5 == 0) {
				202	__vrIsTrailingZeros = __multipleOfPowerOf5(__mv, __q);
				203	} else if (__acceptBounds) {
				204	__vmIsTrailingZeros = __multipleOfPowerOf5(__mm, __q);
				205	} else {
				206	__vp -= __multipleOfPowerOf5(__mp, __q);
				207	}
				208	}
				209	} else {
				210	const uint32_t __q = __log10Pow5(-__e2);
				211	__e10 = static_cast<int32_t>(__q) + __e2;
				212	const int32_t __i = -__e2 - static_cast<int32_t>(__q);
				213	const int32_t __k = __pow5bits(__i) - __FLOAT_POW5_BITCOUNT;
				214	int32_t __j = static_cast<int32_t>(__q) - __k;
				215	__vr = __mulPow5divPow2(__mv, static_cast<uint32_t>(__i), __j);
				216	__vp = __mulPow5divPow2(__mp, static_cast<uint32_t>(__i), __j);
				217	__vm = __mulPow5divPow2(__mm, static_cast<uint32_t>(__i), __j);
				218	if (__q != 0 && (__vp - 1) / 10 <= __vm / 10) {
				219	__j = static_cast<int32_t>(__q) - 1 - (__pow5bits(__i + 1) - __FLOAT_POW5_BITCOUNT);
				220	__lastRemovedDigit = static_cast<uint8_t>(__mulPow5divPow2(__mv, static_cast<uint32_t>(__i + 1), __j) % 10);
				221	}
				222	if (__q <= 1) {
				223	// {__vr,__vp,__vm} is trailing zeros if {__mv,__mp,__mm} has at least __q trailing 0 bits.
				224	// __mv = 4 * __m2, so it always has at least two trailing 0 bits.
				225	__vrIsTrailingZeros = true;
				226	if (__acceptBounds) {
				227	// __mm = __mv - 1 - __mmShift, so it has 1 trailing 0 bit iff __mmShift == 1.
				228	__vmIsTrailingZeros = __mmShift == 1;
				229	} else {
				230	// __mp = __mv + 2, so it always has at least one trailing 0 bit.
				231	--__vp;
				232	}
				233	} else if (__q < 31) { // TRANSITION(ulfjack): Use a tighter bound here.
				234	__vrIsTrailingZeros = __multipleOfPowerOf2(__mv, __q - 1);
				235	}
				236	}
				237
				238	// Step 4: Find the shortest decimal representation in the interval of valid representations.
				239	int32_t __removed = 0;
				240	uint32_t _Output;
				241	if (__vmIsTrailingZeros \|\| __vrIsTrailingZeros) {
				242	// General case, which happens rarely (~4.0%).
				243	while (__vp / 10 > __vm / 10) {
				244	#ifdef __clang__ // TRANSITION, LLVM-23106
				245	__vmIsTrailingZeros &= __vm - (__vm / 10) * 10 == 0;
				246	#else
				247	__vmIsTrailingZeros &= __vm % 10 == 0;
				248	#endif
				249	__vrIsTrailingZeros &= __lastRemovedDigit == 0;
				250	__lastRemovedDigit = static_cast<uint8_t>(__vr % 10);
				251	__vr /= 10;
				252	__vp /= 10;
				253	__vm /= 10;
				254	++__removed;
				255	}
				256	if (__vmIsTrailingZeros) {
				257	while (__vm % 10 == 0) {
				258	__vrIsTrailingZeros &= __lastRemovedDigit == 0;
				259	__lastRemovedDigit = static_cast<uint8_t>(__vr % 10);
				260	__vr /= 10;
				261	__vp /= 10;
				262	__vm /= 10;
				263	++__removed;
				264	}
				265	}
				266	if (__vrIsTrailingZeros && __lastRemovedDigit == 5 && __vr % 2 == 0) {
				267	// Round even if the exact number is .....50..0.
				268	__lastRemovedDigit = 4;
				269	}
				270	// We need to take __vr + 1 if __vr is outside bounds or we need to round up.
				271	_Output = __vr + ((__vr == __vm && (!__acceptBounds \|\| !__vmIsTrailingZeros)) \|\| __lastRemovedDigit >= 5);
				272	} else {
				273	// Specialized for the common case (~96.0%). Percentages below are relative to this.
				274	// Loop iterations below (approximately):
				275	// 0: 13.6%, 1: 70.7%, 2: 14.1%, 3: 1.39%, 4: 0.14%, 5+: 0.01%
				276	while (__vp / 10 > __vm / 10) {
				277	__lastRemovedDigit = static_cast<uint8_t>(__vr % 10);
				278	__vr /= 10;
				279	__vp /= 10;
				280	__vm /= 10;
				281	++__removed;
				282	}
				283	// We need to take __vr + 1 if __vr is outside bounds or we need to round up.
				284	_Output = __vr + (__vr == __vm \|\| __lastRemovedDigit >= 5);
				285	}
				286	const int32_t __exp = __e10 + __removed;
				287
				288	__floating_decimal_32 __fd;
				289	__fd.__exponent = __exp;
				290	__fd.__mantissa = _Output;
				291	return __fd;
				292	}
				293
				294	[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline to_chars_result _Large_integer_to_chars(char* const _First, char* const _Last,
				295	const uint32_t _Mantissa2, const int32_t _Exponent2) {
				296
				297	// Print the integer _Mantissa2 * 2^_Exponent2 exactly.
				298
				299	// For nonzero integers, _Exponent2 >= -23. (The minimum value occurs when _Mantissa2 * 2^_Exponent2 is 1.
				300	// In that case, _Mantissa2 is the implicit 1 bit followed by 23 zeros, so _Exponent2 is -23 to shift away
				301	// the zeros.) The dense range of exactly representable integers has negative or zero exponents
				302	// (as positive exponents make the range non-dense). For that dense range, Ryu will always be used:
				303	// every digit is necessary to uniquely identify the value, so Ryu must print them all.
				304
				305	// Positive exponents are the non-dense range of exactly representable integers.
				306	// This contains all of the values for which Ryu can't be used (and a few Ryu-friendly values).
				307
				308	// Performance note: Long division appears to be faster than losslessly widening float to double and calling
				309	// __d2fixed_buffered_n(). If __f2fixed_buffered_n() is implemented, it might be faster than long division.
				310
				311	_LIBCPP_ASSERT(_Exponent2 > 0, "");
				312	_LIBCPP_ASSERT(_Exponent2 <= 104, ""); // because __ieeeExponent <= 254
				313
				314	// Manually represent _Mantissa2 * 2^_Exponent2 as a large integer. _Mantissa2 is always 24 bits
				315	// (due to the implicit bit), while _Exponent2 indicates a shift of at most 104 bits.
				316	// 24 + 104 equals 128 equals 4 * 32, so we need exactly 4 32-bit elements.
				317	// We use a little-endian representation, visualized like this:
				318
				319	// << left shift <<
				320	// most significant
				321	// _Data[3] _Data[2] _Data[1] _Data[0]
				322	// least significant
				323	// >> right shift >>
				324
				325	constexpr uint32_t _Data_size = 4;
				326	uint32_t _Data[_Data_size]{};
				327
				328	// _Maxidx is the index of the most significant nonzero element.
				329	uint32_t _Maxidx = ((24 + static_cast<uint32_t>(_Exponent2) + 31) / 32) - 1;
				330	_LIBCPP_ASSERT(_Maxidx < _Data_size, "");
				331
				332	const uint32_t _Bit_shift = static_cast<uint32_t>(_Exponent2) % 32;
				333	if (_Bit_shift <= 8) { // _Mantissa2's 24 bits don't cross an element boundary
				334	_Data[_Maxidx] = _Mantissa2 << _Bit_shift;
				335	} else { // _Mantissa2's 24 bits cross an element boundary
				336	_Data[_Maxidx - 1] = _Mantissa2 << _Bit_shift;
				337	_Data[_Maxidx] = _Mantissa2 >> (32 - _Bit_shift);
				338	}
				339
				340	// If Ryu hasn't determined the total output length, we need to buffer the digits generated from right to left
				341	// by long division. The largest possible float is: 340'282346638'528859811'704183484'516925440
				342	uint32_t _Blocks[4];
				343	int32_t _Filled_blocks = 0;
				344	// From left to right, we're going to print:
				345	// _Data[0] will be [1, 10] digits.
				346	// Then if _Filled_blocks > 0:
				347	// _Blocks[_Filled_blocks - 1], ..., _Blocks[0] will be 0-filled 9-digit blocks.
				348
				349	if (_Maxidx != 0) { // If the integer is actually large, perform long division.
				350	// Otherwise, skip to printing _Data[0].
				351	for (;;) {
				352	// Loop invariant: _Maxidx != 0 (i.e. the integer is actually large)
				353
				354	const uint32_t _Most_significant_elem = _Data[_Maxidx];
				355	const uint32_t _Initial_remainder = _Most_significant_elem % 1000000000;
				356	const uint32_t _Initial_quotient = _Most_significant_elem / 1000000000;
				357	_Data[_Maxidx] = _Initial_quotient;
				358	uint64_t _Remainder = _Initial_remainder;
				359
				360	// Process less significant elements.
				361	uint32_t _Idx = _Maxidx;
				362	do {
				363	--_Idx; // Initially, _Remainder is at most 10^9 - 1.
				364
				365	// Now, _Remainder is at most (10^9 - 1) * 2^32 + 2^32 - 1, simplified to 10^9 * 2^32 - 1.
				366	_Remainder = (_Remainder << 32) \| _Data[_Idx];
				367
				368	// floor((10^9 * 2^32 - 1) / 10^9) == 2^32 - 1, so uint32_t _Quotient is lossless.
				369	const uint32_t _Quotient = static_cast<uint32_t>(__div1e9(_Remainder));
				370
				371	// _Remainder is at most 10^9 - 1 again.
				372	// For uint32_t truncation, see the __mod1e9() comment in d2s_intrinsics.h.
				373	_Remainder = static_cast<uint32_t>(_Remainder) - 1000000000u * _Quotient;
				374
				375	_Data[_Idx] = _Quotient;
				376	} while (_Idx != 0);
				377
				378	// Store a 0-filled 9-digit block.
				379	_Blocks[_Filled_blocks++] = static_cast<uint32_t>(_Remainder);
				380
				381	if (_Initial_quotient == 0) { // Is the large integer shrinking?
				382	--_Maxidx; // log2(10^9) is 29.9, so we can't shrink by more than one element.
				383	if (_Maxidx == 0) {
				384	break; // We've finished long division. Now we need to print _Data[0].
				385	}
				386	}
				387	}
				388	}
				389
				390	_LIBCPP_ASSERT(_Data[0] != 0, "");
				391	for (uint32_t _Idx = 1; _Idx < _Data_size; ++_Idx) {
				392	_LIBCPP_ASSERT(_Data[_Idx] == 0, "");
				393	}
				394
				395	const uint32_t _Data_olength = _Data[0] >= 1000000000 ? 10 : __decimalLength9(_Data[0]);
				396	const uint32_t _Total_fixed_length = _Data_olength + 9 * _Filled_blocks;
				397
				398	if (_Last - _First < static_cast<ptrdiff_t>(_Total_fixed_length)) {
				399	return { _Last, errc::value_too_large };
				400	}
				401
				402	char* _Result = _First;
				403
				404	// Print _Data[0]. While it's up to 10 digits,
				405	// which is more than Ryu generates, the code below can handle this.
				406	__append_n_digits(_Data_olength, _Data[0], _Result);
				407	_Result += _Data_olength;
				408
				409	// Print 0-filled 9-digit blocks.
				410	for (int32_t _Idx = _Filled_blocks - 1; _Idx >= 0; --_Idx) {
				411	__append_nine_digits(_Blocks[_Idx], _Result);
				412	_Result += 9;
				413	}
				414
				415	return { _Result, errc{} };
				416	}
				417
				418	[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline to_chars_result __to_chars(char* const _First, char* const _Last, const __floating_decimal_32 __v,
				419	chars_format _Fmt, const uint32_t __ieeeMantissa, const uint32_t __ieeeExponent) {
				420	// Step 5: Print the decimal representation.
				421	uint32_t _Output = __v.__mantissa;
				422	int32_t _Ryu_exponent = __v.__exponent;
				423	const uint32_t __olength = __decimalLength9(_Output);
				424	int32_t _Scientific_exponent = _Ryu_exponent + static_cast<int32_t>(__olength) - 1;
				425
				426	if (_Fmt == chars_format{}) {
				427	int32_t _Lower;
				428	int32_t _Upper;
				429
				430	if (__olength == 1) {
				431	// Value \| Fixed \| Scientific
				432	// 1e-3 \| "0.001" \| "1e-03"
				433	// 1e4 \| "10000" \| "1e+04"
				434	_Lower = -3;
				435	_Upper = 4;
				436	} else {
				437	// Value \| Fixed \| Scientific
				438	// 1234e-7 \| "0.0001234" \| "1.234e-04"
				439	// 1234e5 \| "123400000" \| "1.234e+08"
				440	_Lower = -static_cast<int32_t>(__olength + 3);
				441	_Upper = 5;
				442	}
				443
				444	if (_Lower <= _Ryu_exponent && _Ryu_exponent <= _Upper) {
				445	_Fmt = chars_format::fixed;
				446	} else {
				447	_Fmt = chars_format::scientific;
				448	}
				449	} else if (_Fmt == chars_format::general) {
				450	// C11 7.21.6.1 "The fprintf function"/8:
				451	// "Let P equal [...] 6 if the precision is omitted [...].
				452	// Then, if a conversion with style E would have an exponent of X:
				453	// - if P > X >= -4, the conversion is with style f [...].
				454	// - otherwise, the conversion is with style e [...]."
				455	if (-4 <= _Scientific_exponent && _Scientific_exponent < 6) {
				456	_Fmt = chars_format::fixed;
				457	} else {
				458	_Fmt = chars_format::scientific;
				459	}
				460	}
				461
				462	if (_Fmt == chars_format::fixed) {
				463	// Example: _Output == 1729, __olength == 4
				464
				465	// _Ryu_exponent \| Printed \| _Whole_digits \| _Total_fixed_length \| Notes
				466	// --------------\|----------\|---------------\|----------------------\|---------------------------------------
				467	// 2 \| 172900 \| 6 \| _Whole_digits \| Ryu can't be used for printing
				468	// 1 \| 17290 \| 5 \| (sometimes adjusted) \| when the trimmed digits are nonzero.
				469	// --------------\|----------\|---------------\|----------------------\|---------------------------------------
				470	// 0 \| 1729 \| 4 \| _Whole_digits \| Unified length cases.
				471	// --------------\|----------\|---------------\|----------------------\|---------------------------------------
				472	// -1 \| 172.9 \| 3 \| __olength + 1 \| This case can't happen for
				473	// -2 \| 17.29 \| 2 \| \| __olength == 1, but no additional
				474	// -3 \| 1.729 \| 1 \| \| code is needed to avoid it.
				475	// --------------\|----------\|---------------\|----------------------\|---------------------------------------
				476	// -4 \| 0.1729 \| 0 \| 2 - _Ryu_exponent \| C11 7.21.6.1 "The fprintf function"/8:
				477	// -5 \| 0.01729 \| -1 \| \| "If a decimal-point character appears,
				478	// -6 \| 0.001729 \| -2 \| \| at least one digit appears before it."
				479
				480	const int32_t _Whole_digits = static_cast<int32_t>(__olength) + _Ryu_exponent;
				481
				482	uint32_t _Total_fixed_length;
				483	if (_Ryu_exponent >= 0) { // cases "172900" and "1729"
				484	_Total_fixed_length = static_cast<uint32_t>(_Whole_digits);
				485	if (_Output == 1) {
				486	// Rounding can affect the number of digits.
				487	// For example, 1e11f is exactly "99999997952" which is 11 digits instead of 12.
				488	// We can use a lookup table to detect this and adjust the total length.
				489	static constexpr uint8_t _Adjustment[39] = {
				490	0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1 };
				491	_Total_fixed_length -= _Adjustment[_Ryu_exponent];
				492	// _Whole_digits doesn't need to be adjusted because these cases won't refer to it later.
				493	}
				494	} else if (_Whole_digits > 0) { // case "17.29"
				495	_Total_fixed_length = __olength + 1;
				496	} else { // case "0.001729"
				497	_Total_fixed_length = static_cast<uint32_t>(2 - _Ryu_exponent);
				498	}
				499
				500	if (_Last - _First < static_cast<ptrdiff_t>(_Total_fixed_length)) {
				501	return { _Last, errc::value_too_large };
				502	}
				503
				504	char* _Mid;
				505	if (_Ryu_exponent > 0) { // case "172900"
				506	bool _Can_use_ryu;
				507
				508	if (_Ryu_exponent > 10) { // 10^10 is the largest power of 10 that's exactly representable as a float.
				509	_Can_use_ryu = false;
				510	} else {
				511	// Ryu generated X: __v.__mantissa * 10^_Ryu_exponent
				512	// __v.__mantissa == 2^_Trailing_zero_bits * (__v.__mantissa >> _Trailing_zero_bits)
				513	// 10^_Ryu_exponent == 2^_Ryu_exponent * 5^_Ryu_exponent
				514
				515	// _Trailing_zero_bits is [0, 29] (aside: because 2^29 is the largest power of 2
				516	// with 9 decimal digits, which is float's round-trip limit.)
				517	// _Ryu_exponent is [1, 10].
				518	// Normalization adds [2, 23] (aside: at least 2 because the pre-normalized mantissa is at least 5).
				519	// This adds up to [3, 62], which is well below float's maximum binary exponent 127.
				520
				521	// Therefore, we just need to consider (__v.__mantissa >> _Trailing_zero_bits) * 5^_Ryu_exponent.
				522
				523	// If that product would exceed 24 bits, then X can't be exactly represented as a float.
				524	// (That's not a problem for round-tripping, because X is close enough to the original float,
				525	// but X isn't mathematically equal to the original float.) This requires a high-precision fallback.
				526
				527	// If the product is 24 bits or smaller, then X can be exactly represented as a float (and we don't
				528	// need to re-synthesize it; the original float must have been X, because Ryu wouldn't produce the
				529	// same output for two different floats X and Y). This allows Ryu's output to be used (zero-filled).
				530
				531	// (2^24 - 1) / 5^0 (for indexing), (2^24 - 1) / 5^1, ..., (2^24 - 1) / 5^10
				532	static constexpr uint32_t _Max_shifted_mantissa[11] = {
				533	16777215, 3355443, 671088, 134217, 26843, 5368, 1073, 214, 42, 8, 1 };
				534
				535	unsigned long _Trailing_zero_bits;
				536	(void) _BitScanForward(&_Trailing_zero_bits, __v.__mantissa); // __v.__mantissa is guaranteed nonzero
				537	const uint32_t _Shifted_mantissa = __v.__mantissa >> _Trailing_zero_bits;
				538	_Can_use_ryu = _Shifted_mantissa <= _Max_shifted_mantissa[_Ryu_exponent];
				539	}
				540
				541	if (!_Can_use_ryu) {
				542	const uint32_t _Mantissa2 = __ieeeMantissa \| (1u << __FLOAT_MANTISSA_BITS); // restore implicit bit
				543	const int32_t _Exponent2 = static_cast<int32_t>(__ieeeExponent)
				544	- __FLOAT_BIAS - __FLOAT_MANTISSA_BITS; // bias and normalization
				545
				546	// Performance note: We've already called Ryu, so this will redundantly perform buffering and bounds checking.
				547	return _Large_integer_to_chars(_First, _Last, _Mantissa2, _Exponent2);
				548	}
				549
				550	// _Can_use_ryu
				551	// Print the decimal digits, left-aligned within [_First, _First + _Total_fixed_length).
				552	_Mid = _First + __olength;
				553	} else { // cases "1729", "17.29", and "0.001729"
				554	// Print the decimal digits, right-aligned within [_First, _First + _Total_fixed_length).
				555	_Mid = _First + _Total_fixed_length;
				556	}
				557
				558	while (_Output >= 10000) {
				559	#ifdef __clang__ // TRANSITION, LLVM-38217
				560	const uint32_t __c = _Output - 10000 * (_Output / 10000);
				561	#else
				562	const uint32_t __c = _Output % 10000;
				563	#endif
				564	_Output /= 10000;
				565	const uint32_t __c0 = (__c % 100) << 1;
				566	const uint32_t __c1 = (__c / 100) << 1;
				567	_VSTD::memcpy(_Mid -= 2, __DIGIT_TABLE + __c0, 2);
				568	_VSTD::memcpy(_Mid -= 2, __DIGIT_TABLE + __c1, 2);
				569	}
				570	if (_Output >= 100) {
				571	const uint32_t __c = (_Output % 100) << 1;
				572	_Output /= 100;
				573	_VSTD::memcpy(_Mid -= 2, __DIGIT_TABLE + __c, 2);
				574	}
				575	if (_Output >= 10) {
				576	const uint32_t __c = _Output << 1;
				577	_VSTD::memcpy(_Mid -= 2, __DIGIT_TABLE + __c, 2);
				578	} else {
				579	*--_Mid = static_cast<char>('0' + _Output);
				580	}
				581
				582	if (_Ryu_exponent > 0) { // case "172900" with _Can_use_ryu
				583	// Performance note: it might be more efficient to do this immediately after setting _Mid.
				584	_VSTD::memset(_First + __olength, '0', static_cast<size_t>(_Ryu_exponent));
				585	} else if (_Ryu_exponent == 0) { // case "1729"
				586	// Done!
				587	} else if (_Whole_digits > 0) { // case "17.29"
				588	// Performance note: moving digits might not be optimal.
				589	_VSTD::memmove(_First, _First + 1, static_cast<size_t>(_Whole_digits));
				590	_First[_Whole_digits] = '.';
				591	} else { // case "0.001729"
				592	// Performance note: a larger memset() followed by overwriting '.' might be more efficient.
				593	_First[0] = '0';
				594	_First[1] = '.';
				595	_VSTD::memset(_First + 2, '0', static_cast<size_t>(-_Whole_digits));
				596	}
				597
				598	return { _First + _Total_fixed_length, errc{} };
				599	}
				600
				601	const uint32_t _Total_scientific_length =
				602	__olength + (__olength > 1) + 4; // digits + possible decimal point + scientific exponent
				603	if (_Last - _First < static_cast<ptrdiff_t>(_Total_scientific_length)) {
				604	return { _Last, errc::value_too_large };
				605	}
				606	char* const __result = _First;
				607
				608	// Print the decimal digits.
				609	uint32_t __i = 0;
				610	while (_Output >= 10000) {
				611	#ifdef __clang__ // TRANSITION, LLVM-38217
				612	const uint32_t __c = _Output - 10000 * (_Output / 10000);
				613	#else
				614	const uint32_t __c = _Output % 10000;
				615	#endif
				616	_Output /= 10000;
				617	const uint32_t __c0 = (__c % 100) << 1;
				618	const uint32_t __c1 = (__c / 100) << 1;
				619	_VSTD::memcpy(__result + __olength - __i - 1, __DIGIT_TABLE + __c0, 2);
				620	_VSTD::memcpy(__result + __olength - __i - 3, __DIGIT_TABLE + __c1, 2);
				621	__i += 4;
				622	}
				623	if (_Output >= 100) {
				624	const uint32_t __c = (_Output % 100) << 1;
				625	_Output /= 100;
				626	_VSTD::memcpy(__result + __olength - __i - 1, __DIGIT_TABLE + __c, 2);
				627	__i += 2;
				628	}
				629	if (_Output >= 10) {
				630	const uint32_t __c = _Output << 1;
				631	// We can't use memcpy here: the decimal dot goes between these two digits.
				632	__result[2] = __DIGIT_TABLE[__c + 1];
				633	__result[0] = __DIGIT_TABLE[__c];
				634	} else {
				635	__result[0] = static_cast<char>('0' + _Output);
				636	}
				637
				638	// Print decimal point if needed.
				639	uint32_t __index;
				640	if (__olength > 1) {
				641	__result[1] = '.';
				642	__index = __olength + 1;
				643	} else {
				644	__index = 1;
				645	}
				646
				647	// Print the exponent.
				648	__result[__index++] = 'e';
				649	if (_Scientific_exponent < 0) {
				650	__result[__index++] = '-';
				651	_Scientific_exponent = -_Scientific_exponent;
				652	} else {
				653	__result[__index++] = '+';
				654	}
				655
				656	_VSTD::memcpy(__result + __index, __DIGIT_TABLE + 2 * _Scientific_exponent, 2);
				657	__index += 2;
				658
				659	return { _First + _Total_scientific_length, errc{} };
				660	}
				661
				662	[[nodiscard]] to_chars_result __f2s_buffered_n(char* const _First, char* const _Last, const float __f,
				663	const chars_format _Fmt) {
				664
				665	// Step 1: Decode the floating-point number, and unify normalized and subnormal cases.
				666	const uint32_t __bits = __float_to_bits(__f);
				667
				668	// Case distinction; exit early for the easy cases.
				669	if (__bits == 0) {
				670	if (_Fmt == chars_format::scientific) {
				671	if (_Last - _First < 5) {
				672	return { _Last, errc::value_too_large };
				673	}
				674
				675	_VSTD::memcpy(_First, "0e+00", 5);
				676
				677	return { _First + 5, errc{} };
				678	}
				679
				680	// Print "0" for chars_format::fixed, chars_format::general, and chars_format{}.
				681	if (_First == _Last) {
				682	return { _Last, errc::value_too_large };
				683	}
				684
				685	*_First = '0';
				686
				687	return { _First + 1, errc{} };
				688	}
				689
				690	// Decode __bits into mantissa and exponent.
				691	const uint32_t __ieeeMantissa = __bits & ((1u << __FLOAT_MANTISSA_BITS) - 1);
				692	const uint32_t __ieeeExponent = __bits >> __FLOAT_MANTISSA_BITS;
				693
				694	// When _Fmt == chars_format::fixed and the floating-point number is a large integer,
				695	// it's faster to skip Ryu and immediately print the integer exactly.
				696	if (_Fmt == chars_format::fixed) {
				697	const uint32_t _Mantissa2 = __ieeeMantissa \| (1u << __FLOAT_MANTISSA_BITS); // restore implicit bit
				698	const int32_t _Exponent2 = static_cast<int32_t>(__ieeeExponent)
				699	- __FLOAT_BIAS - __FLOAT_MANTISSA_BITS; // bias and normalization
				700
				701	// Normal values are equal to _Mantissa2 * 2^_Exponent2.
				702	// (Subnormals are different, but they'll be rejected by the _Exponent2 test here, so they can be ignored.)
				703
				704	if (_Exponent2 > 0) {
				705	return _Large_integer_to_chars(_First, _Last, _Mantissa2, _Exponent2);
				706	}
				707	}
				708
				709	const __floating_decimal_32 __v = __f2d(__ieeeMantissa, __ieeeExponent);
				710	return __to_chars(_First, _Last, __v, _Fmt, __ieeeMantissa, __ieeeExponent);
				711	}
				712
				713	_LIBCPP_END_NAMESPACE_STD
				714
				715	// clang-format on