Loading...
--- /dev/null
+++ Libc/Libc-1725.0.11/stdlib/strtofp.c
@@ -0,0 +1,3954 @@
+/*
+* Copyright (c) 2021 Apple Inc. All rights reserved.
+*
+* @APPLE_LICENSE_HEADER_START@
+*
+* This file contains Original Code and/or Modifications of Original Code
+* as defined in and that are subject to the Apple Public Source License
+* Version 2.0 (the 'License'). You may not use this file except in
+* compliance with the License. Please obtain a copy of the License at
+* http://www.opensource.apple.com/apsl/ and read it before using this
+* file.
+*
+* The Original Code and all software distributed under the License are
+* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+* Please see the License for the specific language governing rights and
+* limitations under the License.
+*
+* @APPLE_LICENSE_HEADER_END@
+*/
+
+// Implementation of `strtod` and related functions
+// Based on version 3 of "ffpp_strtofp"
+// Author: Tim Kientzle
+//
+// This file supports parsing floating-point values from ASCII/UTF8
+// text into the following commonly-used floating-point formats:
+// * IEEE 754/ISO 60559 binary16 ("half")
+// * IEEE 754/ISO 60559 binary32 ("float")
+// * IEEE 754/ISO 60559 binary64 ("double")
+// * Intel x87 80-bit extended format ("float80")
+// * IEEE 754/ISO 60559 binary128
+//
+// The core functionality consists of the following private functions,
+// which can be compiled for use on any platform regardless of the
+// local floating-point support:
+//
+// * _ffpp_strtoencf16_l (see footnote [1])
+// * _ffpp_strtoencf32_l
+// * _ffpp_strtoencf64_l
+// * _ffpp_strtoencf80_l (see footnote [2])
+// * _ffpp_strtoencf128_l
+//
+// The following standard functions are defined as wrappers around the
+// above to provide the APIs defined in the current C standardization
+// effort:
+//
+// * strtof (ISO C17)
+// * strtof_l (ISO C17)
+// * strtod (ISO C17)
+// * strtod_l (ISO C17)
+// * strtold (ISO C17, see footnote [3])
+// * strtold_l (ISO C17, see footnote [3])
+// * strtoencf16 (TS 18661-3)
+// * strtoencf32 (TS 18661-3)
+// * strtoencf64 (TS 18661-3)
+// * strtoencf64x (TS 18661-3, see footnote [2])
+// * strtoencf128 (TS 18661-3)
+//
+// TODO: The following wrappers can easily be added for
+// any platform that defines the required `_Float##` types.
+// * strtof16 (TS 18661-3)
+// * strtof32 (TS 18661-3)
+// * strtof64 (TS 18661-3)
+// * strtof64x (TS 18661-3, see footnote [2])
+// * strtof128 (TS 18661-3)
+//
+// IMPLEMENTATION NOTES
+// --------------------------------
+//
+// This is a new implementation that uses ideas from a number of
+// sources, including Clinger's 1990 paper, Gay's gdtoa library,
+// Lemire's fast_double_parser implementation, Google's abseil
+// library, as well as work I've done for the Swift standard library.
+//
+// All of the parsers use the same initial parsing logic and fall back
+// to the same arbitrary-precision integer code. In between these,
+// they use varying format-specific optimizations.
+//
+// First Step: Initial Parsing
+//
+// The initial parsing of the textual input is handled by
+// `fastParse64`. As the name suggests, this uses a fixed-size 64-bit
+// accumulator for speed and is heavily optimized assuming that the
+// input significand has at most 19 digits. Longer input will overflow
+// the accumulator, triggering an additional scan of the input. This
+// initial parse also detects Hex floats, nans, and "inf"/"infinity"
+// strings and dispatches those to specialized implementations.
+//
+// With the initial parse complete, the challenge is to compute
+// decimalSignificand * 10^decimalExponent
+// with precisely correct rounding as quickly as possible.
+//
+// Last Step: arbitrary-precision integer calculation (generalSlowpath)
+//
+// Specific formatters use a variety of optimized paths that provide
+// quick results for specific classes of input. But none of those
+// work for every input value. So we have a final fallback that uses
+// arbitrary-precision integer arithmetic to compute the exact results
+// with guaranteed accuracy for all inputs. Of course, the required
+// arbitrary-precision arithmetic can be significantly more expensive,
+// especially when the significand is very long or the exponent is
+// very large.
+//
+// Two optimizations are worth mentioning:
+//
+// Powers of 5: We break the power of 10 computation into a power of 5
+// and a power of 2. The latter can be simply folded into the final
+// FP exponent, so this effectively reduces the power of 10
+// computation to the computation of a power of 5, which is a
+// significantly smaller number. For very large exponents, the run
+// time is dominated by this power of 5 computation. (Up to 95% of
+// the CPU time for extreme binary128 values.)
+//
+// Limit on significand digits: I first saw this optimization in the
+// Abseil library. First, consider the exact decimal expansions for
+// all the exact midpoints between adjacent pairs of floating-point
+// values. There is some maximum number of significant digits
+// `maxDecimalMidpointDigits`. Following an argument from Clinger, we
+// only need to be able to distinguish whether we are above or below
+// any such midpoint. So it suffices to consider the first
+// `maxDecimalMidpointDigits`, appending a single digit that is
+// non-zero if the trailing digits are non-zero. This allows us to
+// limit the total size of the arithmetic used in this stage. In
+// particular, for double, this limits us to less than 1024 bytes of
+// total space, which can easily fit on the stack, allowing us to
+// parse any double input regardless of length without allocation.
+//
+// For binary128, the comparable limit is 11564 digits, which gives a
+// maximum work buffer size of nearly 10k. This seems a bit large for
+// the stack, but a buffer of 1536 bytes is big enough to process any
+// binary128 with less than 100 digits, regardless of exponent. TODO:
+// For a smaller range of exponents, we can limit
+// maxDecimalMidpointDigits further. That would allow us to process
+// any binary128 within a range of exponents regardless of number of
+// digits with the same 1536-byte buffer.
+//
+// Note: Compared to Clinger's AlgorithmR, this requires fewer
+// arbitrary-precision operations and gives the correct answer
+// directly without requiring a nearly-correct initial value.
+// Compared to Clinger's AlgorithmM, this takes advantage of the fact
+// that our integer arithmetic is occuring in the same base as used by
+// the final FP format. This means we can interpret the bits from a
+// simple calculation instead of doing additional work to abstractly
+// compute the target format.
+//
+// These first and last steps (`fastParse64` and `generalSlowpath`)
+// are sufficient to provide guaranteed correct results for any
+// format. The optimizations described next are accelerators that
+// allow us to provide a result more quickly for common cases where
+// the additional code complexity and testing cost can be justified.
+//
+// Optimization: Use a single Floating-point calculation
+//
+// Clinger(1990) observed that when converting to double, if the
+// significand and 10^k are both exactly representable by doubles,
+// then
+// (double)significand * (double)10^k
+// is always correct with a single double multiplication.
+// Similarly, if 10^-k is exactly representable, then
+// (double)significand / (double)10^(-k)
+// is always correct with a single double division.
+//
+// In particular, any significand of 15 digits or less can be exactly
+// represented by a double, as can any power of 10 up to 10^22.
+//
+// There are a few similar cases where we can provide exact inputs to
+// a single floating-point operation. Since a single FP operation
+// always produces correctly-rounded results (in the current rounding
+// mode), these always produce correct results for the corresponding
+// range of inputs. Since this relies on the hardware FPU, it is very
+// fast when it can be used.
+//
+// This optimization works especially well for hand-entered input,
+// which typically has few digits and a small exponent. It works less
+// well for JSON, as random double values in JSON are typically
+// presented with 16 or 17 digits. Fast FMA or mixed-precision
+// arithmetic can extend this technique further in certain
+// environments. In particular, FPU-supported multiplication and
+// division of binary128 arguments with a binary64 result would handle
+// the common JSON cases.
+//
+// Optimization: Interval calculation
+//
+// We can easily compute fixed-precision upper and lower bounds for
+// the power-of-10 value from a lookup table. Likewise, we can
+// construct bounds for an arbitrary-length significand by inspecting
+// just the first digits. From these bounds, we can compute upper and
+// lower bounds for the final result, all with fast fixed-precision
+// integer arithmetic. Depending on the precision, these upper and
+// lower bounds can coincide for more than 99% of all inputs,
+// guaranteeing the correct result in those cases. This also allows
+// us to use fast fixed-precision arithmetic for very long inputs,
+// only using the first digits of the significand in cases where the
+// additional digits do not affect the result.
+//
+// PERFORMANCE
+// --------------------------------
+//
+// This strtod is about 10x faster than the implementation used by
+// Apple's libc prior to 2022. On Lemire's `canada.txt` benchmark,
+// this implementation achieves over 700MB/s compared to 75MB/s for
+// the earlier implementation when parsing a large collection of
+// latitude/longitude values expressed as doubles with 15-17 digits.
+// That makes it about 70% the speed of Lemire's fast_double_parser
+// implementation, but still significantly faster than the other
+// implementations benchmarked by Lemire.
+//
+// Note: You can achieve speed similar to Lemire's implementation by
+// judiciously inlining supporting routines, disabling rounding mode
+// and locale support, and using a direct lookup into a fully-expanded
+// table instead of two table lookups and a multiplication.
+//
+// For other inputs, performance varies depending on the particular
+// optimization that gets used. Binary128 and Float80 parsing here
+// uses a more heavily-factored power-of-10 table that requires up to
+// 3 multiplications to produce a lower bound. Binary16 currently
+// has no format-specific optimizations since the arbitrary-precision
+// path is fast enough for such a small range of exponents.
+//
+// CORRECTNESS
+// --------------------------------
+//
+// Primary accuracy testing has relied on a modified form of the
+// "FFPP" test suite I developed for work on "SwiftDtoa.c". This includes
+// over 100 million computed test cases.
+//
+// This strtod has also been fuzz-tested to ensure that the behavior
+// matches the previous macOS libc implementation based on gdtoa
+// (except for known bugs in that code):
+// * It returns the same double result
+// * It updates `end` identically
+// * It updates `errno` identically
+// * For any locale
+// * For any standard FP rounding mode
+//
+// NOTES
+// --------------------------------
+//
+// General terminology note: I use "binary##" to refer to the IEEE 754
+// portable binary formats, and "float80" to refer to the Intel x87
+// 80-bit extended format.
+//
+// [1] TS18661-3 defines parsing functions that use the current
+// default locale, but it does not (as of early 2021) define any
+// variants that take an explicit locale argument. In order to provide
+// ISO C17 `strtod_l` functionality, I've found it useful to extend
+// the TS18661-3 suite of functions with `_l` variants.
+//
+// [2] The term "binary64x" or "f64x" in various standards refers
+// generally to any extended format, not specifically the Intel x87
+// 80-bit format. This makes it difficult to use the term `f64x` for
+// portable implementations. To keep things clear, I have
+// `strtoencf80_l` to specifically support the Intel float80 format
+// and define `strtoencf64x` as a wrapper that calls into either
+// `strtoencf64_l` or `strtoencf80_l` depending on the local system
+// conventions (using the same approach as used for `strtold`).
+//
+// [3] `strtold` is defined as a wrapper for whichever supported
+// format (binary64, float80, or binary128) corresponds to long double
+// on the local system. Search below for uses of `LONG_DOUBLE_IS_` to
+// see how this is done.
+//
+//
+// TODO:
+//
+// * Figure out how to test whether `_Float##` types are provided,
+// use that to define `strtof##` automatically on systems that
+// support TS18661-3.
+//
+// * Support big-endian systems. Much of the support for
+// binary16/32/64 should already work correctly since it builds the
+// result bitwise in an unsigned integer and uses memcpy() to copy
+// the bits. This works if ints and floats have the same
+// endianness, which I believe is universally true on all current
+// hardware. Other code builds up results assuming a particular
+// byte order and will need to be modified to support big-endian
+// environments.
+//
+
+#define _POSIX_C_SOURCE 200809L
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <float.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+// Check whether access to the floating-point environment is supported
+#if !defined(ENABLE_FENV_ACCESS)
+#if __has_include(<fenv.h>)
+ #include <fenv.h>
+
+ #define ENABLE_FENV_ACCESS 1
+#else
+ #define ENABLE_FENV_ACCESS 0
+#endif
+#endif
+
+// Check whether precision/range of floating-point values is defined
+#if __has_include(<math.h>)
+ #include <math.h>
+#else
+#if !defined(FLT_EVAL_METHOD)
+ // Note that Clang's builtin float.h header does define this macro
+ #define FLT_EVAL_METHOD -1
+#endif
+#endif
+
+// Check whether locales are supported
+#if !defined(ENABLE_LOCALE_SUPPORT)
+#if __has_include(<langinfo.h>) && __has_include(<locale.h>)
+ #include <langinfo.h>
+ #include <locale.h>
+#if defined(__APPLE__)
+ #include <xlocale.h>
+ #include <xlocale_private.h>
+#endif
+
+ #define ENABLE_LOCALE_SUPPORT 1
+#else
+ #define ENABLE_LOCALE_SUPPORT 0
+#endif
+#endif
+
+// ================================================================
+// Detect the floating-point formats supported on this platform
+// by testing the standard macros defined in float.h
+// ================================================================
+
+// Does "float" on this system use IEEE 754 binary32 format?
+// (Almost all modern systems do this.)
+#if (FLT_RADIX == 2) && (FLT_MANT_DIG == 24) && (FLT_MIN_EXP == -125) && (FLT_MAX_EXP == 128)
+ #define FLOAT_IS_BINARY32 1
+#else
+ #define FLOAT_IS_BINARY32 0
+#endif
+
+// Does "double" on this system use IEEE 754 binary64 format?
+// (Almost all modern systems do this.)
+#if (FLT_RADIX == 2) && (DBL_MANT_DIG == 53) && (DBL_MIN_EXP == -1021) && (DBL_MAX_EXP == 1024)
+ #define DOUBLE_IS_BINARY64 1
+#else
+ #define DOUBLE_IS_BINARY64 0
+#endif
+
+// Does "long double" on this system use IEEE 754 binary64 format?
+// (Example: Windows on all hardware, macOS on ARM.)
+#if (FLT_RADIX == 2) && (LDBL_MANT_DIG == 53) && (LDBL_MIN_EXP == -1021) && (LDBL_MAX_EXP == 1024)
+ #define LONG_DOUBLE_IS_BINARY64 1
+#else
+ #define LONG_DOUBLE_IS_BINARY64 0
+#endif
+
+// Is "long double" on this system the same as Float80?
+// (Example: macOS, Linux, and FreeBSD when running on x86 or x86_64 processors.)
+#if (FLT_RADIX == 2) && (LDBL_MANT_DIG == 64) && (LDBL_MIN_EXP == -16381) && (LDBL_MAX_EXP == 16384)
+ #define LONG_DOUBLE_IS_FLOAT80 1
+#else
+ #define LONG_DOUBLE_IS_FLOAT80 0
+#endif
+
+// Does "long double" on this system use IEEE 754 binary128 format?
+// (Example: Android on LP64 hardware.)
+#if (FLT_RADIX == 2) && (LDBL_MANT_DIG == 113) && (LDBL_MIN_EXP == -16381) && (LDBL_MAX_EXP == 16384)
+ #define LONG_DOUBLE_IS_BINARY128 1
+#else
+ #define LONG_DOUBLE_IS_BINARY128 0
+#endif
+
+// ================================================================
+// Detect/configure local platform arithmetic
+
+#if defined(__SIZEOF_INT128__)
+ // We get a significant speed boost if we can use the __uint128_t
+ // type that's present in GCC and Clang on 64-bit architectures.
+ #define HAVE_UINT128_T 1
+ #define MP_WORD_BITS 32 // Multi-precision ints use 32-bit words on 64-bit platforms
+#else
+ #define HAVE_UINT128_T 0
+ #define MP_WORD_BITS 16 // Multi-precision ints use 16-bit words on 32-bit platforms
+#endif
+
+// ================================================================
+// How to get locale-specific decimal point character
+// (This is more complicated than it should be, largely because
+// the standard functions are a lot slower than we'd like and
+// the standard mechanisms return an arbitrary-length string
+// for the decimal point. So we go to some lengths to avoid
+// calling the standard functions: We defer such calls until
+// we see a character that is not a digit or other known character,
+// and we handle the "C" locale specially.)
+
+// Define macros used for locale information below:
+//
+// strtofp_locale_decimal_point(loc)
+// - Return the decimal point string for the specified locale
+//
+// strtofp_locale_t
+// - Type of locale info: locale_t on systems that support locales, else void *
+//
+// strtofp_C_locale
+// - Constant that defines the "C" locale, used to bypass other locale checks
+//
+// strtofp_current_locale()
+// - Returns the default locale to use when no explicit locale is given
+//
+#if !ENABLE_LOCALE_SUPPORT
+ // If no locale support, always use "." as the decimal point
+ #define strtofp_locale_t void *
+ #define strtofp_current_locale() (NULL)
+ #define strtofp_C_locale (NULL)
+ #define strtofp_locale_decimal_point(loc) ((const unsigned char *)("."))
+#elif defined(__linux__)
+ #define strtofp_locale_t locale_t
+ #define strtofp_current_locale() (uselocale(NULL))
+ #define strtofp_C_locale (NULL)
+ #define strtofp_locale_decimal_point(loc) \
+ ((const unsigned char *)((loc) == LC_GLOBAL_LOCALE ? nl_langinfo(RADIXCHAR) : nl_langinfo_l(RADIXCHAR, (loc))))
+#elif defined(__APPLE__)
+ #define strtofp_locale_t locale_t
+ #define strtofp_C_locale (NULL)
+ // For use inside of Libc: Use thread-specific locale if set, else global locale
+ #define strtofp_current_locale() (__current_locale())
+ // For use outside of libc
+ // #define strtofp_current_locale() (uselocale(NULL))
+ #define strtofp_locale_decimal_point(loc) ((const unsigned char *)(localeconv_l((loc))->decimal_point))
+#else
+ #error Need definition for strtofp_locale_decimal_point and strtofp_current_locale for this platform
+#endif
+
+// ================================================================
+// Floating-point rounding mode
+
+#if ENABLE_FENV_ACCESS
+ #pragma GCC diagnostic push
+ #pragma GCC diagnostic ignored "-Wignored-pragmas"
+ #pragma STDC FENV_ACCESS ON
+ #pragma GCC diagnostic pop
+
+ #define FENV_ROUNDING_MODE() fegetround()
+#else
+ // By default, we use FE_TONEAREST, which rounds to the nearest representable value
+ #define FENV_ROUNDING_MODE() 0
+#endif
+
+// In certain scenarios, we require FE_TOWARDZERO, which rounds towards zero,
+// but need to provide a sentinel value if it is not defined
+#ifndef FE_TOWARDZERO
+ #define FE_TOWARDZERO 0xFE00
+#endif
+
+#if defined(FE_DOWNWARD) && FE_TOWARDZERO == FE_DOWNWARD
+ #error "Definition of FE_TOWARDZERO conflicts with FE_DOWNWARD"
+#endif
+#if defined(FE_TONEAREST) && FE_TOWARDZERO == FE_TONEAREST
+ #error "Definition of FE_TOWARDZERO conflicts with FE_TONEAREST"
+#endif
+#if defined(FE_UPWARD) && FE_TOWARDZERO == FE_UPWARD
+ #error "Definition of FE_TOWARDZERO conflicts with FE_UPWARD"
+#endif
+
+// ================================================================
+//
+// Enable/disable particular formats
+//
+// ================================================================
+
+// Enable binary16/32/64 by default everywhere.
+#ifndef ENABLE_BINARY16_SUPPORT
+ #define ENABLE_BINARY16_SUPPORT 1
+#endif
+#ifndef ENABLE_BINARY32_SUPPORT
+ #define ENABLE_BINARY32_SUPPORT 1
+#endif
+#ifndef ENABLE_BINARY64_SUPPORT
+ #define ENABLE_BINARY64_SUPPORT 1
+#endif
+// Enable float80 by default only if necessary to support long double
+#ifndef ENABLE_FLOAT80_SUPPORT
+ #if LONG_DOUBLE_IS_FLOAT80
+ #define ENABLE_FLOAT80_SUPPORT 1
+ #else
+ #define ENABLE_FLOAT80_SUPPORT 0
+ #endif
+#endif
+// Enable binary128 by default only if necessary to support long double
+#ifndef ENABLE_BINARY128_SUPPORT
+ #if LONG_DOUBLE_IS_BINARY128
+ #define ENABLE_BINARY128_SUPPORT 1
+ #else
+ #define ENABLE_BINARY128_SUPPORT 0
+ #endif
+#endif
+
+// Enable float80 interval optimization by default.
+// If you want to be able to tick off "float80" support but don't want to
+// pay for it, define this to 0. Then you'll just get the minimum.
+// Enabling this costs about 3.5k of code.
+// Note: The code is shared with binary128, so there's no point to enabling
+// it for one but not the other.
+#if ENABLE_FLOAT80_SUPPORT
+ #ifndef ENABLE_FLOAT80_OPTIMIZATIONS
+ #define ENABLE_FLOAT80_OPTIMIZATIONS 1
+ #endif
+#else
+ #define ENABLE_FLOAT80_OPTIMIZATIONS 0
+#endif
+
+// Enable binary128 interval optimization by default.
+#if ENABLE_BINARY128_SUPPORT
+ #ifndef ENABLE_BINARY128_OPTIMIZATIONS
+ #define ENABLE_BINARY128_OPTIMIZATIONS 1
+ #endif
+#else
+ #define ENABLE_BINARY128_OPTIMIZATIONS 0
+#endif
+
+// At least one format must be enabled
+#if !ENABLE_BINARY16_SUPPORT && !ENABLE_BINARY32_SUPPORT && !ENABLE_BINARY64_SUPPORT && !ENABLE_FLOAT80_SUPPORT && !ENABLE_BINARY128_SUPPORT
+#error At least one format must be enabled
+#endif
+
+// ================================================================
+// Look up tables
+
+static const unsigned char hexdigit[256] = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 10, 11, 12, 13, 14, 15, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 10, 11, 12, 13, 14, 15, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+
+// ================================================================
+// ================================================================
+//
+// Power-of-10 tables
+//
+// ================================================================
+// ================================================================
+
+
+#define binaryExponentFor10ToThe(p) ((int)(((((int64_t)(p)) * 55732705) >> 24) + 1))
+
+// For binary32 only, we need a single 880-byte powersOf10_Float table below
+
+// For binary64 only, we need a 224-byte subset of the first table and
+// a 232-byte additional table for a total of 456 bytes
+
+// For both, we can overlap somewhat so we only need the 880-byte binary32
+// table and the auxiliary 232-byte table for binary64, for a total of 1112 bytes.
+
+#if ENABLE_BINARY32_SUPPORT || ENABLE_BINARY64_SUPPORT || ENABLE_FLOAT80_OPTIMIZATIONS || ENABLE_BINARY128_OPTIMIZATIONS
+// These are the 64-bit binary significands of the
+// largest binary floating-point value that is not greater
+// than the corresponding power of 10.
+
+// That is, when these are not exact, they are less than
+// the exact decimal value. This allows us to use these
+// to construct tight intervals around the true power
+// of ten value.
+static const uint64_t powersOf10_Float[] = {
+#if ENABLE_BINARY32_SUPPORT
+ 0xb0af48ec79ace837ULL, // x 2^-232 ~= 10^-70
+ 0xdcdb1b2798182244ULL, // x 2^-229 ~= 10^-69
+ 0x8a08f0f8bf0f156bULL, // x 2^-225 ~= 10^-68
+ 0xac8b2d36eed2dac5ULL, // x 2^-222 ~= 10^-67
+ 0xd7adf884aa879177ULL, // x 2^-219 ~= 10^-66
+ 0x86ccbb52ea94baeaULL, // x 2^-215 ~= 10^-65
+ 0xa87fea27a539e9a5ULL, // x 2^-212 ~= 10^-64
+ 0xd29fe4b18e88640eULL, // x 2^-209 ~= 10^-63
+ 0x83a3eeeef9153e89ULL, // x 2^-205 ~= 10^-62
+ 0xa48ceaaab75a8e2bULL, // x 2^-202 ~= 10^-61
+ 0xcdb02555653131b6ULL, // x 2^-199 ~= 10^-60
+ 0x808e17555f3ebf11ULL, // x 2^-195 ~= 10^-59
+ 0xa0b19d2ab70e6ed6ULL, // x 2^-192 ~= 10^-58
+ 0xc8de047564d20a8bULL, // x 2^-189 ~= 10^-57
+ 0xfb158592be068d2eULL, // x 2^-186 ~= 10^-56
+ 0x9ced737bb6c4183dULL, // x 2^-182 ~= 10^-55
+ 0xc428d05aa4751e4cULL, // x 2^-179 ~= 10^-54
+ 0xf53304714d9265dfULL, // x 2^-176 ~= 10^-53
+ 0x993fe2c6d07b7fabULL, // x 2^-172 ~= 10^-52
+ 0xbf8fdb78849a5f96ULL, // x 2^-169 ~= 10^-51
+ 0xef73d256a5c0f77cULL, // x 2^-166 ~= 10^-50
+ 0x95a8637627989aadULL, // x 2^-162 ~= 10^-49
+ 0xbb127c53b17ec159ULL, // x 2^-159 ~= 10^-48
+ 0xe9d71b689dde71afULL, // x 2^-156 ~= 10^-47
+ 0x9226712162ab070dULL, // x 2^-152 ~= 10^-46
+ 0xb6b00d69bb55c8d1ULL, // x 2^-149 ~= 10^-45
+ 0xe45c10c42a2b3b05ULL, // x 2^-146 ~= 10^-44
+ 0x8eb98a7a9a5b04e3ULL, // x 2^-142 ~= 10^-43
+ 0xb267ed1940f1c61cULL, // x 2^-139 ~= 10^-42
+ 0xdf01e85f912e37a3ULL, // x 2^-136 ~= 10^-41
+ 0x8b61313bbabce2c6ULL, // x 2^-132 ~= 10^-40
+ 0xae397d8aa96c1b77ULL, // x 2^-129 ~= 10^-39
+ 0xd9c7dced53c72255ULL, // x 2^-126 ~= 10^-38
+ 0x881cea14545c7575ULL, // x 2^-122 ~= 10^-37
+ 0xaa242499697392d2ULL, // x 2^-119 ~= 10^-36
+ 0xd4ad2dbfc3d07787ULL, // x 2^-116 ~= 10^-35
+ 0x84ec3c97da624ab4ULL, // x 2^-112 ~= 10^-34
+ 0xa6274bbdd0fadd61ULL, // x 2^-109 ~= 10^-33
+ 0xcfb11ead453994baULL, // x 2^-106 ~= 10^-32
+ 0x81ceb32c4b43fcf4ULL, // x 2^-102 ~= 10^-31
+ 0xa2425ff75e14fc31ULL, // x 2^-99 ~= 10^-30
+ 0xcad2f7f5359a3b3eULL, // x 2^-96 ~= 10^-29
+ 0xfd87b5f28300ca0dULL, // x 2^-93 ~= 10^-28
+ 0x9e74d1b791e07e48ULL, // x 2^-89 ~= 10^-27
+ 0xc612062576589ddaULL, // x 2^-86 ~= 10^-26
+ 0xf79687aed3eec551ULL, // x 2^-83 ~= 10^-25
+ 0x9abe14cd44753b52ULL, // x 2^-79 ~= 10^-24
+ 0xc16d9a0095928a27ULL, // x 2^-76 ~= 10^-23
+ 0xf1c90080baf72cb1ULL, // x 2^-73 ~= 10^-22
+ 0x971da05074da7beeULL, // x 2^-69 ~= 10^-21
+ 0xbce5086492111aeaULL, // x 2^-66 ~= 10^-20
+ 0xec1e4a7db69561a5ULL, // x 2^-63 ~= 10^-19
+ 0x9392ee8e921d5d07ULL, // x 2^-59 ~= 10^-18
+ 0xb877aa3236a4b449ULL, // x 2^-56 ~= 10^-17
+ 0xe69594bec44de15bULL, // x 2^-53 ~= 10^-16
+ 0x901d7cf73ab0acd9ULL, // x 2^-49 ~= 10^-15
+ 0xb424dc35095cd80fULL, // x 2^-46 ~= 10^-14
+ 0xe12e13424bb40e13ULL, // x 2^-43 ~= 10^-13
+ 0x8cbccc096f5088cbULL, // x 2^-39 ~= 10^-12
+ 0xafebff0bcb24aafeULL, // x 2^-36 ~= 10^-11
+ 0xdbe6fecebdedd5beULL, // x 2^-33 ~= 10^-10
+ 0x89705f4136b4a597ULL, // x 2^-29 ~= 10^-9
+ 0xabcc77118461cefcULL, // x 2^-26 ~= 10^-8
+ 0xd6bf94d5e57a42bcULL, // x 2^-23 ~= 10^-7
+ 0x8637bd05af6c69b5ULL, // x 2^-19 ~= 10^-6
+ 0xa7c5ac471b478423ULL, // x 2^-16 ~= 10^-5
+ 0xd1b71758e219652bULL, // x 2^-13 ~= 10^-4
+ 0x83126e978d4fdf3bULL, // x 2^-9 ~= 10^-3
+ 0xa3d70a3d70a3d70aULL, // x 2^-6 ~= 10^-2
+ 0xccccccccccccccccULL, // x 2^-3 ~= 10^-1
+#endif
+ // These values are exact; we use them together with
+ // _CoarseBinary64 below for binary64 format.
+ 0x8000000000000000ULL, // x 2^1 == 10^0 exactly
+ 0xa000000000000000ULL, // x 2^4 == 10^1 exactly
+ 0xc800000000000000ULL, // x 2^7 == 10^2 exactly
+ 0xfa00000000000000ULL, // x 2^10 == 10^3 exactly
+ 0x9c40000000000000ULL, // x 2^14 == 10^4 exactly
+ 0xc350000000000000ULL, // x 2^17 == 10^5 exactly
+ 0xf424000000000000ULL, // x 2^20 == 10^6 exactly
+ 0x9896800000000000ULL, // x 2^24 == 10^7 exactly
+ 0xbebc200000000000ULL, // x 2^27 == 10^8 exactly
+ 0xee6b280000000000ULL, // x 2^30 == 10^9 exactly
+ 0x9502f90000000000ULL, // x 2^34 == 10^10 exactly
+ 0xba43b74000000000ULL, // x 2^37 == 10^11 exactly
+ 0xe8d4a51000000000ULL, // x 2^40 == 10^12 exactly
+ 0x9184e72a00000000ULL, // x 2^44 == 10^13 exactly
+ 0xb5e620f480000000ULL, // x 2^47 == 10^14 exactly
+ 0xe35fa931a0000000ULL, // x 2^50 == 10^15 exactly
+ 0x8e1bc9bf04000000ULL, // x 2^54 == 10^16 exactly
+ 0xb1a2bc2ec5000000ULL, // x 2^57 == 10^17 exactly
+ 0xde0b6b3a76400000ULL, // x 2^60 == 10^18 exactly
+ 0x8ac7230489e80000ULL, // x 2^64 == 10^19 exactly
+ 0xad78ebc5ac620000ULL, // x 2^67 == 10^20 exactly
+ 0xd8d726b7177a8000ULL, // x 2^70 == 10^21 exactly
+ 0x878678326eac9000ULL, // x 2^74 == 10^22 exactly
+ 0xa968163f0a57b400ULL, // x 2^77 == 10^23 exactly
+ 0xd3c21bcecceda100ULL, // x 2^80 == 10^24 exactly
+ 0x84595161401484a0ULL, // x 2^84 == 10^25 exactly
+ 0xa56fa5b99019a5c8ULL, // x 2^87 == 10^26 exactly
+ 0xcecb8f27f4200f3aULL, // x 2^90 == 10^27 exactly
+#if ENABLE_BINARY32_SUPPORT
+ 0x813f3978f8940984ULL, // x 2^94 ~= 10^28
+ 0xa18f07d736b90be5ULL, // x 2^97 ~= 10^29
+ 0xc9f2c9cd04674edeULL, // x 2^100 ~= 10^30
+ 0xfc6f7c4045812296ULL, // x 2^103 ~= 10^31
+ 0x9dc5ada82b70b59dULL, // x 2^107 ~= 10^32
+ 0xc5371912364ce305ULL, // x 2^110 ~= 10^33
+ 0xf684df56c3e01bc6ULL, // x 2^113 ~= 10^34
+ 0x9a130b963a6c115cULL, // x 2^117 ~= 10^35
+ 0xc097ce7bc90715b3ULL, // x 2^120 ~= 10^36
+ 0xf0bdc21abb48db20ULL, // x 2^123 ~= 10^37
+ 0x96769950b50d88f4ULL, // x 2^127 ~= 10^38
+ 0xbc143fa4e250eb31ULL, // x 2^130 ~= 10^39
+#endif
+};
+#endif
+
+#if ENABLE_BINARY64_SUPPORT || ENABLE_FLOAT80_OPTIMIZATIONS || ENABLE_BINARY128_OPTIMIZATIONS
+ #if ENABLE_BINARY32_SUPPORT
+ static const uint64_t *powersOf10_Exact64 = powersOf10_Float + 70;
+ #else
+ static const uint64_t *powersOf10_Exact64 = powersOf10_Float;
+ #endif
+#endif
+
+#if ENABLE_BINARY64_SUPPORT
+// Rounded-down values supporting the full range of binary64.
+// As above, when not exact, these are rounded down to the
+// nearest value lower than or equal to the exact power of 10.
+//
+// Table size: 232 bytes
+//
+// We only store every 28th power of ten here.
+// We can multiply by an exact 64-bit power of
+// ten from powersOf10_Exact64 above to reconstruct the
+// significand for any power of 10.
+static const uint64_t powersOf10_CoarseBinary64[30] = {
+ 0xdd5a2c3eab3097cbULL, // x 2^-1395 ~= 10^-420
+ 0xdf82365c497b5453ULL, // x 2^-1302 ~= 10^-392
+ 0xe1afa13afbd14d6dULL, // x 2^-1209 ~= 10^-364
+ 0xe3e27a444d8d98b7ULL, // x 2^-1116 ~= 10^-336
+ 0xe61acf033d1a45dfULL, // x 2^-1023 ~= 10^-308
+ 0xe858ad248f5c22c9ULL, // x 2^-930 ~= 10^-280
+ 0xea9c227723ee8bcbULL, // x 2^-837 ~= 10^-252
+ 0xece53cec4a314ebdULL, // x 2^-744 ~= 10^-224
+ 0xef340a98172aace4ULL, // x 2^-651 ~= 10^-196
+ 0xf18899b1bc3f8ca1ULL, // x 2^-558 ~= 10^-168
+ 0xf3e2f893dec3f126ULL, // x 2^-465 ~= 10^-140
+ 0xf64335bcf065d37dULL, // x 2^-372 ~= 10^-112
+ 0xf8a95fcf88747d94ULL, // x 2^-279 ~= 10^-84
+ 0xfb158592be068d2eULL, // x 2^-186 ~= 10^-56
+ 0xfd87b5f28300ca0dULL, // x 2^-93 ~= 10^-28
+ 0x8000000000000000ULL, // x 2^1 == 10^0 exactly
+ 0x813f3978f8940984ULL, // x 2^94 == 10^28 exactly
+ 0x82818f1281ed449fULL, // x 2^187 ~= 10^56
+ 0x83c7088e1aab65dbULL, // x 2^280 ~= 10^84
+ 0x850fadc09923329eULL, // x 2^373 ~= 10^112
+ 0x865b86925b9bc5c2ULL, // x 2^466 ~= 10^140
+ 0x87aa9aff79042286ULL, // x 2^559 ~= 10^168
+ 0x88fcf317f22241e2ULL, // x 2^652 ~= 10^196
+ 0x8a5296ffe33cc92fULL, // x 2^745 ~= 10^224
+ 0x8bab8eefb6409c1aULL, // x 2^838 ~= 10^252
+ 0x8d07e33455637eb2ULL, // x 2^931 ~= 10^280
+ 0x8e679c2f5e44ff8fULL, // x 2^1024 ~= 10^308
+ 0x8fcac257558ee4e6ULL, // x 2^1117 ~= 10^336
+ 0x91315e37db165aa9ULL, // x 2^1210 ~= 10^364
+ 0x929b7871de7f22b9ULL, // x 2^1303 ~= 10^392
+};
+#endif
+
+#if ENABLE_FLOAT80_OPTIMIZATIONS || ENABLE_BINARY128_OPTIMIZATIONS
+// Every 56th power of 10 across the range of Float80/Binary128
+//
+// Table size: 2880 bytes
+static const uint64_t powersOf10_Binary128[180] = {
+ // Low-order ... high-order
+ 0x0000000000000000ULL, 0x8000000000000000ULL, // x 2^1 == 10^0 exactly
+ 0xbff8f10e7a8921a4ULL, 0x82818f1281ed449fULL, // x 2^187 ~= 10^56
+ 0x03e2cf6bc604ddb0ULL, 0x850fadc09923329eULL, // x 2^373 ~= 10^112
+ 0x90fb44d2f05d0842ULL, 0x87aa9aff79042286ULL, // x 2^559 ~= 10^168
+ 0x82bd6b70d99aaa6fULL, 0x8a5296ffe33cc92fULL, // x 2^745 ~= 10^224
+ 0xdb0b487b6423e1e8ULL, 0x8d07e33455637eb2ULL, // x 2^931 ~= 10^280
+ 0x213a4f0aa5e8a7b1ULL, 0x8fcac257558ee4e6ULL, // x 2^1117 ~= 10^336
+ 0x1c306f5d1b0b5fdfULL, 0x929b7871de7f22b9ULL, // x 2^1303 ~= 10^392
+ 0xa7ea9c8838ce9437ULL, 0x957a4ae1ebf7f3d3ULL, // x 2^1489 ~= 10^448
+ 0xbf1d49cacccd5e68ULL, 0x9867806127ece4f4ULL, // x 2^1675 ~= 10^504
+ 0x655494c5c95d77f2ULL, 0x9b63610bb9243e46ULL, // x 2^1861 ~= 10^560
+ 0x02e008393fd60b55ULL, 0x9e6e366733f85561ULL, // x 2^2047 ~= 10^616
+ 0x55e04dba4b3bd4ddULL, 0xa1884b69ade24964ULL, // x 2^2233 ~= 10^672
+ 0x44b222741eb1ebbfULL, 0xa4b1ec80f47c84adULL, // x 2^2419 ~= 10^728
+ 0x1cf4a5c3bc09fa6fULL, 0xa7eb6799e8aec999ULL, // x 2^2605 ~= 10^784
+ 0x3c4a575151b294dcULL, 0xab350c27feb90accULL, // x 2^2791 ~= 10^840
+ 0x870a8d87239d8f35ULL, 0xae8f2b2ce3d5dbe9ULL, // x 2^2977 ~= 10^896
+ 0xdd929f09c3eff5acULL, 0xb1fa17404a30e5e8ULL, // x 2^3163 ~= 10^952
+ 0x1931b583a9431d7eULL, 0xb5762497dbf17a9eULL, // x 2^3349 ~= 10^1008
+ 0xe30db03e0f8dd286ULL, 0xb903a90f561d25e2ULL, // x 2^3535 ~= 10^1064
+ 0x9eb5cb19647508c5ULL, 0xbca2fc30cc19f090ULL, // x 2^3721 ~= 10^1120
+ 0x24bd4c00042ad125ULL, 0xc054773d149bf26bULL, // x 2^3907 ~= 10^1176
+ 0x7ea30dbd7ea479e3ULL, 0xc418753460cdcca9ULL, // x 2^4093 ~= 10^1232
+ 0x764f4cf916b4deceULL, 0xc7ef52defe87b751ULL, // x 2^4279 ~= 10^1288
+ 0xbeb7fbdc1cbe8b37ULL, 0xcbd96ed6466cf081ULL, // x 2^4465 ~= 10^1344
+ 0xdce472c619aa3f63ULL, 0xcfd7298db6cb9672ULL, // x 2^4651 ~= 10^1400
+ 0xe47defc14a406e4fULL, 0xd3e8e55c3c1f43d0ULL, // x 2^4837 ~= 10^1456
+ 0xb7157c60a24a0569ULL, 0xd80f0685a81b2a81ULL, // x 2^5023 ~= 10^1512
+ 0xfb0b98f6bbc4f0cbULL, 0xdc49f3445824e360ULL, // x 2^5209 ~= 10^1568
+ 0xc6c6c1764e047e15ULL, 0xe09a13d30c2dba62ULL, // x 2^5395 ~= 10^1624
+ 0x87e8dcfc09dbc33aULL, 0xe4ffd276eedce658ULL, // x 2^5581 ~= 10^1680
+ 0xb1a3642a8da3cf4fULL, 0xe97b9b89d001dab3ULL, // x 2^5767 ~= 10^1736
+ 0x2d4070f33b21ab7bULL, 0xee0ddd84924ab88cULL, // x 2^5953 ~= 10^1792
+ 0xa2bf0c63a814e04eULL, 0xf2b70909cd3fd35cULL, // x 2^6139 ~= 10^1848
+ 0x08f13995cf9c2747ULL, 0xf77790f0a48a45ceULL, // x 2^6325 ~= 10^1904
+ 0x7a37993eb21444faULL, 0xfc4fea4fd590b40aULL, // x 2^6511 ~= 10^1960
+ 0xb7b1ada9cdeba84dULL, 0x80a046447e3d49f1ULL, // x 2^6698 ~= 10^2016
+ 0x0cc6866c5d69b2cbULL, 0x8324f8aa08d7d411ULL, // x 2^6884 ~= 10^2072
+ 0x7fe2b4308dcbf1a3ULL, 0x85b64a659077660eULL, // x 2^7070 ~= 10^2128
+ 0x1d73ef3eaac3c964ULL, 0x88547abb1d8e5bd9ULL, // x 2^7256 ~= 10^2184
+ 0x1e34291b1ef566c7ULL, 0x8affca2bd1f88549ULL, // x 2^7442 ~= 10^2240
+ 0x9e9383d73d486881ULL, 0x8db87a7c1e56d873ULL, // x 2^7628 ~= 10^2296
+ 0x9cc5ee51962c011aULL, 0x907eceba168949b3ULL, // x 2^7814 ~= 10^2352
+ 0x413407cfeeac9743ULL, 0x93530b43e5e2c129ULL, // x 2^8000 ~= 10^2408
+ 0x7efa7d29c44e11b7ULL, 0x963575ce63b6332dULL, // x 2^8186 ~= 10^2464
+ 0x5a848859645d1c6fULL, 0x9926556bc8defe43ULL, // x 2^8372 ~= 10^2520
+ 0x51edea897b34601fULL, 0x9c25f29286e9ddb6ULL, // x 2^8558 ~= 10^2576
+ 0xb50008d92529e91fULL, 0x9f3497244186fca4ULL, // x 2^8744 ~= 10^2632
+ 0xf09e780bcc8238d9ULL, 0xa2528e74eaf101fcULL, // x 2^8930 ~= 10^2688
+ 0x3a5828869701a165ULL, 0xa580255203f84b47ULL, // x 2^9116 ~= 10^2744
+ 0x8b231a70eb5444ceULL, 0xa8bdaa0a0064fa44ULL, // x 2^9302 ~= 10^2800
+ 0xfa1bde1f473556a4ULL, 0xac0b6c73d065f8ccULL, // x 2^9488 ~= 10^2856
+ 0x7730e00421da4d55ULL, 0xaf69bdf68fc6a740ULL, // x 2^9674 ~= 10^2912
+ 0x7f959cb702329d14ULL, 0xb2d8f1915ba88ca5ULL, // x 2^9860 ~= 10^2968
+ 0x40c3a071220f5567ULL, 0xb6595be34f821493ULL, // x 2^10046 ~= 10^3024
+ 0x11c48d02b8326bd3ULL, 0xb9eb5333aa272e9bULL, // x 2^10232 ~= 10^3080
+ 0x566765461bd2f61bULL, 0xbd8f2f7a1ba47d6dULL, // x 2^10418 ~= 10^3136
+ 0xb889018e4f6e9a52ULL, 0xc1454a673cb9b1ceULL, // x 2^10604 ~= 10^3192
+ 0xf85333a94848659fULL, 0xc50dff6d30c3aefcULL, // x 2^10790 ~= 10^3248
+ 0x1a1aeae7cf8a9d3dULL, 0xc8e9abc872eb2bc1ULL, // x 2^10976 ~= 10^3304
+ 0x12e29f09d9061609ULL, 0xccd8ae88cf70ad84ULL, // x 2^11162 ~= 10^3360
+ 0xdf7601457ca20b35ULL, 0xd0db689a89f2f9b1ULL, // x 2^11348 ~= 10^3416
+ 0xcbdcd02f23cc7690ULL, 0xd4f23ccfb1916df5ULL, // x 2^11534 ~= 10^3472
+ 0x44289dd21b589d7aULL, 0xd91d8fe9a3d019ccULL, // x 2^11720 ~= 10^3528
+ 0x95aa118ec1d08317ULL, 0xdd5dc8a2bf27f3f7ULL, // x 2^11906 ~= 10^3584
+ 0x72c4d2cad73b0a7bULL, 0xe1b34fb846321d04ULL, // x 2^12092 ~= 10^3640
+ 0xe20a88f1134f906dULL, 0xe61e8ff47461cda9ULL, // x 2^12278 ~= 10^3696
+ 0xc7c91d5c341ed39dULL, 0xea9ff638c54554e1ULL, // x 2^12464 ~= 10^3752
+ 0xf659ede2159a45ecULL, 0xef37f1886f4b6690ULL, // x 2^12650 ~= 10^3808
+ 0x78d946bab954b82fULL, 0xf3e6f313130ef0efULL, // x 2^12836 ~= 10^3864
+ 0xc9b1474d8f89c269ULL, 0xf8ad6e3fa030bd15ULL, // x 2^13022 ~= 10^3920
+ 0x6b1d2745340e7b14ULL, 0xfd8bd8b770cb469eULL, // x 2^13208 ~= 10^3976
+ 0xf22e502fcdd4bca2ULL, 0x81415538ce493bd5ULL, // x 2^13395 ~= 10^4032
+ 0x7c1735fc3b813c8cULL, 0x83c92edf425b292dULL, // x 2^13581 ~= 10^4088
+ 0x0367500a8e9a178fULL, 0x865db7a9ccd2839eULL, // x 2^13767 ~= 10^4144
+ 0xc9ac50475e25293aULL, 0x88ff2f2bade74531ULL, // x 2^13953 ~= 10^4200
+ 0x0879b2e5f6ee8b1cULL, 0x8badd636cc48b341ULL, // x 2^14139 ~= 10^4256
+ 0x2f33c652bd12fab7ULL, 0x8e69eee1f23f2be5ULL, // x 2^14325 ~= 10^4312
+ 0xad6a6308a8e8b557ULL, 0x9133bc8f2a130fe5ULL, // x 2^14511 ~= 10^4368
+ 0x9dbaa465efe141a0ULL, 0x940b83f23a55842aULL, // x 2^14697 ~= 10^4424
+ 0x888c9ab2fc5b3437ULL, 0x96f18b1742aad751ULL, // x 2^14883 ~= 10^4480
+ 0xba00864671d1053fULL, 0x99e6196979b978f1ULL, // x 2^15069 ~= 10^4536
+ 0x61d59d402aae4feaULL, 0x9ce977ba0ce3a0bdULL, // x 2^15255 ~= 10^4592
+ 0x803c1cd864033781ULL, 0x9ffbf04722750449ULL, // x 2^15441 ~= 10^4648
+ 0xa28a151725a55e10ULL, 0xa31dcec2fef14b30ULL, // x 2^15627 ~= 10^4704
+ 0x5b8452af2302fe13ULL, 0xa64f605b4e3352cdULL, // x 2^15813 ~= 10^4760
+ 0x82b84cabc828bf93ULL, 0xa990f3c09110c544ULL, // x 2^15999 ~= 10^4816
+ 0x8d29dd5122e4278dULL, 0xace2d92db0390b59ULL, // x 2^16185 ~= 10^4872
+ 0x58f8fde02c03a6c6ULL, 0xb045626fb50a35e7ULL, // x 2^16371 ~= 10^4928
+ 0xd950102978dbd0ffULL, 0xb3b8e2eda91a232dULL, // x 2^16557 ~= 10^4984
+};
+#endif
+
+// ================================================================
+// ================================================================
+//
+// Power-of-5 tables
+//
+// ================================================================
+// ================================================================
+
+// Integer powers of 5 used to drive the power-of-5 calculator used
+// in the slow path
+const static uint64_t powersOfFive[] = {1, 5, 25, 125, 625, 3125, 15625,
+ 78125, 390625, 1953125, 9765625, 48828125UL, 244140625UL, 1220703125UL,
+ 6103515625ULL, 30517578125ULL, 152587890625ULL, 762939453125ULL,
+ 3814697265625ULL, 19073486328125ULL, 95367431640625ULL,
+ 476837158203125ULL, 2384185791015625ULL, 11920928955078125ULL,
+ 59604644775390625ULL, 298023223876953125ULL, 1490116119384765625ULL,
+ 7450580596923828125ULL
+};
+
+//
+// Large integer powers of 5. These are used to accelerate the
+// power-of-5 calculator when computing very large powers needed for
+// float80 and binary128 in the slow path.
+//
+// TODO: Consider dropping these. If float80/binary128 optimizations
+// are enabled, then the interval optimization removes a lot of the
+// need for these. (I've already disabled these to save code for
+// anyone that prefers code size to performance for these formats.)
+
+#if ENABLE_FLOAT80_OPTIMIZATIONS || ENABLE_BINARY128_OPTIMIZATIONS
+// Useful python snippet to generate these constants:
+// a = hex(5 ** 2000)[2:]
+// while a:
+// print("0x" + a[-8:] + "UL,")
+// a = a[:-8]
+
+#if MP_WORD_BITS == 32
+// Broken down into 32-bit words for use on 64-bit processors.
+// Stored from least-significant to most-significant.
+
+const static uint32_t fiveToThe1000[] = {
+ 0xe76e2661UL, 0xa75f463cUL, 0x8f9ac297UL, 0xb426e5d5UL, 0x8021f455UL,
+ 0x484b538eUL, 0xd39bcb19UL, 0x32c52790UL, 0x69805e7cUL, 0x78612ddbUL,
+ 0x84a48774UL, 0x4b7ad105UL, 0xa1bca9a9UL, 0x16d4cc76UL, 0x1b3bc79dUL,
+ 0x22c38759UL, 0x4ec6864aUL, 0x670dbd77UL, 0xa93ad95bUL, 0xfd07b514UL,
+ 0x185d4af8UL, 0x491981abUL, 0x7f6e513bUL, 0x1a8298bfUL, 0xccdd06fcUL,
+ 0x9c225999UL, 0x7a000d77UL, 0x211ecc86UL, 0x678b85c6UL, 0xccb243d3UL,
+ 0x8c2fb72dUL, 0xf95cada8UL, 0x9c82b396UL, 0x3ef53cd3UL, 0x9093cb88UL,
+ 0xb5aff7d4UL, 0x7da0bc3eUL, 0x522aea78UL, 0xa0165d55UL, 0x3b34e560UL,
+ 0x80a282e5UL, 0xf0c08162UL, 0x9d7e4698UL, 0xe0fe7772UL, 0x3d9d710eUL,
+ 0x8d98ec4eUL, 0xb5954022UL, 0xa0e542c3UL, 0x3be09b67UL, 0x5da7d253UL,
+ 0x86949f98UL, 0x68641b9eUL, 0xaa166a05UL, 0xdfb84e6bUL, 0x2955510aUL,
+ 0xe62bf617UL, 0xe8c18fc6UL, 0x8afb443aUL, 0x22f12a84UL, 0x048f59a7UL,
+ 0xaf620341UL, 0xd377548eUL, 0xefd7f15cUL, 0x2f1121d5UL, 0x9aa2146bUL,
+ 0xf36453b5UL, 0xc7a3e8ecUL, 0xcab2ae32UL, 0xdb4abe87UL, 0x5a54e6f2UL,
+ 0xb015e34aUL, 0xc7e774f6UL, 0x3ce36UL,
+};
+
+#elif MP_WORD_BITS == 16
+
+// Broken down into 16-bit words for use on 32-bit processors.
+// Stored from least-significant to most-significant.
+const static uint16_t fiveToThe1000[] = {
+ 0x2661U, 0xe76eU, 0x463cU, 0xa75fU, 0xc297U, 0x8f9aU, 0xe5d5U,
+ 0xb426U, 0xf455U, 0x8021U, 0x538eU, 0x484bU, 0xcb19U, 0xd39bU,
+ 0x2790U, 0x32c5U, 0x5e7cU, 0x6980U, 0x2ddbU, 0x7861U, 0x8774U,
+ 0x84a4U, 0xd105U, 0x4b7aU, 0xa9a9U, 0xa1bcU, 0xcc76U, 0x16d4U,
+ 0xc79dU, 0x1b3bU, 0x8759U, 0x22c3U, 0x864aU, 0x4ec6U, 0xbd77U,
+ 0x670dU, 0xd95bU, 0xa93aU, 0xb514U, 0xfd07U, 0x4af8U, 0x185dU,
+ 0x81abU, 0x4919U, 0x513bU, 0x7f6eU, 0x98bfU, 0x1a82U, 0x06fcU,
+ 0xccddU, 0x5999U, 0x9c22U, 0x0d77U, 0x7a00U, 0xcc86U, 0x211eU,
+ 0x85c6U, 0x678bU, 0x43d3U, 0xccb2U, 0xb72dU, 0x8c2fU, 0xada8U,
+ 0xf95cU, 0xb396U, 0x9c82U, 0x3cd3U, 0x3ef5U, 0xcb88U, 0x9093U,
+ 0xf7d4U, 0xb5afU, 0xbc3eU, 0x7da0U, 0xea78U, 0x522aU, 0x5d55U,
+ 0xa016U, 0xe560U, 0x3b34U, 0x82e5U, 0x80a2U, 0x8162U, 0xf0c0U,
+ 0x4698U, 0x9d7eU, 0x7772U, 0xe0feU, 0x710eU, 0x3d9dU, 0xec4eU,
+ 0x8d98U, 0x4022U, 0xb595U, 0x42c3U, 0xa0e5U, 0x9b67U, 0x3be0U,
+ 0xd253U, 0x5da7U, 0x9f98U, 0x8694U, 0x1b9eU, 0x6864U, 0x6a05U,
+ 0xaa16U, 0x4e6bU, 0xdfb8U, 0x510aU, 0x2955U, 0xf617U, 0xe62bU,
+ 0x8fc6U, 0xe8c1U, 0x443aU, 0x8afbU, 0x2a84U, 0x22f1U, 0x59a7U,
+ 0x048fU, 0x0341U, 0xaf62U, 0x548eU, 0xd377U, 0xf15cU, 0xefd7U,
+ 0x21d5U, 0x2f11U, 0x146bU, 0x9aa2U, 0x53b5U, 0xf364U, 0xe8ecU,
+ 0xc7a3U, 0xae32U, 0xcab2U, 0xbe87U, 0xdb4aU, 0xe6f2U, 0x5a54U,
+ 0xe34aU, 0xb015U, 0x74f6U, 0xc7e7U, 0xce36U, 0x3U,
+};
+#endif
+#endif
+
+// ================================================================
+// ================================================================
+//
+// Fixed-width integer routines
+//
+// ================================================================
+// ================================================================
+
+#if HAVE_UINT128_T
+#define multiply64x64RoundingDown(lhs, rhs) (((__uint128_t)(lhs) * (rhs)) >> 64)
+#else
+static uint64_t multiply64x64RoundingDown(uint64_t lhs, uint64_t rhs) {
+ uint64_t a = (lhs >> 32) * (rhs >> 32);
+ uint64_t b = (lhs >> 32) * (rhs & UINT32_MAX);
+ uint64_t c = (lhs & UINT32_MAX) * (rhs >> 32);
+ uint64_t d = (lhs & UINT32_MAX) * (rhs & UINT32_MAX);
+ b += (c & UINT32_MAX) + (d >> 32);
+ return a + (b >> 32) + (c >> 32);
+}
+#endif
+
+// ================================================================
+// ================================================================
+//
+// Multi-Precision Integer Routines
+//
+// ================================================================
+// ================================================================
+
+// Configure our multi-precision integer machinery with appropriate types
+#if MP_WORD_BITS == 16
+typedef uint16_t mp_word_t; // Type of a single word in an MP integer
+typedef uint32_t mp_dword_t; // Type big enough to hold a two-word MP integer
+static const mp_word_t MP_WORD_MAX = UINT16_MAX;
+#elif MP_WORD_BITS == 32
+typedef uint32_t mp_word_t; // Type of a single word in an MP integer
+typedef uint64_t mp_dword_t; // Type big enough to hold a two-word MP integer
+static const mp_word_t MP_WORD_MAX = UINT32_MAX;
+#else
+#error MP_WORD_BITS undefined
+#endif
+static const int mp_word_bits = sizeof(mp_word_t) * 8;
+// __builtin_clz() takes an `unsigned` which may be different from mp_word_t
+// This adjusts the result accordingly.
+#define CLZ_WORD(word) ((unsigned)__builtin_clz((word)) + (mp_word_bits - sizeof(unsigned) * 8))
+
+// A multi-precision integer is represented as two pointers:
+// lsw - points to least-significant word (lowest address in memory)
+// msw - points to one beyond the most-significant word
+// In particular, the following are true:
+// number of words == msw - lsw
+// mp is empty/zero if msw == lsw
+// lsw[0] is least-significant word
+// lsw[1] is second-least-significant word
+// ...
+// msw[-2] is second-most-significant word
+// msw[-1] is most-significant word
+
+// Caveat: The MP routines in this file are optimized to just extend
+// the value in-place as necessary without any checking to see whether
+// the allocated memory is sufficient. That requires that the original
+// allocation be sufficient for the largest value that will occur.
+typedef struct { mp_word_t *lsw; mp_word_t *msw; } mp_t;
+
+// Shift the mpint left by the indicated number of
+// bits. This will extend the mpint at the msw end
+// as necessary.
+static void shiftLeftMP(mp_t *work, int shift) {
+ int wordsShift = shift / mp_word_bits;
+ int bitsShift = shift % mp_word_bits;
+
+ if (wordsShift > 0) {
+ memmove(work->lsw + wordsShift,
+ work->lsw,
+ (work->msw - work->lsw) * sizeof(mp_word_t));
+ memset(work->lsw, 0, wordsShift * sizeof(mp_word_t));
+ work->msw += wordsShift;
+ }
+
+ if (bitsShift > 0) {
+ mp_dword_t t = 0;
+ mp_word_t *p = work->lsw;
+ for (; p < work->msw; p++) {
+ t |= (mp_dword_t)*p << bitsShift;
+ *p = (mp_word_t)t;
+ t >>= mp_word_bits;
+ }
+ if (t != 0) {
+ *p = (mp_word_t)t;
+ work->msw = p + 1;
+ }
+ }
+}
+
+// Return the index of the most-significant set bit
+static int bitCountMP(mp_t work) {
+ if (work.msw == work.lsw) {
+ return 0;
+ }
+ assert(work.msw[-1] != 0); // High-order word cannot be zero
+ return (int)(mp_word_bits - CLZ_WORD(work.msw[-1])) // Bits in high-order word
+ + (int)(mp_word_bits * (work.msw - work.lsw - 1)); // Bits in all other words
+}
+
+// Add small integer to varint
+static void addToMP(mp_t *work, uint64_t addend) {
+ uint64_t t = addend;
+ mp_word_t *p = work->lsw;
+ while (t > 0 && p < work->msw) {
+ t += *p;
+ *p = (mp_word_t)t;
+ t >>= mp_word_bits;
+ p += 1;
+ }
+ while (t > 0) {
+ *p = (mp_word_t)t;
+ t >>= mp_word_bits;
+ p += 1;
+ }
+ if (p > work->msw) {
+ work->msw = p;
+ }
+}
+
+// Shift an MP right, rounding the result according to
+// the current FP rounding mode.
+static mp_t shiftRightMPWithRounding(mp_t work,
+ int shift,
+ int trailingNonZero,
+ bool negative,
+ int roundingMode) {
+ (void)negative;
+
+ if (shift == 0) {
+ return work;
+ }
+ if (shift < 0) {
+ shiftLeftMP(&work, -shift);
+ return work;
+ }
+
+ mp_t result = work;
+ int wordsShift = shift / mp_word_bits;
+ int bitsShift = shift % mp_word_bits;
+
+ if (bitsShift == 0) {
+ // We don't really need to shift anything, just drop the low-order
+ // words and possibly increment.
+ result.lsw += wordsShift;
+ switch (roundingMode) {
+#ifdef FE_TOWARDZERO
+ case FE_TOWARDZERO:
+ return result;
+#endif
+#ifdef FE_DOWNWARD
+ case FE_DOWNWARD:
+ // Upwards & downwards rounding are symmetric
+ negative = !negative;
+ // FALL THROUGH
+#endif
+#ifdef FE_UPWARD
+ case FE_UPWARD:
+ for (mp_word_t *p = work.lsw; p < result.lsw; p++) {
+ trailingNonZero |= *p;
+ }
+ if (negative || !trailingNonZero) {
+ return result;
+ } else {
+ break; // Increment and return
+ }
+#endif
+#ifdef FE_TONEAREST
+ case FE_TONEAREST:
+#endif
+ default: {
+ // shift is non-zero, so result.lsw[-1] is valid
+ // and is the most-significant-word of the fraction:
+ mp_word_t fractionMsw = result.lsw[-1];
+ mp_word_t oneHalf = 1U << (mp_word_bits - 1);
+ if (fractionMsw < oneHalf) {
+ return result;
+ } else if (fractionMsw > oneHalf) {
+ break; // Increment and return
+ } else {
+ for (mp_word_t *p = work.lsw; p < result.lsw - 1; p++) {
+ trailingNonZero |= *p;
+ }
+ if (trailingNonZero || ((result.msw > result.lsw) && (result.lsw[0] & 1))) {
+ break; // Increment and return
+ }
+ return result;
+ }
+ }
+ }
+ // Increment and return
+ addToMP(&result, 1);
+ return result;
+ }
+
+ result.lsw += wordsShift;
+ mp_word_t fraction = result.lsw[0] & ((1U << bitsShift) - 1);
+
+ {
+ mp_word_t *p = result.lsw;
+ mp_dword_t t = *p++ >> bitsShift;
+ for (; p < result.msw; p++) {
+ t |= (mp_dword_t)*p << (mp_word_bits - bitsShift);
+ p[-1] = (mp_word_t)t;
+ t >>= mp_word_bits;
+ }
+ if (t == 0) {
+ result.msw -= 1;
+ } else {
+ p[-1] = (mp_word_t)t;
+ }
+ }
+
+ switch (roundingMode) {
+#ifdef FE_TOWARDZERO
+ case FE_TOWARDZERO:
+ return result;
+#endif
+#ifdef FE_DOWNWARD
+ case FE_DOWNWARD:
+ // Upwards & downwards rounding are symmetric
+ negative = !negative;
+ // FALL THROUGH
+#endif
+#ifdef FE_UPWARD
+ case FE_UPWARD:
+ trailingNonZero |= fraction;
+ for (mp_word_t *p = work.lsw; p < result.lsw; p++) {
+ trailingNonZero |= *p;
+ }
+ if (negative || !trailingNonZero) {
+ return result;
+ } else {
+ break; // Increment and return
+ }
+#endif
+#ifdef FE_TONEAREST
+ case FE_TONEAREST:
+#endif
+ default: {
+ mp_word_t half = (mp_word_t)(1U << (bitsShift - 1));
+ if (fraction < half) {
+ return result;
+ } else if (fraction > half) {
+ break; // Increment and return
+ } else { // First part of fraction is exact half..
+ for (mp_word_t *p = work.lsw; p < result.lsw; p++) {
+ trailingNonZero |= *p;
+ }
+ if (trailingNonZero || ((result.msw > result.lsw) && (result.lsw[0] & 1))) {
+ break; // Increment and return
+ } else {
+ return result;
+ }
+ }
+ }
+ }
+ // Increment and return
+ addToMP(&result, 1);
+ return result;
+}
+
+static mp_t shiftRightMPWithTruncation(mp_t work,
+ int shift) {
+ return shiftRightMPWithRounding(work, shift,
+ 0, 0, FE_TOWARDZERO);
+}
+
+// Multiply varint by N
+static void multiplyMPByN(mp_t *work, uint32_t n) {
+ uint64_t t = 0;
+ for (mp_word_t *p = work->lsw; p < work->msw; p++) {
+ t += *p * (uint64_t)n;
+ *p = (mp_word_t)t;
+ t >>= mp_word_bits;
+ }
+ while (t > 0) {
+ work->msw[0] = (mp_word_t)t;
+ t >>= mp_word_bits;
+ work->msw += 1;
+ }
+}
+
+static void multiplyByFiveToTheN(mp_t *dest, int power) {
+#if HAVE_UINT128_T
+ // 128-bit arithmetic lets us multiply 32-bit words by 5^40 (93 bits).
+ // For a double, this can loop up to 8 times.
+ // Without the large-power optimization in fiveToTheN, this can
+ // loop > 100 times for a binary128. :-/
+ while (power > 40) {
+ static const uint64_t fiveToThe20 = 95367431640625ULL;
+ static const __uint128_t fiveToThe40 = (__uint128_t)fiveToThe20 * fiveToThe20;
+ __uint128_t t = 0;
+ mp_word_t *p = dest->lsw;
+ while (p < dest->msw) {
+ t += *p * fiveToThe40;
+ *p++ = (mp_word_t)t;
+ t >>= mp_word_bits;
+ }
+ while (t > 0) {
+ *p++ = (mp_word_t)t;
+ t >>= mp_word_bits;
+ }
+ dest->msw = p;
+ power -= 40;
+ }
+#endif
+
+ while (power > 0) {
+#if MP_WORD_BITS == 16
+ // Limit to 5^20 (47 bits) so we don't overflow
+ // a 64-bit accumulator.
+ const static int maxPower = 13; // For 64-bit
+ uint64_t t = 0;
+ uint64_t powerOfFive = powersOfFive[power > maxPower ? maxPower : power];
+#elif MP_WORD_BITS == 32
+ // With a 128-bit accumulator, we can use the full table
+ // of 64-bit powers (up to 5^27).
+ const static int maxPower = 27;
+ __uint128_t t = 0;
+ __uint128_t powerOfFive = powersOfFive[power > maxPower ? maxPower : power];
+#endif
+ mp_word_t *p = dest->lsw;
+ while (p < dest->msw) {
+ t += *p * powerOfFive;
+ *p++ = (mp_word_t)t;
+ t >>= mp_word_bits;
+ }
+ while (t > 0) {
+ *p++ = (mp_word_t)t;
+ t >>= mp_word_bits;
+ }
+ dest->msw = p;
+ power -= maxPower;
+ }
+}
+
+// Compute 5^N
+// This is _THE_ performance-critical function for the slow path
+// when handling float80 or binary128 with large exponents.
+// (Of course, that only applies to the few percent of inputs
+// that don't get handled by the optimized paths.)
+static void fiveToTheN(mp_t *dest, int power) {
+
+#if ENABLE_FLOAT80_OPTIMIZATIONS || ENABLE_BINARY128_OPTIMIZATIONS
+ // Accelerate a very large power with a pre-computed initial value
+ // Only for float80/binary128 since binary64 only goes up to 10^325
+ if (power >= 1000) {
+ memcpy(dest->lsw, fiveToThe1000, sizeof(fiveToThe1000));
+ dest->msw = dest->lsw + sizeof(fiveToThe1000) / sizeof(fiveToThe1000[0]);
+ power -= 1000;
+ }
+ else
+#endif
+ {
+ // Initialize with as large a power of 5 as we can from the standard table
+ const static int maxTablePower = sizeof(powersOfFive) / sizeof(powersOfFive[0]) - 1;
+ int thisPower = power > maxTablePower ? maxTablePower : power;
+ uint64_t t = powersOfFive[thisPower];
+ mp_word_t *p = dest->lsw;
+ while (t > 0) {
+ *p++ = (mp_word_t)t;
+ t >>= mp_word_bits;
+ }
+ dest->msw = p;
+ power -= thisPower;
+ }
+
+ multiplyByFiveToTheN(dest, power);
+}
+
+// Following "Algorithm D" from Knuth AOCP Section 4.3.1
+// Accepts:
+// Numerator
+// Denominator
+// *nonZeroRemainder: integer to hold remainder status
+// Returns:
+// quotient stored in numerator area
+// numerator is destroyed
+// *nonZeroRemainder set to non-zero iff remainder was non-zero
+static mp_t divideMPByMP(mp_t numerator, mp_t denominator, int *nonZeroRemainder) {
+ // Make sure we haven't picked up a leading zero word anywhere...
+ assert(numerator.msw > numerator.lsw);
+ assert(denominator.msw > denominator.lsw);
+ assert(numerator.msw[-1] != 0);
+ assert(denominator.msw[-1] != 0);
+
+ // Full long division algorithm assumes denominator is more than 1 word,
+ // so we need to handle 1-word case separately.
+ if (denominator.msw - denominator.lsw == 1) {
+ mp_dword_t n = denominator.lsw[0];
+ mp_dword_t t = 0;
+ mp_word_t *p = numerator.msw;
+ while (p > numerator.lsw) {
+ p -= 1;
+ t <<= mp_word_bits;
+ t += *p;
+ mp_word_t q0 = (mp_word_t)(t / n);
+ *p = q0;
+ t -= q0 * n;
+ }
+ *nonZeroRemainder = (t != 0);
+ while (numerator.msw[-1] == 0) {
+ numerator.msw -= 1;
+ }
+ return numerator;
+ }
+
+ // D1. Normalize: Multiply numerator and denominator by a power of 2
+ // so that denominator has the most significant bit set in the
+ // most significant word. This guarantees that qhat below
+ // will always be very good.
+ int shift = (int)CLZ_WORD(denominator.msw[-1]);
+ shiftLeftMP(&denominator, shift);
+ shiftLeftMP(&numerator, shift);
+
+ // Add a high-order word to the numerator if necessary
+ if (numerator.msw[-1] >= denominator.msw[-1]) {
+ numerator.msw[0] = 0;
+ numerator.msw += 1;
+ }
+
+ // D2. Iterate
+ // Numerator and denominator must not be immediately adjacent in
+ // memory, since we need an extra word for the quotient to fit in.
+ // TODO: Rearrange this so that the quotient can exactly overlay
+ // the numerator instead of going one word beyond. The requirement
+ // for an empty word beyond the numerator is non-obvious and easy
+ // to screw up.
+ assert((numerator.msw < denominator.lsw) || (denominator.msw < numerator.lsw));
+ mp_t quotient = {numerator.msw + 1, numerator.msw + 1}; // Quotient will overwrite numerator
+ int iterations = (int)((numerator.msw - numerator.lsw) - (denominator.msw - denominator.lsw));
+ for (int j = 0; j < iterations; ++j) {
+ // D3. Trial division of high-order bits
+ mp_word_t qhat;
+ mp_dword_t numerator2 =
+ ((mp_dword_t)numerator.msw[-1] << mp_word_bits)
+ + numerator.msw[-2];
+ if (numerator.msw[-1] == denominator.msw[-1]) {
+ qhat = MP_WORD_MAX;
+ } else {
+ qhat = (mp_word_t)(numerator2 / denominator.msw[-1]);
+ }
+ while (1) {
+ mp_dword_t r = numerator2 - qhat * (mp_dword_t)denominator.msw[-1];
+ if (r <= MP_WORD_MAX
+ && ((denominator.msw[-2] * (mp_dword_t)qhat) > (numerator.msw[-3] + (r << mp_word_bits)))) {
+ qhat -= 1;
+ } else {
+ break;
+ }
+ }
+
+ // D4. numerator -= qhat * denominator
+ mp_dword_t t = 0;
+ for (mp_word_t *den = denominator.lsw,
+ *num = numerator.msw - (denominator.msw - denominator.lsw) - 1;
+ den < denominator.msw;
+ num += 1, den += 1) {
+ t += qhat * (mp_dword_t)*den;
+ unsigned borrow = *num < (mp_word_t)t;
+ *num -= (mp_word_t)t;
+ t >>= mp_word_bits;
+ t += borrow;
+ }
+
+ // D5/D6. qhat may have been one too high; if so, correct for that
+ // Per Knuth, this happens very infrequently
+ if (numerator.msw[-1] < t) {
+ qhat -= 1;
+ t = 0;
+ for (mp_word_t *den = denominator.lsw,
+ *num = numerator.msw - (denominator.msw - denominator.lsw) - 1;
+ den < denominator.msw;
+ num += 1, den += 1) {
+ t += *num + (mp_dword_t)*den;
+ *num = (mp_word_t)t;
+ t >>= mp_word_bits;
+ }
+ }
+ --quotient.lsw;
+ *quotient.lsw = qhat;
+
+ // D7. Iterate
+ numerator.msw -= 1;
+ }
+
+ // D8. Post-process the remainder
+ mp_word_t remainderHash = 0;
+ for (mp_word_t *num = numerator.lsw; num < numerator.msw; ++num) {
+ remainderHash |= *num;
+ }
+ *nonZeroRemainder = remainderHash != 0;
+
+ while (quotient.msw[-1] == 0) {
+ quotient.msw -= 1;
+ }
+ return quotient;
+}
+
+// ================================================================
+// ================================================================
+//
+// Parse state
+//
+// ================================================================
+// ================================================================
+
+// To reduce argument-passing overhead, we store all the state
+// in this struct and pass around pointers to it.
+struct parseInfo {
+ // ================================================================
+ // Basic parameters for the target FP format
+
+ // Number of significant bits (including hidden bit for IEEE formats)
+ int sigBits;
+ // Minimum/maximum binary exponents
+ int minBinaryExp;
+ int maxBinaryExp;
+ // Total number of bytes in this format
+ int bytes;
+ // Approximate bounds on the decimal exponent, used for
+ // early overflow/underflow checks.
+ int minDecimalExp;
+ int maxDecimalExp;
+ // Maximum number of significant digits in the full
+ // decimal representation of any exact midpoint.
+ int maxDecimalMidpointDigits;
+
+ // ================================================================
+ // Inputs
+
+ // Location where the value should be written
+ unsigned char *dest;
+ // First character of the provided string
+ const char *start;
+ // Where to put the address of the first unparsed character
+ char **end;
+ // Locale
+ strtofp_locale_t loc;
+
+ // ================================================================
+ // Outputs from the initial parse
+
+ // First 19 digits of significand as an integer
+ uint64_t digits;
+ // Total number of significand digits (ignoring decimal point)
+ int digitCount;
+ // Address of the 20th digit or NULL if digitCount < 20
+ const unsigned char *firstUnparsedDigit;
+ // Number of unparsed digits = max(digitCount - 19, 0)
+ int unparsedDigitCount;
+ // Decimal exponent, corrected for decimal point location
+ int base10Exponent;
+ // True if number is negative
+ bool negative;
+};
+
+// ================================================================
+// ================================================================
+//
+// Over/Underflow
+//
+// ================================================================
+// ================================================================
+
+// Store a suitably-signed infinity to the destination
+static void infinity(struct parseInfo *info) {
+ // 16/32/64-bit formats we can hardcode the full value
+ // and memcpy() it. This is endian-safe (assuming that
+ // integers and FP values have the same endianness).
+ switch (info->bytes) {
+#if ENABLE_BINARY16_SUPPORT
+ case 2: { // binary16
+ uint16_t raw = info->negative ? 0xfc00 : 0x7c00;
+ memcpy(info->dest, &raw, sizeof(raw));
+ return;
+ }
+#endif
+#if ENABLE_BINARY32_SUPPORT
+ case 4: { // binary32
+ uint32_t raw = info->negative ? 0xff800000UL : 0x7f800000UL;
+ memcpy(info->dest, &raw, sizeof(raw));
+ return;
+ }
+#endif
+#if ENABLE_BINARY64_SUPPORT
+ case 8: { // binary64
+ uint64_t raw = info->negative ? 0xfff0000000000000ULL : 0x7ff0000000000000ULL;
+ memcpy(info->dest, &raw, sizeof(raw));
+ return;
+ }
+#endif
+ default:
+ break;
+ }
+
+ // 80- and 128-bit formats we build up incrementally.
+ // TODO: Support big-endian.
+ memset(info->dest, 0, (size_t)info->bytes);
+ switch(info->bytes) {
+#if ENABLE_FLOAT80_SUPPORT
+ case 10: // float80
+ info->dest[7] = 0x80;
+ info->dest[8] = 0xff;
+ info->dest[9] = info->negative ? 0xff : 0x7f;
+#endif
+#if ENABLE_BINARY128_SUPPORT
+ case 16: // binary128
+ info->dest[14] = 0xff;
+ info->dest[15] = info->negative ? 0xff : 0x7f;
+ break;
+#endif
+ }
+}
+
+// Store the max normal value to the destination, suitably signed.
+static void max_value(struct parseInfo *info) {
+ switch (info->bytes) {
+#if ENABLE_BINARY16_SUPPORT
+ case 2: { // binary16
+ uint16_t raw = info->negative ? 0xfbff : 0x7bff;
+ memcpy(info->dest, &raw, sizeof(raw));
+ return;
+ }
+#endif
+#if ENABLE_BINARY32_SUPPORT
+ case 4: { // binary32
+ uint32_t raw = info->negative ? 0xff7fffffUL : 0x7f7fffffUL;
+ memcpy(info->dest, &raw, sizeof(raw));
+ return;
+ }
+#endif
+#if ENABLE_BINARY64_SUPPORT
+ case 8: { // binary64
+ uint64_t raw = info->negative ? 0xffefffffffffffffULL : 0x7fefffffffffffffULL;
+ memcpy(info->dest, &raw, sizeof(raw));
+ return;
+ }
+#endif
+ default:
+ break;
+ }
+
+ // 80- and 128-bit formats we build up incrementally.
+ // TODO: Support big-endian.
+ memset(info->dest, 0xff, info->bytes);
+ switch(info->bytes) {
+#if ENABLE_FLOAT80_SUPPORT
+ case 10: // float80
+ info->dest[7] = 0xff;
+ info->dest[8] = 0xfe;
+ info->dest[9] = info->negative ? 0xff : 0x7f;
+#endif
+#if ENABLE_BINARY128_SUPPORT
+ case 16: // binary128
+ info->dest[14] = 0xfe;
+ info->dest[15] = info->negative ? 0xff : 0x7f;
+ break;
+#endif
+ }
+}
+
+// This gets invoked for inputs whose magnitude is greater
+// than the max normal value + 1 ulp.
+// Gdtoa returns signed INF for such values regardless of the
+// rounding mode. Glibc rounds them correctly.
+static void overflow(struct parseInfo *info) {
+ // Overflow is always an ERANGE error
+ errno = ERANGE;
+ int roundingMode = FENV_ROUNDING_MODE();
+ int negative = info->negative;
+
+ if (0
+#ifdef FE_TOWARDZERO
+ || roundingMode == FE_TOWARDZERO
+#endif
+#ifdef FE_DOWNWARD
+ || (roundingMode == FE_DOWNWARD && !negative)
+#endif
+#ifdef FE_UPWARD
+ || (roundingMode == FE_UPWARD && negative)
+#endif
+ ) {
+ max_value(info);
+ } else {
+ infinity(info);
+ }
+}
+
+// This gets invoked for inputs that are nonzero, but closer to zero
+// than to the least positive or negative subnormal. For IEEE754
+// formats (and Intel x87 Extended format), these get returned as
+// either +/- zero or the least subnormal, depending on the current
+// rounding mode. In all of these cases, the bit pattern is all zero
+// bits except possibly for the top bit (sign) and bottom bit (either zero for
+// zero or 1 for the least subnormal).
+static void underflow(struct parseInfo *info) {
+ // Note: C17 allows implementations to set ERANGE for underflow or not.
+ // Traditionally, ERANGE has been set somewhat inconsistently:
+ // gdtoa sets it for subnormals expressed in decimal format but not
+ // hexadecimal; glibc uses ERANGE as an inexact flag in various cases.
+
+ // To generally match traditional usage, this implementation sets ERANGE
+ // for any subnormal return.
+ // It also sets it when a non-zero input is rounded to zero. This provides
+ // a way for clients to distinguish a true zero (such as "0.0e0") from a
+ // very small non-zero (such as "1e-999999").
+ errno = ERANGE;
+ uint8_t bottomBit = 0;
+ int roundingMode = FENV_ROUNDING_MODE();
+ if (0
+#ifdef FE_DOWNWARD
+ || (roundingMode == FE_DOWNWARD && info->negative)
+#endif
+#ifdef FE_UPWARD
+ || (roundingMode == FE_UPWARD && !info->negative)
+#endif
+ ) {
+ bottomBit = 1;
+ }
+
+ // 16/32/64-bit formats we can hardcode the full value
+ // and memcpy() it. This is endian-safe (assuming that
+ // integers and FP values have the same endianness).
+ switch (info->bytes) {
+#if ENABLE_BINARY16_SUPPORT
+ case 2: { // binary16
+ uint16_t raw = (info->negative ? 0x8000 : 0) | bottomBit;
+ memcpy(info->dest, &raw, sizeof(raw));
+ break;
+ }
+#endif
+#if ENABLE_BINARY32_SUPPORT
+ case 4: { // binary32
+ uint32_t raw = (info->negative ? 0x80000000UL : 0) | bottomBit;
+ memcpy(info->dest, &raw, sizeof(raw));
+ break;
+ }
+#endif
+#if ENABLE_BINARY64_SUPPORT
+ case 8: { // binary64
+ uint64_t raw = (info->negative ? 0x8000000000000000ULL : 0) | bottomBit;
+ memcpy(info->dest, &raw, sizeof(raw));
+ break;
+ }
+#endif
+#if ENABLE_FLOAT80_SUPPORT || ENABLE_BINARY128_SUPPORT
+ case 10: case 16: {
+ // TODO: Make this endian-safe
+ memset(info->dest, 0, (size_t)info->bytes); // Initialize to +0
+ info->dest[0] = bottomBit;
+ info->dest[info->bytes - 1] = info->negative ? 0x80 : 0;
+ break;
+ }
+#endif
+ }
+}
+
+// ================================================================
+// ================================================================
+//
+// General slow path
+//
+// ================================================================
+// ================================================================
+
+
+// Given a pointer to an ASCII digit string, initialize
+// the mpint with the decimal value of those digits.
+//
+// Any non-digit characters (e.g., decimal points) are ignored.
+//
+// If there are more than maxDecimalMidpointDigits, then
+// we stop when we've reached that many digits and then
+// add exactly one digit: zero if all the remaining digits
+// are zero, else one.
+//
+// Arguments:
+// work - pointer to the mpint
+// info - parseInfo struct
+static void initMPFromDigits(mp_t *work, struct parseInfo *info) {
+ // Break first19 digits into words
+ mp_word_t *component = work->lsw;
+ uint64_t first19 = info->digits;
+ while (first19 > 0) {
+ *component = (mp_word_t)first19;
+ first19 >>= mp_word_bits;
+ component += 1;
+ }
+ work->msw = component;
+
+ // Figure out how many more digits we should parse,
+ // "extra" digits are the ones beyond maxDecimalMidpointDigits.
+ int remainingDigitCount = info->unparsedDigitCount;
+ int extraDigitCount = 0;
+ if (info->digitCount > info->maxDecimalMidpointDigits) {
+ remainingDigitCount
+ = info->maxDecimalMidpointDigits - (info->digitCount - info->unparsedDigitCount);
+ extraDigitCount = info->unparsedDigitCount - remainingDigitCount;
+ }
+
+ // Handle remaining digits in batches of <= 9 digits
+ static const uint32_t powersOfTen[] = {1, 10, 100, 1000, 10000UL,
+ 100000UL, 1000000UL, 10000000UL, 100000000UL, 1000000000UL};
+ const unsigned char *p = info->firstUnparsedDigit;
+ while (remainingDigitCount > 0) {
+ int batchSize = remainingDigitCount > 9 ? 9 : remainingDigitCount;
+ uint64_t batch = 0;
+ for (int i = 0; i < batchSize; i++, p++) {
+ unsigned t = (unsigned)(*p - '0');
+ while (t > 9) {
+ p += 1; // Skip non-digits (decimal point)
+ t = (unsigned)(*p - '0');
+ }
+ batch = batch * 10 + t;
+ }
+ multiplyMPByN(work, powersOfTen[batchSize]);
+ addToMP(work, batch);
+ remainingDigitCount -= batchSize;
+ }
+
+ // Extra digits are summarized in a single digit:
+ // zero if all the extra digits are zero, else one.
+ if (extraDigitCount > 0) {
+ multiplyMPByN(work, 10);
+ while (extraDigitCount > 0) {
+ if (*p == '0') {
+ extraDigitCount -= 1;
+ } else if (*p >= '1' && *p <= '9') {
+ addToMP(work, 1);
+ return;
+ }
+ // Note: Non-digit (decimal point) chars are ignored
+ p += 1;
+ }
+ }
+}
+
+// This takes a significand and power of ten and returns the high-order
+// bits of the product (correctly rounded) using exact varint arithmetic.
+// This is the common slowpath used by all of the parsers below.
+
+// General strategy: For positive exponents, the significand is
+// multiplied by the power of 10 to form a large varint. The
+// high-order bits (suitably rounded) are returned as the binary
+// significand. The number of bits are returned as the
+// resultExponent. For negative exponents, the significand is divided
+// by the power of ten with suitable scaling to ensure the quotient
+// will have sufficient bits to return a correctly-rounded binary significand.
+
+// This takes a pointer to a stack work area; that allows each
+// format-specific implementation to pass a buffer that is
+// suitably-sized. For binary16/32/64, the passed-in buffer is sized
+// so that we will never have to allocate a larger buffer on the heap,
+// and those versions pass `false` for `heapAllocOK` to assert this.
+// For float80/binary128, the buffer is large enough for common
+// requests; larger inputs may require us to allocate a temporary
+// stack buffer for the duration of this call.
+
+// Arguments:
+// info - parseInfo struct with format details, original request
+// information, and parser output
+// roundingMode - result from fegetround()
+// stackWorkBuffer - pointer to an on-stack work area
+// stackWorkBufferBytes - size of work area in mp_word_t
+// heapAllocOK - if zero, we'll assert on any heap allocation
+static void generalSlowpath(struct parseInfo *info,
+ int roundingMode,
+ mp_word_t *stackWorkArea,
+ int stackWorkAreaWords,
+ bool heapAllocOK)
+{
+ mp_t mpSignificand;
+ int binaryExponent;
+
+ // If stackWorkArea is not big enough, space will be allocated
+ // on the heap that will need to be released before we return.
+ mp_word_t *heapAlloc = NULL;
+
+ // We'll process `digitCount` digits but no more than
+ // maxDecimalMidpointDigits + 1. (See initMPWithDigits above)
+ int significandDigits = info->digitCount;
+ if (significandDigits > info->maxDecimalMidpointDigits) {
+ significandDigits = info->maxDecimalMidpointDigits + 1;
+ }
+ int base10Exponent = info->base10Exponent - significandDigits + info->digitCount;
+
+ // Figure out how many words we need for the significand and power of 5
+ // 1701 / 512 is slightly bigger than log2(10) ~= 3.32
+ // 1189 / 512 is slightly bigger than log2(5) ~= 2.32
+ int significandBitsNeeded = (significandDigits * 1701) >> 9;
+ int significandWordsNeeded = (significandBitsNeeded + (mp_word_bits - 1)) / mp_word_bits;
+
+ int exponentBitsNeeded = (((base10Exponent < 0 ? -base10Exponent : base10Exponent) + 1) * 1189) >> 9;
+ int exponentWordsNeeded = (exponentBitsNeeded + (mp_word_bits - 1)) / mp_word_bits;
+
+ if (base10Exponent >= 0) {
+ // ================================================================
+ // Slow path for exponent > 0
+ int totalWordsNeeded = significandWordsNeeded + exponentWordsNeeded;
+ mp_t workMP;
+ if (totalWordsNeeded <= stackWorkAreaWords) {
+ memset(stackWorkArea, 0, (size_t)stackWorkAreaWords * sizeof(mp_word_t));
+ workMP.lsw = workMP.msw = stackWorkArea;
+ } else {
+ assert(heapAllocOK);
+ heapAlloc = (mp_word_t *)calloc((size_t)totalWordsNeeded, sizeof(mp_word_t));
+ if (heapAlloc == NULL) {
+ memset(info->dest, 0, (size_t)info->bytes);
+ return;
+ }
+ workMP.lsw = workMP.msw = heapAlloc;
+ }
+
+ // Load workMP with digits * 5^n
+ initMPFromDigits(&workMP, info);
+ multiplyByFiveToTheN(&workMP, base10Exponent);
+ assert((workMP.msw - workMP.lsw) <= totalWordsNeeded);
+
+ // Bit count => binary exponent
+ int bits = bitCountMP(workMP);
+ binaryExponent = bits + base10Exponent; // Factor of 2^n from above
+ // Leading bits => significand
+ mpSignificand = shiftRightMPWithRounding(workMP,
+ bits - info->sigBits, 0, info->negative, roundingMode);
+ // Adjust the exponent if a round-up gave us one too many bits
+ if (bitCountMP(mpSignificand) > info->sigBits) {
+ binaryExponent += 1;
+ mpSignificand = shiftRightMPWithTruncation(mpSignificand, 1);
+ }
+ if (binaryExponent > info->maxBinaryExp) {
+ // Overflow.
+ free(heapAlloc);
+ overflow(info);
+ return;
+ }
+ } else {
+ // ================================================================
+ // Slow path for exponent < 0
+
+ // Basic idea: Since base10Exponent is negative, we can't work
+ // directly with 10^base10Exponent (it's an infinite binary
+ // fraction), but 10^-base10Exponent is an integer. So we use
+ // varint arithmetic to compute
+ // digits / 10^(-base10Exponent)
+ // scaling the numerator so that the quotient will have at least
+ // 53 bits. The tricky part is keeping the right information to do
+ // accurate rounding of the result.
+
+ // Widen numerator so the result after division will have enough bits to round
+ int numeratorBitsNeeded = significandBitsNeeded;
+ if (numeratorBitsNeeded < exponentBitsNeeded + info->sigBits + 2) {
+ numeratorBitsNeeded = exponentBitsNeeded + info->sigBits + 2;
+ }
+ int numeratorWordsNeeded = (numeratorBitsNeeded + (mp_word_bits - 1)) / mp_word_bits + 2;
+ int denominatorWordsNeeded = exponentWordsNeeded;
+ int totalWordsNeeded = numeratorWordsNeeded + denominatorWordsNeeded;
+
+ mp_word_t *work;
+ if (totalWordsNeeded <= stackWorkAreaWords) {
+ memset(stackWorkArea, 0, stackWorkAreaWords * sizeof(mp_word_t));
+ work = stackWorkArea;
+ } else {
+ assert(heapAllocOK);
+ heapAlloc = (mp_word_t *)calloc((size_t)totalWordsNeeded, sizeof(mp_word_t));
+ if (heapAlloc == NULL) {
+ memset(info->dest, 0, (size_t)info->bytes);
+ return;
+ }
+ work = heapAlloc;
+ }
+
+ mp_t numerator = {work, work};
+ mp_t denominator = {work + numeratorWordsNeeded, work + numeratorWordsNeeded};
+
+ // Denominator holds power of 10^N
+ // (Actually 5^N, the remaining factor of 2^N is handled later.)
+ fiveToTheN(&denominator, -base10Exponent);
+ assert((denominator.msw - denominator.lsw) <= denominatorWordsNeeded);
+ assert(denominator.msw[-1] != 0);
+
+ // Populate numerator with digits, widen it to ensure final
+ // quotient has at least sigBits + 2 bits.
+ initMPFromDigits(&numerator, info);
+ assert(numerator.msw[-1] != 0);
+ int numeratorShift = bitCountMP(denominator) - bitCountMP(numerator) + info->sigBits + 2;
+ if (numeratorShift > 0) {
+ shiftLeftMP(&numerator, numeratorShift);
+ assert(numerator.msw[-1] != 0);
+ assert((numerator.msw - numerator.lsw) < numeratorWordsNeeded);
+ } else {
+ numeratorShift = 0;
+ }
+
+ // Divide, compute exact binaryExponent
+ // Note: division is destructive; overwrites numerator with quotient
+ int remainderNonZero;
+ mp_t quotient = divideMPByMP(numerator, denominator, &remainderNonZero);
+ // Binary exponent starts from number of bits in quotient
+ int quotientBits = bitCountMP(quotient);
+ binaryExponent = quotientBits;
+ // 2^base10Exponent was omitted from the 10^N denominator above
+ binaryExponent += base10Exponent;
+ // We multiplied by 2^numeratorShift above, so divide by
+ // 2^numeratorShift to cancel it out.
+ binaryExponent -= numeratorShift;
+
+ if (binaryExponent > info->minBinaryExp) {
+ // Normal decimal
+ mpSignificand = shiftRightMPWithRounding(quotient,
+ quotientBits - info->sigBits, remainderNonZero, info->negative, roundingMode);
+ if (bitCountMP(mpSignificand) > info->sigBits) {
+ binaryExponent += 1;
+ mpSignificand = shiftRightMPWithTruncation(mpSignificand, 1);
+ }
+ if (binaryExponent > info->maxBinaryExp) {
+ free(heapAlloc);
+ overflow(info);
+ return;
+ }
+ } else if (binaryExponent > info->minBinaryExp - info->sigBits) {
+ // Subnormal decimal
+ int bitsNeeded = binaryExponent - (info->minBinaryExp - info->sigBits + 1);
+ binaryExponent = info->minBinaryExp;
+ mpSignificand = shiftRightMPWithRounding(quotient,
+ quotientBits - bitsNeeded, remainderNonZero, info->negative, roundingMode);
+
+ // Usually, overflowing the expected number of bits doesn't
+ // break anything; it just results in a significand 1 bit longer
+ // than we expected.
+
+ // Except when the significand overflows into the exponent.
+ // Then we have a normal, so the extra overflow bit
+ // will naturally get dropped, we just have to bump the
+ // exponent.
+ if (bitCountMP(mpSignificand) >= info->sigBits) {
+ binaryExponent += 1;
+ } else {
+ errno = ERANGE; // This will be a true subnormal return, set ERANGE
+ }
+ } else {
+ // Underflow.
+ free(heapAlloc);
+ underflow(info);
+ return;
+ }
+ }
+
+ // Zero-extend to sigBits and copy to dest
+ size_t mpWords = (size_t)(mpSignificand.msw - mpSignificand.lsw);
+ size_t expectedWords = (size_t)((info->sigBits + mp_word_bits - 1) / mp_word_bits);
+ if (mpWords < expectedWords) {
+ memset(mpSignificand.lsw + mpWords, 0, (expectedWords - mpWords) * sizeof(mp_word_t));
+ }
+ // TODO: Endianness. This only works for little-endian systems.
+ memcpy(info->dest, mpSignificand.lsw, ((unsigned)info->sigBits + 7) / 8);
+ // Free the heap work area (if any)
+ free(heapAlloc);
+
+ // Set the exponent & sign bits
+ uint16_t exponentBits = (uint16_t)(binaryExponent - info->minBinaryExp);
+ if (info->bytes <= 8) {
+ // float80 and binary128 have 16 bit exponent+sign, so no shift needed
+ exponentBits <<= 16 - (info->bytes * 8 - info->sigBits + 1);
+ }
+ exponentBits |= info->negative ? 0x8000 : 0;
+
+ unsigned char *p = info->dest + info->bytes;
+ switch (info->bytes) {
+ case 2:
+ p[-1] = (info->dest[1] & 0x03) | (unsigned char)(exponentBits >> 8);
+ return;
+ case 4:
+ p[-2] = (info->dest[2] & 0x7f) | (unsigned char)exponentBits;
+ break;
+ case 8:
+ p[-2] = (info->dest[6] & 0x0f) | (unsigned char)exponentBits;
+ break;
+ case 10:
+ case 16:
+ p[-2] = (unsigned char)exponentBits;
+ break;
+ }
+ p[-1] = (unsigned char)(exponentBits >> 8);
+}
+
+// ================================================================
+// ================================================================
+//
+// Hex Float parsing
+//
+// ================================================================
+// ================================================================
+
+// This is called with `start` pointing to the `0x` that begins
+// the hex float. We assume the first two characters have already
+// been verified to be '0x'.
+static void
+hexFloat(const unsigned char *start, struct parseInfo *info) {
+ const unsigned char *p = start + 2; // Skip leading '0x'
+
+ // Two 64-bit ints that we use as a joint 128-bit accumulator
+ uint64_t significand_lsw = 0, significand_msw = 0;
+
+ // Digits before the decimal point
+ const unsigned char *firstDigit = p;
+ unsigned remainder = 0;
+ int base2Exponent = 0;
+ // Perf: Just use the lower 64 bits until it's full...
+ while (hexdigit[*p] < 16 && significand_lsw < (uint64_t)1 << 60) {
+ significand_lsw <<= 4;
+ significand_lsw += hexdigit[*p];
+ p += 1;
+ }
+ // ... then work with the full 128 bits until it's full ...
+ while (hexdigit[*p] < 16 && significand_msw < (uint64_t)1 << 59) {
+ significand_msw <<= 4;
+ significand_msw |= (significand_lsw >> 60);
+ significand_lsw <<= 4;
+ significand_lsw += hexdigit[*p];
+ p += 1;
+ }
+ // ... if there's more beyond that, just track whether it's non-zero.
+ while (hexdigit[*p] < 16) {
+ remainder |= hexdigit[*p];
+ base2Exponent += 4;
+ p += 1;
+ }
+ int digitCount = (int)(p - firstDigit);
+
+ // Try to match decimal point
+ if (info->loc == strtofp_C_locale) {
+ if (*p == '.') {
+ p += 1;
+ } else {
+ goto possible_exponent;
+ }
+ } else {
+ const unsigned char *decimalPoint = strtofp_locale_decimal_point(info->loc);
+ const unsigned char *startOfPotentialDecimalPoint = p;
+ for (const unsigned char *d = decimalPoint; *d; d++) {
+ if (*p != *d) {
+ p = startOfPotentialDecimalPoint;
+ goto possible_exponent;
+ }
+ p++;
+ }
+ }
+
+ // Collect digits after decimal point
+ const unsigned char *firstFractionDigit = p;
+ if (significand_msw == 0) {
+ while (hexdigit[*p] < 16 && significand_lsw < (uint64_t)1 << 60) {
+ significand_lsw <<= 4;
+ significand_lsw += hexdigit[*p];
+ p += 1;
+ }
+ }
+ while (hexdigit[*p] < 16 && significand_msw < (uint64_t)1 << 59) {
+ significand_msw <<= 4;
+ significand_msw |= (significand_lsw >> 60);
+ significand_lsw <<= 4;
+ significand_lsw += hexdigit[*p];
+ p += 1;
+ }
+ // Initialize exponent from the number of digits after
+ // the decimal point.
+ base2Exponent -= 4 * (p - firstFractionDigit);
+ // Any remaining digits may impact rounding...
+ while (hexdigit[*p] < 16) {
+ remainder |= hexdigit[*p];
+ p += 1;
+ }
+ digitCount += p - firstFractionDigit;
+
+ possible_exponent:
+ if (*p == 'p' || *p == 'P') {
+ const unsigned char *exponentPhrase = p;
+ p += 1;
+ int negativeExponent = 0;
+ if (*p == '-') {
+ negativeExponent = 1;
+ p += 1;
+ } else if (*p == '+') {
+ p += 1;
+ }
+ if (*p < '0' || *p > '9') {
+ // Ignore 'e' or 'E' not followed by number.
+ p = exponentPhrase;
+ } else {
+ // Skip zeros in "0x1p+0000000000000000000000000001"
+ int exp = 0;
+ unsigned t = (unsigned)(*p - '0');
+ while (t < 10) {
+ if (exp > 99999999) {
+ exp = 99999999;
+ } else {
+ exp = exp * 10 + (int)t;
+ }
+ p += 1;
+ t = (unsigned)(*p - '0');
+ }
+ if (negativeExponent) {
+ exp = -exp;
+ }
+ base2Exponent += exp;
+ }
+ }
+
+ if (significand_lsw == 0 && significand_msw == 0) {
+ if (digitCount == 0) {
+ // Malformed hexfloat with no digits after '0x',
+ // BUT '0x' is still a valid zero followed by non-parsed 'x'
+ p = start + 1; // Address of 'x'
+ } else {
+ // Just a regular zero
+ }
+ base2Exponent = info->minBinaryExp;
+ } else {
+ // Normalize to 127 bits
+ if (significand_msw == 0) {
+ if ((significand_lsw >> 63) == 0) {
+ significand_msw = significand_lsw;
+ significand_lsw = 0;
+ base2Exponent -= 64;
+ } else {
+ significand_msw = significand_lsw >> 1;
+ significand_lsw <<= 63;
+ base2Exponent -= 63;
+ }
+ }
+ int normalizeShift = __builtin_clzll(significand_msw) - 1;
+ if (normalizeShift > 0) {
+ significand_msw <<= normalizeShift;
+ significand_msw |= significand_lsw >> (64 - normalizeShift);
+ significand_lsw <<= normalizeShift;
+ base2Exponent -= normalizeShift;
+ }
+ base2Exponent += 127;
+ if (remainder) significand_lsw |= 1;
+
+ if (base2Exponent <= info->maxBinaryExp
+ && base2Exponent >= info->minBinaryExp - info->sigBits + 1) {
+ int fractionBits;
+ uint64_t fraction;
+ if (base2Exponent > info->minBinaryExp) {
+ // Hexfloat normal
+ fractionBits = 127 - info->sigBits;
+ } else {
+ fractionBits = 127 - (base2Exponent - info->minBinaryExp + info->sigBits - 1);
+ base2Exponent = info->minBinaryExp;
+ }
+ if (fractionBits < 64) {
+ fraction = significand_lsw << (64 - fractionBits);
+ significand_lsw >>= fractionBits;
+ significand_lsw |= significand_msw << (64 - fractionBits);
+ significand_msw >>= fractionBits;
+ } else {
+ fraction = (significand_msw << (128 - fractionBits))
+ | (significand_lsw >> (fractionBits - 64));
+ if ((significand_lsw << (128 - fractionBits)) != 0) {
+ fraction |= 1;
+ }
+ significand_lsw = significand_msw >> (fractionBits - 64);
+ significand_msw = 0;
+ }
+
+ switch (FENV_ROUNDING_MODE()) {
+#ifdef FE_TOWARDZERO
+ case FE_TOWARDZERO:
+ break;
+#endif
+#ifdef FE_DOWNWARD
+ case FE_DOWNWARD:
+ if (info->negative && (fraction != 0)) {
+ significand_lsw += 1;
+ if (significand_lsw == 0) {
+ significand_msw += 1;
+ }
+ }
+ break;
+#endif
+#ifdef FE_UPWARD
+ case FE_UPWARD:
+ if (!info->negative && (fraction != 0)) {
+ significand_lsw += 1;
+ if (significand_lsw == 0) {
+ significand_msw += 1;
+ }
+ }
+ break;
+#endif
+#ifdef FE_TONEAREST
+ case FE_TONEAREST:
+#endif
+ default: {
+ const uint64_t oneHalf = (uint64_t)1 << 63;
+ if (fraction > oneHalf
+ || (fraction == oneHalf && (significand_lsw & 1))) {
+ significand_lsw += 1;
+ if (significand_lsw == 0) {
+ significand_msw += 1;
+ }
+ }
+ break;
+ }
+ }
+
+ // Rounding up may have caused us to overflow:
+ // For subnormals, overflow to sigBits converts this to a normal
+ // For normal, overflow just needs to be renormalized
+ int overflowBits = (base2Exponent == info->minBinaryExp) ? (info->sigBits - 1) : info->sigBits;
+ if (((overflowBits > 64) && ((significand_msw >> (overflowBits - 64)) != 0))
+ || ((overflowBits == 64) && (significand_msw != 0))
+ || ((overflowBits < 64) && ((significand_msw != 0) || ((significand_lsw >> overflowBits) != 0)))) {
+ if (base2Exponent > info->minBinaryExp) {
+ significand_lsw >>= 1;
+ significand_lsw |= significand_msw << 63;
+ significand_msw >>= 1;
+ }
+ base2Exponent += 1;
+ } else if (base2Exponent == info->minBinaryExp // Subnormal
+ && fraction != 0) { // Not exact
+ errno = ERANGE;
+ }
+ }
+ }
+
+ if (info->end) *info->end = (char *)(uintptr_t)p;
+ if (base2Exponent > info->maxBinaryExp) {
+ overflow(info);
+ } else if (base2Exponent < info->minBinaryExp - info->sigBits + 1) {
+ underflow(info);
+ } else {
+ switch (info->bytes) {
+#if ENABLE_BINARY16_SUPPORT
+ case 2: {
+ uint16_t exponentBits = (uint16_t)(base2Exponent - info->minBinaryExp);
+ uint16_t raw = (uint16_t)
+ ((info->negative ? 0x8000U : 0U)
+ | (uint16_t)(exponentBits << 10)
+ | (significand_lsw & 0x3ffU));
+ memcpy(info->dest, &raw, sizeof(raw));
+ break;
+ }
+#endif
+#if ENABLE_BINARY32_SUPPORT
+ case 4: {
+ uint32_t exponentBits = (uint32_t)(base2Exponent - info->minBinaryExp);
+ uint32_t raw = (uint32_t)
+ ((info->negative ? 0x80000000UL : 0UL)
+ | (exponentBits << 23U)
+ | (uint32_t)(significand_lsw & 0x7fffffULL));
+ memcpy(info->dest, &raw, sizeof(raw));
+ break;
+ }
+#endif
+#if ENABLE_BINARY64_SUPPORT
+ case 8: {
+ uint64_t exponentBits = (uint64_t)(base2Exponent - info->minBinaryExp);
+ uint64_t raw = (uint64_t)
+ ((info->negative ? 0x8000000000000000ULL : 0UL)
+ | (exponentBits << 52U)
+ | (significand_lsw & 0xfffffffffffffULL));
+ memcpy(info->dest, &raw, sizeof(raw));
+ break;
+ }
+#endif
+#if ENABLE_FLOAT80_SUPPORT
+ case 10: {
+ // TODO: Support big-endian
+ uint16_t exponentBits = (uint16_t)(base2Exponent - info->minBinaryExp);
+ memcpy(info->dest, &significand_lsw, sizeof(significand_lsw));
+ info->dest[8] = exponentBits & 0xff;
+ info->dest[9] = (exponentBits >> 8) | (info->negative ? 0x80 : 0);
+ break;
+ }
+#endif
+#if ENABLE_BINARY128_SUPPORT
+ case 16: {
+ // TODO: Support big-endian
+ uint16_t exponentBits = (uint16_t)(base2Exponent - info->minBinaryExp);
+ memcpy(info->dest, &significand_lsw, sizeof(significand_lsw));
+ memcpy(info->dest + 8, &significand_msw, sizeof(significand_msw));
+ info->dest[14] = exponentBits & 0xff;
+ info->dest[15] = (exponentBits >> 8) | (info->negative ? 0x80 : 0);
+ break;
+ }
+#endif
+ }
+ }
+}
+
+// ================================================================
+// ================================================================
+//
+// NaN parsing
+//
+// ================================================================
+// ================================================================
+
+// Parse a NaN.
+// This implements the same logic as Apple's previous libc strtod. It
+// recognizes an optional payload between parentheses and uses
+// that to construct a valid NaN return value.
+
+static void parseNan(const unsigned char *start, struct parseInfo *info) {
+ const unsigned char *p = start + 3; // Skip "nan"
+ unsigned char _Alignas(sizeof(mp_word_t)) stackWorkArea[20];
+ memset(stackWorkArea, 0, sizeof(stackWorkArea));
+ const unsigned char *endNan = p;
+ if (*p == '(') {
+ p += 1;
+ unsigned base = 10;
+ if (*p == '0') {
+ p += 1;
+ if (*p == 'x') {
+ base = 16;
+ p += 1;
+ } else {
+ base = 8;
+ }
+ }
+ mp_t stackMP = { (mp_word_t *)stackWorkArea,
+ (mp_word_t *)(stackWorkArea + 16) };
+ mp_t payload = stackMP;
+ while (hexdigit[*p] < base) {
+ multiplyMPByN(&payload, base);
+ addToMP(&payload, hexdigit[*p]);
+ payload.msw = stackMP.msw; // Prune off excess bits
+ p += 1;
+ }
+ if (*p == ')') {
+ p += 1;
+ } else {
+ memset(stackWorkArea, 0, sizeof(stackWorkArea));
+ while (*p != '\0' && *p != ')') {
+ p += 1;
+ }
+ if (*p == ')') {
+ p += 1;
+ } else {
+ p = endNan;
+ }
+ }
+ }
+ // TODO: Endianness. This only works for little-endian.
+ memcpy(info->dest, stackWorkArea, (size_t)info->bytes);
+ switch (info->bytes) {
+#if ENABLE_BINARY16_SUPPORT
+ case 2: {
+ info->dest[1] = info->dest[1] | (info->negative ? 0xfe : 0x7e);
+ break;
+ }
+#endif
+#if ENABLE_BINARY32_SUPPORT
+ case 4: {
+ info->dest[2] |= 0xc0; // Set quiet bit and low-order exponent bit
+ info->dest[3] = info->negative ? 0xff : 0x7f; // exponent and sign bit
+ break;
+ }
+#endif
+#if ENABLE_BINARY64_SUPPORT
+ case 8: {
+ info->dest[6] |= 0xf8; // Set quiet bit and low-order exponent bits
+ info->dest[7] = info->negative ? 0xff : 0x7f; // exponent and sign bit
+ break;
+ }
+#endif
+#if ENABLE_FLOAT80_SUPPORT
+ case 10: {
+ info->dest[7] |= 0xc0; // Set bit 63 and quiet bit
+ info->dest[8] = 0xff;
+ info->dest[9] = info->negative ? 0xff : 0x7f;
+ break;
+ }
+#endif
+#if ENABLE_BINARY128_SUPPORT
+ case 16: {
+ info->dest[13] |= 0x80; // Set quiet bit
+ info->dest[14] = 0xff;
+ info->dest[15] = info->negative ? 0xff : 0x7f;
+ break;
+ }
+#endif
+ }
+ if (info->end) *info->end = (char *)(uintptr_t)p;
+}
+
+// This is used as the initial parse for all formats.
+//
+// It verifies the format and handles accordingly:
+// * hexFloat is parsed by calling hexFloat() above
+// * NaN payloads are parse by calling parseNan() above
+// * Infinity and NaN w/o payload are parsed directly
+// * True zero is parsed directly
+// The above are fully handled within this function and
+// it returns zero (false) to flag that there's nothing
+// more to do.
+//
+// For a decimal input, it collects the digits into a 64-bit
+// accumulator and returns that and the parsed exponent.
+// If there are <= 19 digits, this is enough to generate
+// a final result directly.
+//
+// If there are more than 19 digits, this logic first lets the
+// accumulator overflow. After we detect the overflow, we re-parse
+// the first 19 digits and return those. Callers who need more digits
+// will have to use firstUnparsedDigit and digitCount to re-parse the
+// significand (they can ignore the decimal point, though, since that
+// has already been factored into the base10Exponent).
+static int
+fastParse64(struct parseInfo *info) {
+ const unsigned char *p = (const unsigned char *)info->start;
+
+ uint64_t digits = 0;
+ int digitCount = 0;
+ int base10Exponent = 0;
+ info->negative = 0;
+
+ // Skip leading whitespace. Stop at a +/-/digit or other character.
+ // This is a little oddly phrased in order to avoid a pointless call
+ // to `isspace` for common cases.
+ while (1) {
+ if (*p >= '0' && *p <= '9') {
+ break;
+ } else if (*p == '-') {
+ info->negative = 1;
+ p += 1;
+ break;
+ } else if (*p == '+') {
+ p += 1;
+ break;
+ } else if (*p == ' ' || isspace(*p)) {
+ p += 1;
+ } else {
+ break;
+ }
+ }
+
+ if (*p == '0') {
+ if (p[1] == 'x' || p[1] == 'X') {
+ hexFloat(p, info);
+ return 0;
+ }
+ while (*p == '0') {
+ p += 1;
+ }
+ } else if (*p == 'i' || *p == 'I') {
+ if ((p[1] == 'n' || p[1] == 'N')
+ && (p[2] == 'f' || p[2] == 'F')) {
+ if ((p[3] == 'i' || p[3] == 'I')
+ && (p[4] == 'n' || p[4] == 'N')
+ && (p[5] == 'i' || p[5] == 'I')
+ && (p[6] == 't' || p[6] == 'T')
+ && (p[7] == 'y' || p[7] == 'Y')) {
+ p += 8;
+ } else {
+ p += 3;
+ }
+ // Matched 'inf' or 'infinity' case-insensitive
+ if (info->end) *info->end = (char *)(uintptr_t)p;
+ infinity(info);
+ return 0;
+ }
+ goto fail;
+ } else if (*p == 'n' || *p == 'N') {
+ if ((p[1] == 'a' || p[1] == 'A')
+ && (p[2] == 'n' || p[2] == 'N')) {
+ parseNan(p, info);
+ return 0;
+ }
+ goto fail;
+ } else if (*p < '0' || *p > '9') {
+ // If this isn't a hexfloat, nan, or infinity and does
+ // start with a digit, it must start with a decimal point,
+ // and that decimal point must be immediately followed
+ // by a digit.
+ if (info->loc == strtofp_C_locale) {
+ // Decimal point is '.' in C locale, avoid calling localeconv_l
+ if (*p == '.') {
+ p += 1;
+ if (*p >= '0' && *p <= '9') {
+ goto parseFraction;
+ }
+ }
+ } else {
+ // Look up decimal point in locale
+ const unsigned char *decimalPoint = strtofp_locale_decimal_point(info->loc);
+ if (decimalPoint[1] == '\0') {
+ if (decimalPoint[0] == *p) {
+ p += 1;
+ if (*p >= '0' && *p <= '9') {
+ goto parseFraction;
+ }
+ }
+ } else {
+ // Multi-byte decimal point
+ int matchedDecimalPoint = (1 == 1);
+ for (const unsigned char *d = decimalPoint; *d; d++) {
+ matchedDecimalPoint &= (*p == *d);
+ p++;
+ }
+ if (matchedDecimalPoint && *p >= '0' && *p <= '9') {
+ goto parseFraction;
+ }
+ }
+ }
+ goto fail;
+ }
+
+ // Collect digits before the decimal point
+ const unsigned char *firstUnparsedDigit = p;
+ {
+ uint8_t t = (uint8_t)(*p - '0');
+ if (t < 10) {
+ digits = t;
+ p += 1;
+ t = (uint8_t)(*p - '0');
+ while(t < 10) {
+ digits = 10 * digits + t;
+ p += 1;
+ t = (uint8_t)(*p - '0');
+ }
+ digitCount = (int)(p - firstUnparsedDigit);
+ }
+ }
+
+ // Try to match an optional decimal point
+ if (info->loc == strtofp_C_locale) {
+ // Decimal point is '.' in C locale, avoid calling localeconv_l
+ if (*p == '.') {
+ p += 1;
+ goto parseFraction;
+ }
+ goto maybeParseExponent;
+ } else if (*p == ' ' || *p == '\0' || *p == 'e' || *p == 'E') {
+ // Don't call localeconv_l if we know the next character
+ // cannot possibly be a decimal point.
+ goto maybeParseExponent;
+ } else {
+ // Look up decimal point in locale
+ const unsigned char *decimalPoint = strtofp_locale_decimal_point(info->loc);
+ if (decimalPoint[1] == '\0') {
+ if (decimalPoint[0] == *p) {
+ p += 1;
+ goto parseFraction;
+ }
+ } else {
+ // Multi-byte decimal point
+ const unsigned char *startOfPotentialDecimalPoint = p;
+ int matchedDecimalPoint = (1 == 1);
+ for (const unsigned char *d = decimalPoint; *d; d++) {
+ matchedDecimalPoint &= (*p == *d);
+ p++;
+ }
+ if (matchedDecimalPoint) {
+ goto parseFraction;
+ } else {
+ p = startOfPotentialDecimalPoint;
+ goto maybeParseExponent;
+ }
+ }
+ }
+
+ parseFraction:
+ {
+ const unsigned char *firstDigitAfterDecimalPoint = p;
+ if (digitCount == 0) {
+ while (*p == '0') {
+ p += 1;
+ }
+ // "0.000000001234" has 4 digits
+ firstUnparsedDigit = p;
+ unsigned t = (unsigned)(*p - '0');
+ if (t < 10) {
+ p += 1;
+ digits = t;
+ t = (unsigned)(*p - '0');
+ while (t < 10) {
+ digits = 10 * digits + t;
+ p += 1;
+ t = (unsigned)(*p - '0');
+ }
+ }
+ digitCount = (int)(p - firstUnparsedDigit);
+ } else {
+ // Perf: For canada.txt benchmark, this loop is ~30% of total runtime
+ unsigned t = (unsigned)(*p - '0');
+ if (t < 10) {
+ p += 1;
+ digits = 10 * digits + t;
+ t = (unsigned)(*p - '0');
+ while (t < 10) {
+ p += 1;
+ digits = 10 * digits + t;
+ t = (unsigned)(*p - '0');
+ }
+ }
+ digitCount += p - firstDigitAfterDecimalPoint;
+ }
+ base10Exponent = (int)(firstDigitAfterDecimalPoint - p);
+ }
+
+ // ================================================================
+ // Step 1e: Parse the optional exponent
+ maybeParseExponent:
+ if (*p == 'e' || *p == 'E') {
+ const unsigned char *exponentPhraseStart = p;
+ p += 1;
+ int negativeExponent = 1;
+ if (*p == '-') {
+ negativeExponent = -1;
+ p += 1;
+ } else if (*p == '+') {
+ p += 1;
+ }
+ uint8_t t = (uint8_t)(*p - '0');
+ if (t < 10) {
+ unsigned exp = t;
+ p += 1;
+ t = (uint8_t)(*p - '0');
+ while (t < 10) {
+ p += 1;
+ exp = 10 * exp + t;
+ t = (uint8_t)(*p - '0');
+ }
+ if (p - exponentPhraseStart > 9) {
+ // The exponent text was unusually long... re-parse
+ // it more carefully to see if it really should overflow.
+ const unsigned char *q = exponentPhraseStart + 1;
+ if (*q == '-' || *q == '+') {
+ q += 1;
+ }
+ while (*q == '0') {
+ q += 1;
+ }
+ if (p - q > 8) {
+ // If there were more than 8 digits with leading zeros
+ // excluded, we've definitely overflowed.
+ exp = 99999999;
+ }
+ }
+ base10Exponent += (int)exp * negativeExponent;
+ } else {
+ p = exponentPhraseStart;
+ }
+ }
+
+ if (info->end) *info->end = (char *)(uintptr_t)p;
+
+ // No non-zero digits, must be an explicit zero:
+ // "0", ".000", "0.0", "0e0", "0.0e999", etc.
+ if (digitCount == 0) {
+ memset(info->dest, 0, info->bytes);
+ info->dest[info->bytes - 1] = info->negative ? 0x80 : 0;
+ return 0;
+ }
+
+ // Coarse over/underflow check
+ if (base10Exponent + digitCount < info->minDecimalExp) {
+ underflow(info);
+ return 0;
+ }
+ if (base10Exponent + digitCount > info->maxDecimalExp) {
+ overflow(info);
+ return 0;
+ }
+
+ int unparsedDigitCount = 0;
+ if (digitCount > 19) {
+ digits = 0;
+ int i = 0;
+ const unsigned char *q = firstUnparsedDigit;
+ while (i < 19) {
+ // Note: Skip non-digit chars (e.g., decimal point)
+ unsigned t = (unsigned)(*q - '0');
+ if (t < 10) {
+ digits = digits * 10 + t;
+ i += 1;
+ }
+ q += 1;
+ }
+ firstUnparsedDigit = q;
+ unparsedDigitCount = digitCount - 19;
+ } else {
+ firstUnparsedDigit = NULL;
+ }
+
+ info->digitCount = digitCount;
+ info->firstUnparsedDigit = firstUnparsedDigit;
+ info->unparsedDigitCount = unparsedDigitCount;
+ info->digits = digits;
+ info->base10Exponent = base10Exponent;
+
+ return 1; // Regular decimal case...
+
+ fail:
+ if (info->end) *info->end = (char *)(uintptr_t)info->start;
+ memset(info->dest, 0, info->bytes);
+ return 0;
+}
+
+// ================================================================
+// ================================================================
+//
+// Parse an IEEE 754 Binary16 (aka "Half")
+//
+// ================================================================
+// ================================================================
+
+#if ENABLE_BINARY16_SUPPORT
+static void _ffpp_strtoencf16_l(unsigned char *dest, const char *start, char **end, strtofp_locale_t loc) {
+ static const int bytes = 2;
+ static const int sigBits = 11;
+ static const int minBinaryExp = -14;
+ static const int maxBinaryExp = 16;
+ static const int minDecimalExp = -7;
+ static const int maxDecimalExp = 5;
+ static const int maxDecimalMidpointDigits = 22;
+ struct parseInfo info = {
+ .bytes = bytes,
+ .sigBits = sigBits,
+ .minBinaryExp = minBinaryExp,
+ .maxBinaryExp = maxBinaryExp,
+ .minDecimalExp = minDecimalExp,
+ .maxDecimalExp = maxDecimalExp,
+ .maxDecimalMidpointDigits = maxDecimalMidpointDigits,
+ .dest = dest,
+ .start = start,
+ .end = end,
+ .loc = loc,
+ };
+
+ // ================================================================
+ // Parse the input (mostly)
+ // ================================================================
+ if (!fastParse64(&info)) {
+ return;
+ }
+
+ // TODO: Someday, we might implement fast paths for binary16
+ // But the range of binary16 is so small that the varint slow
+ // path is actually reasonably fast.
+
+ // ================================================================
+ // Slow Path (varint calculation)
+ // ================================================================
+ char _Alignas(sizeof(mp_word_t)) stackWorkArea[32];
+ static const size_t stackWorkAreaWords = sizeof(stackWorkArea) / sizeof(mp_word_t);
+ generalSlowpath(&info, FENV_ROUNDING_MODE(), (mp_word_t *)stackWorkArea, stackWorkAreaWords, false);
+}
+#endif
+
+// ================================================================
+// ================================================================
+//
+// Parse an IEEE 754 Binary32 (aka "Float" aka "Single")
+//
+// ================================================================
+// ================================================================
+#if ENABLE_BINARY32_SUPPORT
+static void _ffpp_strtoencf32_l(unsigned char *dest, const char *start, char **end, strtofp_locale_t loc) {
+ static const int bytes = 4;
+ static const int sigBits = 24;
+ static const int minBinaryExp = -126;
+ static const int maxBinaryExp = 128;
+ static const int minDecimalExp = -46;
+ static const int maxDecimalExp = 40;
+ static const int maxDecimalMidpointDigits = 113;
+ struct parseInfo info = {
+ .bytes = bytes,
+ .sigBits = sigBits,
+ .minBinaryExp = minBinaryExp,
+ .maxBinaryExp = maxBinaryExp,
+ .minDecimalExp = minDecimalExp,
+ .maxDecimalExp = maxDecimalExp,
+ .maxDecimalMidpointDigits = maxDecimalMidpointDigits,
+ .dest = dest,
+ .start = start,
+ .end = end,
+ .loc = loc,
+ };
+
+ // ================================================================
+ // Parse the input (mostly)
+ // ================================================================
+ if (!fastParse64(&info)) {
+ return;
+ }
+
+#if FLOAT_IS_BINARY32
+ // ================================================================
+ // Use a single float operation
+ // ================================================================
+ const static float floatPowerOf10[] = {1.0f, 10.0f, 100.0f,
+ 1e3f, 1e4f, 1e5f, 1e6f, 1e7f, 1e8f, 1e9f, 1e10f};
+ if (info.base10Exponent > -11 && info.base10Exponent < 11 && info.digitCount < 8) {
+ int32_t sdigits = info.negative ? -(int32_t)info.digits : (int32_t)info.digits;
+ if (info.base10Exponent < 0) {
+ float result = (float)sdigits / floatPowerOf10[-info.base10Exponent];
+ memcpy(info.dest, &result, sizeof(result));
+ return;
+ } else {
+ float result = (float)sdigits * floatPowerOf10[info.base10Exponent];
+ memcpy(info.dest, &result, sizeof(result));
+ return;
+ }
+ }
+#endif
+
+ int roundingMode = FENV_ROUNDING_MODE();
+
+#if 1
+ // ================================================================
+ // Fixed-precision interval arithmetic
+ // ================================================================
+ // The idea: Use 64-bit fixed-precision arithmetic to compute
+ // upper/lower bounds for the correct answer. If those bounds
+ // agree, then we can return the result.
+ int16_t exp10;
+ unsigned upperBoundOffset;
+ if (info.digitCount <= 19) {
+ exp10 = (int16_t)info.base10Exponent;
+ upperBoundOffset = 4;
+ } else {
+ exp10 = (int16_t)(info.base10Exponent + info.digitCount - 19);
+ upperBoundOffset = 36;
+ }
+
+ // Powers in the table are rounded so that
+ // powerOfTenRoundedDown <= true value <= powerOfTenRoundedDown + 1
+ const uint64_t powerOfTenRoundedDown = (powersOf10_Float + 70)[exp10];
+
+ // Binary exponent for the power-of-10 product
+ const int powerOfTenExponent = binaryExponentFor10ToThe(exp10);
+
+ // Normalize the digits, adjust binary exponent
+ int normalizeDigits = __builtin_clzll(info.digits); // 0 <= normalizeDigits <= 4
+ assert(normalizeDigits <= 4 || info.digitCount < 20);
+ uint64_t d = info.digits << normalizeDigits;
+ // For <= 19 digits, the upper bound for d is just d
+ // For > 19 digits, the upper bound is 1 << normalizeDigits <= 16
+ int binaryExponent = powerOfTenExponent - normalizeDigits + 64;
+
+ // A 64-bit lower bound on the binary significand
+ uint64_t l = multiply64x64RoundingDown(powerOfTenRoundedDown, d);
+ // An upper bound:
+ // <= 19 digits: (powerOfTenRoundedDown + 1) * d == l128 + d
+ // > 19 digits: (powerOfTenRoundedDown + 1) * (d + 16)
+ // == l128 + d + 16 * powerOfTenRoundedDown + 16
+ // For <=19 digits, upper bound is l + 2
+ // For >19 digits, upper bound is l + 18
+
+ // Normalize the product, adjust binary exponent
+ // (In particular, this lets us shift by a constant below.)
+ int normalizeProduct = __builtin_clzll(l); // 0 <= normalizeProduct <= 1
+ assert(normalizeProduct <= 1);
+ // Upper bound is <= (l + 2) << 1 or (l + 18) << 1
+ l <<= normalizeProduct;
+ // Upper bound is <= l + 4 or l + 36
+ binaryExponent -= normalizeProduct;
+
+ // Upper/lower bounds for the 24-bit significand
+ uint64_t u = l + upperBoundOffset;
+ uint32_t lowerSignificand, upperSignificand;
+ switch (roundingMode) {
+#ifdef FE_TOWARDZERO
+ case FE_TOWARDZERO:
+ lowerSignificand = (l) >> 40;
+ upperSignificand = (u) >> 40;
+ break;
+#endif
+#ifdef FE_DOWNWARD
+ case FE_DOWNWARD:
+ if (info.negative) {
+ lowerSignificand = (l + 0x0ffffffffff) >> 40;
+ upperSignificand = (u + 0x10000000000) >> 40;
+ } else {
+ lowerSignificand = (l) >> 40;
+ upperSignificand = (l + 4) >> 40;
+ }
+ break;
+#endif
+#ifdef FE_UPWARD
+ case FE_UPWARD:
+ if (!info.negative) {
+ lowerSignificand = (l + 0x0ffffffffff) >> 40;
+ upperSignificand = (u + 0x10000000000) >> 40;
+ } else {
+ lowerSignificand = (l) >> 40;
+ upperSignificand = (u) >> 40;
+ }
+ break;
+#endif
+#ifdef FE_TONEAREST
+ case FE_TONEAREST:
+#endif
+ default:
+ // Instead of worrying about exact ties-round-even, round lower
+ // down (adding 0x7ff...ff) and upper up (adding 0x800...00) so
+ // that exact ties fall through to be handled elsewhere.
+ lowerSignificand = (l + 0x7fffffffff) >> 40;
+ upperSignificand = (u + 0x8000000000) >> 40;
+ }
+ if (lowerSignificand == 0) { // lowerSignificand wrapped...
+ binaryExponent += 1;
+ }
+ if (binaryExponent > maxBinaryExp) {
+ overflow(&info);
+ return;
+ } else if (binaryExponent <= minBinaryExp) {
+ if (binaryExponent <= minBinaryExp - sigBits) {
+ underflow(&info);
+ return;
+ }
+ // TODO: ... Subnormal? ...
+ } else if (upperSignificand == lowerSignificand) {
+ uint32_t exponentBits = (uint32_t)(binaryExponent - minBinaryExp) << (sigBits - 1);
+ uint32_t significandMask = (((uint32_t)1 << (sigBits - 1)) - 1);
+ uint32_t significandBits = lowerSignificand & significandMask;
+ uint32_t signbit = info.negative ? 0x80000000UL : 0ULL;
+ uint32_t raw = signbit | exponentBits | significandBits;
+ memcpy(info.dest, &raw, sizeof(raw));
+ return;
+ }
+#endif
+
+ // ================================================================
+ // Slow Path (varint calculation)
+ // ================================================================
+ char _Alignas(sizeof(mp_word_t)) stackWorkArea[128];
+ static const size_t stackWorkAreaWords = sizeof(stackWorkArea) / sizeof(mp_word_t);
+ generalSlowpath(&info, FENV_ROUNDING_MODE(), (mp_word_t *)stackWorkArea, stackWorkAreaWords, false);
+}
+#endif
+
+// ================================================================
+// ================================================================
+//
+// Parse an IEEE 754 Binary64 (Double)
+//
+// ================================================================
+// ================================================================
+
+#if ENABLE_BINARY64_SUPPORT
+static void _ffpp_strtoencf64_l(unsigned char *dest, const char *start, char **end, strtofp_locale_t loc) {
+ static const int bytes = 8;
+ static const int sigBits = 53;
+ static const int minBinaryExp = -1022;
+ static const int maxBinaryExp = 1024;
+ static const int minDecimalExp = -325;
+ static const int maxDecimalExp = 310;
+ static const int maxDecimalMidpointDigits = 768;
+ struct parseInfo info = {
+ .bytes = bytes,
+ .sigBits = sigBits,
+ .minBinaryExp = minBinaryExp,
+ .maxBinaryExp = maxBinaryExp,
+ .minDecimalExp = minDecimalExp,
+ .maxDecimalExp = maxDecimalExp,
+ .maxDecimalMidpointDigits = maxDecimalMidpointDigits,
+ .dest = dest,
+ .start = start,
+ .end = end,
+ .loc = loc,
+ };
+
+ // ================================================================
+ // Parse the input (mostly)
+ // ================================================================
+ if (!fastParse64(&info)) {
+ return;
+ }
+
+ // If digitCount <= 19, then the result we want is:
+ // (info.negative ? -1 : 1) * info.digits * 10^info.base10Exponent
+
+ // The rest of this function consists of several different methods
+ // of computing this product with varying trade-offs of input range
+ // and performance. The first ones are fast but only work for
+ // certain inputs; the later ones are slower and more general.
+
+ // ================================================================
+ // Floating-point Calculation
+ // ================================================================
+ // Note: This optimization relies on the host platform `double` type
+ // supporting true IEEE754 binary64 arithmetic.
+#if DOUBLE_IS_BINARY64 && (FLT_EVAL_METHOD == 0 || FLT_EVAL_METHOD == 1)
+
+ const static double doublePowerOf10[] = { 1.0, 10.0, 100.0, 1e3, 1e4, 1e5,
+ 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16,
+ 1e17, 1e18, 1e19, 1e20, 1e21, 1e22 };
+
+ if (info.base10Exponent >= -22 && info.base10Exponent <= 18) {
+ if (info.base10Exponent < 0) {
+ // We can give exact inputs to a division operation if
+ // we have <= 15 digits and base10Exponent is in [-22..0]
+ // For example, this handles 1.23456789012345
+ if (info.digitCount <= 15) {
+ int64_t sdigits = info.negative ? -(int64_t)info.digits : (int64_t)info.digits;
+ double result = (double)sdigits / doublePowerOf10[-info.base10Exponent];
+ memcpy(info.dest, &result, sizeof(result));
+ return;
+ }
+ // TODO: I really want to handle 35.678912345678934 here
+ // (17 digits and negative exponent). This case is the common
+ // case for Lemire's "canada.txt" benchmark (which is derived from
+ // latitude/longitude data).
+ } else if (info.base10Exponent == 0) {
+ if (info.digitCount <= 19) {
+ if (!info.negative) {
+ // We can use HW conversion of unsigned (positive) int
+ double result = (double)info.digits;
+ memcpy(info.dest, &result, sizeof(result));
+ return;
+ } else if (info.digits <= INT64_MAX) {
+ // We can use HW conversion of signed (negative) int
+ double result = (double)(-(int64_t)info.digits);
+ memcpy(info.dest, &result, sizeof(result));
+ return;
+ }
+ }
+ } else {
+ if (info.digitCount <= 19) {
+ // We can give exact inputs to fma if we have <= 19 digits
+ // and base10Exponent is in [1..18].
+ uint64_t lowMask = 0x7ff;
+ uint64_t highMask = ~lowMask;
+ // digits has <= 64 bits (19 digits)
+ double highDigits = (double)(info.digits & highMask); // <= 53 bits
+ double lowDigits = (double)(info.digits & lowMask); // 11 bits
+ double p10 = doublePowerOf10[info.base10Exponent]; // Exact (10^18 has 42 bits)
+ if (info.negative) p10 = -p10;
+ double u = lowDigits * p10; // Exact (11 bits + 42 bits)
+ // Inputs to fma are all exact, so result is correctly rounded
+ double result = __builtin_fma(highDigits, p10, u);
+ memcpy(info.dest, &result, sizeof(result));
+ return;
+ }
+ }
+ }
+#endif
+
+ int roundingMode = FENV_ROUNDING_MODE();
+
+#if 1
+ // ================================================================
+ // Fixed-width interval arithmetic
+ // ================================================================
+ int16_t exp10;
+ // Total possible inaccuracy in our calculations below
+ unsigned intervalWidth;
+ if (info.digitCount <= 19) {
+ exp10 = (int16_t)info.base10Exponent;
+ // We have all the digits, so our upper/lower bounds
+ // on the significand are the same, which makes the final
+ // bounds fairly tight.
+ intervalWidth = 12;
+ } else {
+ // We're going to treat the input as if it were:
+ // (first 19 digits).(remaining digits) * 10^p
+ // In this form, (first 19 digits) is a lower bound for the decimal
+ // significand and (first 19 digits) + 1 is an upper bound. We also
+ // have upper/lower bounds for 10^p.
+ // info.digits already has the first 19 digits, we just need to
+ // adjust the effective exponent.
+ exp10 = (int16_t)(info.base10Exponent + info.digitCount - 19);
+ // The non-trivial significand bounds lead to a wider final interval.
+ // This still gives us success ~96% of the time.
+ intervalWidth = 80;
+ }
+
+ // Multiply an exact value for 10^{0..27} times a rounded value 10^{n * 28}
+ const int16_t coarseIndex = (exp10 * 585 + 256) >> 14; // exp10 / 28;
+ const int16_t coarsePower = (coarseIndex * 32) - (coarseIndex * 4); // coarseIndex * 28
+ const int16_t exactPower = exp10 - coarsePower; // exp10 % 28
+ const uint64_t exact = powersOf10_Exact64[exactPower]; // Exact
+ const uint64_t coarse = (powersOf10_CoarseBinary64 + 15)[coarseIndex];
+
+ // Powers in the coarse table are never rounded up, so
+ // coarse <= true value <= coarse + 1
+ const uint64_t powerOfTenRoundedDown = multiply64x64RoundingDown(coarse, exact);
+ // And powerOfTenRoundedUp
+ // <= ((coarse + 1) * exact + UINT64_MAX) >> 64
+ // <= (coarse * exact + exact + UINT64_MAX) >> 64
+ // <= powerOfTenRoundedDown + 2
+
+ // Binary exponent for the power-of-10 product
+ const int powerOfTenExponent = binaryExponentFor10ToThe(coarsePower)
+ + binaryExponentFor10ToThe(exactPower);
+
+ // Normalize the digits, adjust binary exponent
+ int normalizeDigits = __builtin_clzll(info.digits);
+ assert(normalizeDigits <= 4 || info.digitCount < 20);
+ uint64_t d = info.digits << normalizeDigits;
+ int binaryExponent = powerOfTenExponent - normalizeDigits + 64;
+ // The upper bound for d is:
+ // exactly d (for <= 19 digit case)
+ // d + 16 (for > 19 digit case)
+
+ // A 64-bit lower bound on the binary significand
+ uint64_t l = multiply64x64RoundingDown(powerOfTenRoundedDown, d);
+ // Corresponding upper bound:
+ // <= 19 digits: (powerOfTenRoundedDown + 2) * d == l128 + d + d
+ // > 19 digits: (powerOfTenRoundedDown + 2) * (d + 16)
+ // l is a lower bound for the 64-bit binary significand
+ // An upper bound for <= 19 digit case:
+ // (l128 + d + d + UINT64_MAX) >> 64 <= l + 3
+ // For >19 digits, we similarly get l + 20 as an upper bound.
+
+ // Normalize the product, adjust binary exponent
+ // (In particular, this lets us shift by a constant below.)
+ int normalizeProduct = __builtin_clzll(l); // 0 <= normalizeProduct <= 2
+ assert(normalizeProduct <= 2);
+ // Upper bound is (l + 3) or (l + 20)
+ l <<= normalizeProduct;
+ // Upper bound is (l + 12) or (l + 80)
+ binaryExponent -= normalizeProduct;
+
+ // Upper/lower bounds for the 53-bit significand, with rounding
+ //
+ // We round by adding an offset and then truncating the bottom 11
+ // bits. For example, to precisely round away from zero, we
+ // would ideally want to add 0x7ff.ffffffffffffffff. Rounding that
+ // value down/up to the nearest integer gives us 0x7ff, 0x800.
+ unsigned lowerRoundOffset, upperRoundOffset;
+ {
+ bool negative = info.negative;
+ switch (roundingMode) {
+#ifdef FE_UPWARD
+ case FE_UPWARD:
+ negative = !negative;
+ // FALL THROUGH
+#endif
+#ifdef FE_DOWNWARD
+ case FE_DOWNWARD:
+ if (negative) {
+ lowerRoundOffset = 0x7ff; upperRoundOffset = 0x800 + intervalWidth;
+ break;
+ }
+ // FALL THROUGH
+#endif
+#ifdef FE_TOWARDZERO
+ case FE_TOWARDZERO:
+ lowerRoundOffset = 0; upperRoundOffset = intervalWidth;
+ break;
+#endif
+#ifdef FE_TONEAREST
+ case FE_TONEAREST:
+#endif
+ default:
+ (void)negative;
+ lowerRoundOffset = 0x3ff; upperRoundOffset = 0x400 + intervalWidth;
+ break;
+ }
+ }
+ uint64_t lowerSignificand = (l + lowerRoundOffset) >> 11;
+ uint64_t upperSignificand = (l + upperRoundOffset) >> 11;
+
+ int adjustedBinaryExponent = binaryExponent;
+ if (lowerSignificand == 0) { // lowerSignificand wrapped...
+ adjustedBinaryExponent += 1;
+ }
+ if (adjustedBinaryExponent > maxBinaryExp) {
+ overflow(&info);
+ return;
+ } else if (adjustedBinaryExponent <= minBinaryExp) {
+ if (adjustedBinaryExponent <= minBinaryExp - sigBits) {
+ underflow(&info);
+ return;
+ }
+ // Might be subnormal; we need to re-round to the appropriate
+ // number of bits, which requires recomputing the shift amount and
+ // the rounding offsets. This is a generalized version of the
+ // code above that's also slightly slower due to the variable shifts.
+ int shift = 65 - binaryExponent + (minBinaryExp - sigBits);
+ if (shift < 64) {
+ assert(0 < shift && shift < 64);
+ uint64_t lro, uro;
+ bool negative = info.negative;
+ switch (roundingMode) {
+#ifdef FE_UPWARD
+ case FE_UPWARD:
+ negative = !negative;
+ // FALL THROUGH
+#endif
+#ifdef FE_DOWNWARD
+ case FE_DOWNWARD:
+ if (negative) {
+ lro = (1ULL << (shift)) - 1; uro = lro + intervalWidth; break;
+ }
+ // FALL THROUGH
+#endif
+#ifdef FE_TOWARDZERO
+ case FE_TOWARDZERO:
+ lro = 0; uro = intervalWidth; break;
+#endif
+#ifdef FE_TONEAREST
+ case FE_TONEAREST:
+#endif
+ default:
+ (void)negative;
+ lro = (1ULL << (shift - 1)) - 1; uro = (1ULL << (shift - 1)) + intervalWidth; break;
+ }
+ lowerSignificand = (l + lro) >> shift;
+ upperSignificand = (l + uro) >> shift;
+ if (upperSignificand == lowerSignificand) {
+ if (lowerSignificand == 0) { // lowerSignificand wrapped...
+ lowerSignificand = 1ULL << (64 - shift);
+ }
+ // Subnormal
+ uint64_t signbit = info.negative ? 0x8000000000000000ULL : 0ULL;
+ uint64_t raw = signbit | lowerSignificand;
+ if (raw != 0x0010000000000000ULL) {
+ errno = ERANGE;
+ }
+ memcpy(info.dest, &raw, sizeof(raw));
+ return;
+ }
+ }
+ } else if (upperSignificand == lowerSignificand) {
+ // Normal
+ uint64_t exponentBits = (uint64_t)(adjustedBinaryExponent - minBinaryExp) << (sigBits - 1);
+ uint64_t significandMask = (((uint64_t)1 << (sigBits - 1)) - 1);
+ uint64_t significandBits = lowerSignificand & significandMask;
+ uint64_t signbit = info.negative ? 0x8000000000000000ULL : 0ULL;
+ uint64_t raw = signbit | exponentBits | significandBits;
+ memcpy(info.dest, &raw, sizeof(raw));
+ return;
+ }
+ // TODO: If we fall through, should we try refining the
+ // estimate? If we can extend `l` to 128 bits with very
+ // little work, it might be worth the effort.
+#endif
+
+ // ================================================================
+ // Slow Path (varint calculation)
+ // ================================================================
+ // Random testing suggests this step only runs about 3% of the
+ // time, so we focus here on optimizing for code size rather than perf.
+ char _Alignas(sizeof(mp_word_t)) stackWorkArea[656];
+ static const size_t stackWorkAreaWords = sizeof(stackWorkArea) / sizeof(mp_word_t);
+ generalSlowpath(&info, roundingMode, (mp_word_t *)stackWorkArea, stackWorkAreaWords, false);
+}
+#endif
+
+// ================================================================
+// ================================================================
+//
+// 128-bit interval calculation for Float80 and Binary128
+//
+// Unlike binary32/64 above (where we optimize for performance),
+// this logic has been generalized to support both float80 and
+// binary128 with common code. It's a little slower, but these
+// formats are less commonly used, so the code savings are worth
+// it. (And we're still about 80x faster than gdtoa even with this,
+// so I doubt anyone will complain about the performance here!)
+//
+// ================================================================
+// ================================================================
+
+#if ENABLE_FLOAT80_OPTIMIZATIONS || ENABLE_BINARY128_OPTIMIZATIONS
+#if HAVE_UINT128_T
+typedef __uint128_t my_uint128_t;
+#define create128FromHighLow64(high,low) ((low) + ((__uint128_t)(high) << 64))
+#define multiply128xInt(lhs, rhs) ((lhs) * (rhs))
+#define fullMultiply64x64(lhs, rhs) ((__uint128_t)(lhs) * (rhs))
+#define add128x64(lhs, rhs) ((lhs) + (rhs))
+#define add128x128(lhs, rhs) ((lhs) + (rhs))
+#define extractLow64(x) ((uint64_t)(x))
+#define extractHigh64(x) ((uint64_t)((x) >> 64))
+#define isZero(x) ((x) == 0)
+#define isEqual(lhs, rhs) ((lhs) == (rhs))
+#define shiftLeft(lhs, rhs) ((lhs) << (rhs))
+#define shiftRight(lhs, rhs) ((lhs) >> (rhs))
+#else
+typedef struct { uint64_t low; uint64_t high; } my_uint128_t;
+#define extractLow64(x) ((x).low)
+#define extractHigh64(x) ((x).high)
+#define isZero(x) ((x).low == 0 && (x).high == 0)
+#define isEqual(lhs, rhs) ((lhs).low == (rhs).low && ((lhs).high == (rhs).high))
+static my_uint128_t shiftLeft(my_uint128_t lhs, int rhs) {
+ if (rhs > 64) {
+ lhs.high = lhs.low << (rhs - 64);
+ lhs.low = 0;
+ } else if (rhs == 64) {
+ lhs.high = lhs.low;
+ lhs.low = 0;
+ } else if (rhs > 0) {
+ lhs.high = (lhs.high << rhs) + (lhs.low >> (64 - rhs));
+ lhs.low <<= rhs;
+ }
+ return lhs;
+}
+static my_uint128_t shiftRight(my_uint128_t lhs, int rhs) {
+ if (rhs > 64) {
+ lhs.low = lhs.high >> (rhs - 64);
+ lhs.high = 0;
+ } else if (rhs == 64) {
+ lhs.low = lhs.high;
+ lhs.high = 0;
+ } else if (rhs > 0) {
+ lhs.low = (lhs.low >> rhs) + (lhs.high << (64 - rhs));
+ lhs.high >>= rhs;
+ }
+ return lhs;
+}
+static my_uint128_t create128FromHighLow64(uint64_t high, uint64_t low) {
+ my_uint128_t result = {low, high};
+ return result;
+}
+static my_uint128_t add128x64(my_uint128_t lhs, uint64_t rhs) {
+ if (lhs.low > UINT64_MAX - rhs) {
+ lhs.high += 1;
+ }
+ lhs.low += rhs;
+ return lhs;
+}
+static my_uint128_t add128x128(my_uint128_t lhs, my_uint128_t rhs) {
+ if (lhs.low > UINT64_MAX - rhs.low) {
+ lhs.high += 1;
+ }
+ lhs.low += rhs.low;
+ lhs.high += rhs.high;
+ return lhs;
+}
+static my_uint128_t fullMultiply64x64(uint64_t lhs, uint64_t rhs) {
+ uint64_t a = (lhs >> 32) * (rhs >> 32);
+ uint64_t b = (lhs >> 32) * (rhs & UINT32_MAX);
+ uint64_t c = (lhs & UINT32_MAX) * (rhs >> 32);
+ uint64_t d = (lhs & UINT32_MAX) * (rhs & UINT32_MAX);
+ b += (c & UINT32_MAX) + (d >> 32);
+ return create128FromHighLow64(a + (b >> 32) + (c >> 32),
+ (b << 32) + (d & UINT32_MAX));
+}
+static my_uint128_t multiply128xInt(my_uint128_t lhs, int rhs) {
+ uint64_t a = (lhs.low & UINT32_MAX) * (uint64_t)rhs;
+ uint64_t b = (lhs.low >> 32) * (uint64_t)rhs;
+ b += (a >> 32);
+ lhs.high = (lhs.high * (uint64_t)rhs) + (b >> 32);
+ lhs.low = (a & UINT32_MAX) + (b << 32);
+ return lhs;
+}
+#endif
+static my_uint128_t multiply128x128RoundingDown(my_uint128_t lhs, my_uint128_t rhs) {
+ my_uint128_t a = fullMultiply64x64(extractHigh64(lhs), extractHigh64(rhs));
+ my_uint128_t b = fullMultiply64x64(extractHigh64(lhs), extractLow64(rhs));
+ my_uint128_t c = fullMultiply64x64(extractLow64(lhs), extractHigh64(rhs));
+ my_uint128_t d = fullMultiply64x64(extractLow64(lhs), extractLow64(rhs));
+ b = add128x64(b, extractLow64(c));
+ b = add128x64(b, extractHigh64(d));
+ a = add128x64(a, extractHigh64(b));
+ a = add128x64(a, extractHigh64(c));
+ return a;
+}
+
+static my_uint128_t getPowerOfTenRoundedDown(int p, int *exponent) {
+ my_uint128_t result;
+ int e;
+
+ // If power is < 0, multiply by 10^-5040 and adjust p
+ // That lets us use a table with only positive powers (half the size)
+ if (p < 0) {
+ result = create128FromHighLow64(0xb2d31bf022977fd8ULL, 0xbf034c011f5000deULL);
+ p += 5040;
+ e = binaryExponentFor10ToThe(-5040);
+ } else {
+ result = create128FromHighLow64(1ULL << 63, 0);
+ e = 1;
+ }
+
+ int finePower = p % 56;
+ my_uint128_t fine;
+ p -= finePower;
+ if (finePower <= 27) {
+ fine = create128FromHighLow64(powersOf10_Exact64[finePower], 0);
+ } else if (finePower <= 54) {
+ fine = fullMultiply64x64(powersOf10_Exact64[finePower - 27],
+ 0xcecb8f27f4200f3aULL); // 10^27
+ if ((extractHigh64(fine) >> 63) == 0) {
+ fine = shiftLeft(fine, 1);
+ }
+ } else {
+ fine = create128FromHighLow64(0xd0cf4b50cfe20765ULL, 0xfff4b4e3f741cf6dULL);
+ }
+ e += binaryExponentFor10ToThe(finePower);
+ result = multiply128x128RoundingDown(result, fine);
+
+ int coarseIndex = p / 56;
+ const uint64_t *c = powersOf10_Binary128 + coarseIndex * 2;
+ my_uint128_t coarse = create128FromHighLow64(c[1], c[0]);
+ e += binaryExponentFor10ToThe(coarseIndex * 56);
+ result = multiply128x128RoundingDown(result, coarse);
+
+ *exponent = e;
+ return result;
+}
+
+static int highPrecisionIntervalPath(struct parseInfo *info, int roundingMode) {
+ int16_t exp10;
+ unsigned upperBoundOffset;
+ if (info->digitCount <= 38) {
+ exp10 = (int16_t)info->base10Exponent;
+ upperBoundOffset = 16; // FIXME
+ } else {
+ exp10 = (int16_t)(info->base10Exponent + info->digitCount - 38);
+ upperBoundOffset = 272; // FIXME
+ }
+
+ my_uint128_t digits = create128FromHighLow64(0, info->digits);
+ int normalizeDigits;
+ if (info->digitCount <= 19) {
+ normalizeDigits = __builtin_clzll(info->digits) + 64;
+ } else {
+ int remaining = info->digitCount - 19;
+ if (remaining > 19) { remaining = 19; }
+ const unsigned char *p = info->firstUnparsedDigit;
+ for (int i = 0; i < remaining; i++) {
+ // Skip decimal point (depends on locale, can be any non-digit!)
+ while (*p < '0' || *p > '9') {
+ p++;
+ }
+ digits = multiply128xInt(digits, 10);
+ digits = add128x64(digits, (uint64_t)(*p - '0'));
+ p += 1;
+ }
+ if (extractHigh64(digits) != 0) {
+ normalizeDigits = __builtin_clzll(extractHigh64(digits));
+ } else {
+ normalizeDigits = __builtin_clzll(extractLow64(digits)) + 64;
+ }
+ }
+ assert(normalizeDigits <= 5 || info->digitCount <= 38);
+ digits = shiftLeft(digits, normalizeDigits);
+ int binaryExponent = 128 - normalizeDigits;
+ // For <= 38 digits, the upper bound for d is just d
+ // For > 38 digits, the upper bound is 1 << normalizeDigits <= 32
+
+ int powerOfTenExponent;
+ const my_uint128_t powerOfTenRoundedDown = getPowerOfTenRoundedDown(exp10, &powerOfTenExponent);
+ binaryExponent += powerOfTenExponent;
+ // powerOfTenRoundedDown <= true value <= powerOfTenRoundedDown + 2
+
+ // A 128-bit lower bound on the binary significand
+ my_uint128_t l = multiply128x128RoundingDown(powerOfTenRoundedDown, digits);
+
+ // (In particular, this lets us shift by a constant below.)
+ int normalizeProduct = __builtin_clzll(extractHigh64(l)); // 0 <= normalizeProduct <= 1
+ // Upper bound is <= (l + 2) or (l + 34)
+ assert(normalizeProduct <= 3);
+ l = shiftLeft(l, normalizeProduct);
+ // Upper bound is <= l + 16 or l + 272
+ binaryExponent -= normalizeProduct;
+
+ // Upper/lower bounds for the 64-bit significand
+ my_uint128_t u = add128x64(l, upperBoundOffset);
+ bool negative = info->negative;
+ switch (roundingMode) {
+#ifdef FE_DOWNWARD
+ case FE_DOWNWARD:
+ negative = !negative;
+ // FALL THROUGH
+#endif
+#ifdef FE_UPWARD
+ case FE_UPWARD:
+ if (!negative) {
+ my_uint128_t offset = create128FromHighLow64(UINT64_MAX, UINT64_MAX);
+ offset = shiftRight(offset, info->sigBits);
+ l = add128x128(l, offset);
+ u = add128x128(u, offset);
+ u = add128x64(u, 1);
+ break;
+ }
+ // FALL THROUGH
+#endif
+#ifdef FE_TOWARDZERO
+ case FE_TOWARDZERO:
+ break;
+#endif
+#ifdef FE_TONEAREST
+ case FE_TONEAREST:
+#endif
+ default: {
+ (void)negative;
+ my_uint128_t offset = create128FromHighLow64(UINT64_MAX, UINT64_MAX);
+ offset = shiftRight(offset, info->sigBits + 1);
+ l = add128x128(l, offset);
+ u = add128x128(u, offset);
+ u = add128x64(u, 1);
+ }
+ }
+ my_uint128_t lowerSignificand = shiftRight(l, (128 - info->sigBits));
+ my_uint128_t upperSignificand = shiftRight(u, (128 - info->sigBits));
+ if (isZero(lowerSignificand)) { // lowerSignificand wrapped...
+ binaryExponent += 1;
+ lowerSignificand = create128FromHighLow64(1ULL << 63, 0);
+ lowerSignificand = shiftRight(lowerSignificand, 128 - info->sigBits);
+ }
+ if (binaryExponent > info->maxBinaryExp) {
+ overflow(info);
+ return 1;
+ } else if (binaryExponent <= info->minBinaryExp) {
+ if (binaryExponent <= info->minBinaryExp - info->sigBits) {
+ underflow(info);
+ return 1;
+ }
+ // TODO: ... Subnormal? ...
+ } else if (isEqual(upperSignificand, lowerSignificand)) {
+ switch (info->bytes) {
+ case 10: {
+ uint16_t signbit = info->negative ? 0x8000U : 0U;
+ uint16_t exponentBits = signbit | (uint16_t)(binaryExponent - info->minBinaryExp);
+ uint64_t significandBits = extractLow64(lowerSignificand);
+ memcpy(info->dest, &significandBits, sizeof(significandBits));
+ memcpy(info->dest + 8, &exponentBits, sizeof(exponentBits));
+ return 1;
+ }
+ case 16: {
+ uint16_t signbit = info->negative ? 0x8000U : 0U;
+ uint16_t exponentBits = signbit | (uint16_t)(binaryExponent - info->minBinaryExp);
+ memcpy(info->dest, &lowerSignificand, sizeof(lowerSignificand));
+ memcpy(info->dest + 14, &exponentBits, sizeof(exponentBits));
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
+#endif
+
+// ================================================================
+// ================================================================
+//
+// Parse an Intel x87 80-bit extended format value
+//
+// ================================================================
+// ================================================================
+
+#if ENABLE_FLOAT80_SUPPORT
+static void _ffpp_strtoencf80_l(unsigned char *dest, const char *start, char **end, strtofp_locale_t loc) {
+ static const int bytes = 10;
+ static const int sigBits = 64;
+ static const int minBinaryExp = -16382;
+ static const int maxBinaryExp = 16384;
+ static const int minDecimalExp = -5000;
+ static const int maxDecimalExp = 5000;
+ static const int maxDecimalMidpointDigits = 11515;
+ struct parseInfo info = {
+ .bytes = bytes,
+ .sigBits = sigBits,
+ .minBinaryExp = minBinaryExp,
+ .maxBinaryExp = maxBinaryExp,
+ .minDecimalExp = minDecimalExp,
+ .maxDecimalExp = maxDecimalExp,
+ .maxDecimalMidpointDigits = maxDecimalMidpointDigits,
+ .dest = dest,
+ .start = start,
+ .end = end,
+ .loc = loc,
+ };
+
+ // ================================================================
+ // Parse the input (mostly)
+ // ================================================================
+ if (!fastParse64(&info)) {
+ return;
+ }
+
+#if ENABLE_FLOAT80_OPTIMIZATIONS && LONG_DOUBLE_IS_FLOAT80
+ // ================================================================
+ // Use a single float80 operation when we can
+ // ================================================================
+ const static long double longDoublePowerOf10[] = {1.0L, 10.0L, 100.0L,
+ 1e3L, 1e4L, 1e5L, 1e6L, 1e7L, 1e8L, 1e9L, 1e10L, 1e11L, 1e12L,
+ 1e13L, 1e14L, 1e15L, 1e16L, 1e17L, 1e18L, 1e19L, 1e20L, 1e21L,
+ 1e22L, 1e23L, 1e24L, 1e25L, 1e26L, 1e27L};
+ if (info.base10Exponent > -28 && info.base10Exponent < 28 && info.digitCount <= 19) {
+ if (info.base10Exponent < 0) {
+ long double p = longDoublePowerOf10[-info.base10Exponent];
+ if (info.negative) p = -p;
+ long double result = (long double)info.digits / p;
+ memcpy(info.dest, &result, sizeof(result));
+ return;
+ } else {
+ long double p = longDoublePowerOf10[info.base10Exponent];
+ if (info.negative) p = -p;
+ long double result = (long double)info.digits * p;
+ memcpy(info.dest, &result, sizeof(result));
+ return;
+ }
+ }
+#endif
+
+ int roundingMode = FENV_ROUNDING_MODE();
+
+#if ENABLE_FLOAT80_OPTIMIZATIONS
+ if (highPrecisionIntervalPath(&info, roundingMode)) {
+ return;
+ }
+#endif
+
+ // ================================================================
+ // Slow Path (varint calculation)
+ // ================================================================
+ char _Alignas(sizeof(mp_word_t)) stackWorkArea[1536];
+ static const size_t stackWorkAreaWords = sizeof(stackWorkArea) / sizeof(mp_word_t);
+ generalSlowpath(&info, roundingMode, (mp_word_t *)stackWorkArea, stackWorkAreaWords, true);
+}
+#endif
+
+// ================================================================
+// ================================================================
+//
+// Parse an IEEE 754 Binary128
+//
+// ================================================================
+// ================================================================
+
+#if ENABLE_BINARY128_SUPPORT
+static void _ffpp_strtoencf128_l(unsigned char *dest, const char *start, char **end, strtofp_locale_t loc) {
+ static const int bytes = 16;
+ static const int sigBits = 113;
+ static const int minBinaryExp = -16382;
+ static const int maxBinaryExp = 16384;
+ static const int minDecimalExp = -5000;
+ static const int maxDecimalExp = 5000;
+ static const int maxDecimalMidpointDigits = 11564;
+ struct parseInfo info = {
+ .bytes = bytes,
+ .sigBits = sigBits,
+ .minBinaryExp = minBinaryExp,
+ .maxBinaryExp = maxBinaryExp,
+ .minDecimalExp = minDecimalExp,
+ .maxDecimalExp = maxDecimalExp,
+ .maxDecimalMidpointDigits = maxDecimalMidpointDigits,
+ .dest = dest,
+ .start = start,
+ .end = end,
+ .loc = loc,
+ };
+
+ // ================================================================
+ // Parse the input (mostly)
+ // ================================================================
+ if (!fastParse64(&info)) {
+ return;
+ }
+
+#if LONG_DOUBLE_IS_BINARY128 && ENABLE_BINARY128_OPTIMIZATIONS
+ // ================================================================
+ // Use a single binary128 operation when we can
+ // ================================================================
+ const static long double longDoublePowerOf10[] = {
+ 1.0L, 10.0L, 100.0L, 1e3L, 1e4L, 1e5L, 1e6L, 1e7L, 1e8L, 1e9L,
+ 1e10L, 1e11L, 1e12L, 1e13L, 1e14L, 1e15L, 1e16L, 1e17L, 1e18L, 1e19L,
+ 1e20L, 1e21L, 1e22L, 1e23L, 1e24L, 1e25L, 1e26L, 1e27L, 1e28L, 1e29L,
+ 1e30L, 1e31L, 1e32L, 1e33L, 1e34L, 1e35L, 1e36L, 1e37L, 1e38L, 1e39L,
+ 1e40L, 1e41L, 1e42L, 1e43L, 1e44L, 1e45L, 1e46L, 1e47L, 1e48L};
+ if (info.base10Exponent > -49 && info.base10Exponent < 49 && info.digitCount <= 19) {
+ if (info.base10Exponent < 0) {
+ long double p = longDoublePowerOf10[-info.base10Exponent];
+ if (info.negative) p = -p;
+ long double result = (long double)info.digits / p;
+ memcpy(info.dest, &result, sizeof(result));
+ return;
+ } else {
+ long double p = longDoublePowerOf10[info.base10Exponent];
+ if (info.negative) p = -p;
+ long double result = (long double)info.digits * p;
+ memcpy(info.dest, &result, sizeof(result));
+ return;
+ }
+ }
+#endif
+
+ int roundingMode = FENV_ROUNDING_MODE();
+
+#if ENABLE_BINARY128_OPTIMIZATIONS
+ if (highPrecisionIntervalPath(&info, roundingMode)) {
+ return;
+ }
+#endif
+
+ // ================================================================
+ // Slow Path (varint calculation)
+ // ================================================================
+ char _Alignas(sizeof(mp_word_t)) stackWorkArea[1536];
+ static const size_t stackWorkAreaWords = sizeof(stackWorkArea) / sizeof(mp_word_t);
+ generalSlowpath(&info, roundingMode, (mp_word_t *)stackWorkArea, stackWorkAreaWords, true);
+}
+#endif
+
+// ================================================================
+// ================================================================
+//
+// Public APIs
+//
+// The public functions exported from this file are all defined
+// in terms of the private `_ffpp_strtoencf**_l` functions defined
+// above.
+//
+// ================================================================
+// ================================================================
+
+// ================================================================
+// Wrappers for Binary16
+#if ENABLE_BINARY16_SUPPORT
+// TS 18661-3 `strtoencf16` API that can be supported on
+// every platform regardless of local FP
+void strtoencf16(unsigned char * restrict encptr,
+ const char * restrict nptr,
+ char ** restrict endptr) {
+ _ffpp_strtoencf16_l(encptr, nptr, endptr, strtofp_current_locale());
+}
+#endif
+
+// ================================================================
+// Wrappers for Binary32
+
+#if ENABLE_BINARY32_SUPPORT
+// TS 18661-3 `strtoencf32` API that can be supported on
+// every platform regardless of local FP
+void strtoencf32(unsigned char * restrict encptr,
+ const char * restrict nptr,
+ char ** restrict endptr) {
+ _ffpp_strtoencf32_l(encptr, nptr, endptr, strtofp_current_locale());
+}
+#endif
+
+#if ENABLE_BINARY32_SUPPORT && FLOAT_IS_BINARY32
+// ISO C17 `strtof` API
+float strtof(const char * restrict nptr,
+ char ** restrict endptr) {
+ union { float d; unsigned char raw[4]; } result;
+ _ffpp_strtoencf32_l(result.raw, nptr, endptr, strtofp_current_locale());
+ return result.d;
+}
+#endif
+
+#if ENABLE_BINARY32_SUPPORT && FLOAT_IS_BINARY32 && ENABLE_LOCALE_SUPPORT
+// ISO C17 `strtof_l` API
+float strtof_l(const char * restrict nptr,
+ char ** restrict endptr,
+ strtofp_locale_t loc) {
+ union { float d; unsigned char raw[4]; } result;
+ _ffpp_strtoencf32_l(result.raw, nptr, endptr, loc);
+ return result.d;
+}
+#endif
+
+// ================================================================
+// Wrappers for Binary64
+
+#if ENABLE_BINARY64_SUPPORT
+// TS 18661-3 `strtoencf64` API that can be supported on
+// every platform regardless of local FP
+void strtoencf64(unsigned char * restrict encptr,
+ const char * restrict nptr,
+ char ** restrict endptr) {
+ _ffpp_strtoencf64_l(encptr, nptr, endptr, strtofp_current_locale());
+}
+#endif
+
+#if ENABLE_BINARY64_SUPPORT && LONG_DOUBLE_IS_BINARY64
+// TS 18661-3 `strtoencf64x` API
+// If `long double` is binary64, we assume Float64x is binary64
+void strtoencf64x(unsigned char * restrict encptr,
+ const char * restrict nptr,
+ char ** restrict endptr) {
+ _ffpp_strtoencf64_l(encptr, nptr, endptr, strtofp_current_locale());
+}
+#endif
+
+#if ENABLE_BINARY64_SUPPORT && DOUBLE_IS_BINARY64
+// ISO C17 `strtod` API
+double strtod(const char * restrict nptr,
+ char ** restrict endptr) {
+ union { double d; unsigned char raw[8]; } result;
+ _ffpp_strtoencf64_l(result.raw, nptr, endptr, strtofp_current_locale());
+ return result.d;
+}
+#endif
+
+#if ENABLE_BINARY64_SUPPORT && DOUBLE_IS_BINARY64 && ENABLE_LOCALE_SUPPORT
+// ISO C17 `strtod_l` API
+double strtod_l(const char * restrict nptr,
+ char ** restrict endptr,
+ strtofp_locale_t loc) {
+ union { double d; unsigned char raw[8]; } result;
+ _ffpp_strtoencf64_l(result.raw, nptr, endptr, loc);
+ return result.d;
+}
+#endif
+
+#if ENABLE_BINARY64_SUPPORT && LONG_DOUBLE_IS_BINARY64
+// ISO C17 `strtold` API
+long double strtold(const char * restrict nptr,
+ char ** restrict endptr) {
+ union { long double d; unsigned char raw[8]; } result;
+ _ffpp_strtoencf64_l(result.raw, nptr, endptr, strtofp_current_locale());
+ return result.d;
+}
+#endif
+
+#if ENABLE_BINARY64_SUPPORT && LONG_DOUBLE_IS_BINARY64 && ENABLE_LOCALE_SUPPORT
+// ISO C17 `strtold_l` API
+long double strtold_l(const char * restrict nptr,
+ char ** restrict endptr,
+ strtofp_locale_t loc) {
+ union { long double d; unsigned char raw[8]; } result;
+ _ffpp_strtoencf64_l(result.raw, nptr, endptr, loc);
+ return result.d;
+}
+#endif
+
+// ================================================================
+// Wrappers for Float80
+
+#if ENABLE_FLOAT80_SUPPORT && ENABLE_LOCALE_SUPPORT
+// Non-standard but helpful for testing.
+void strtoencf80_l(unsigned char * restrict encptr,
+ const char * restrict nptr,
+ char ** restrict endptr,
+ strtofp_locale_t loc) {
+ _ffpp_strtoencf80_l(encptr, nptr, endptr, loc);
+}
+#endif
+
+#if ENABLE_FLOAT80_SUPPORT && LONG_DOUBLE_IS_FLOAT80
+// TS 18661-3 `strtoencf64x` API
+// If `long double` is float80, assume `Float64x` is float80
+void strtoencf64x(unsigned char * restrict encptr,
+ const char * restrict nptr,
+ char ** restrict endptr) {
+ _ffpp_strtoencf80_l(encptr, nptr, endptr, strtofp_current_locale());
+}
+#endif
+
+#if ENABLE_FLOAT80_SUPPORT && LONG_DOUBLE_IS_FLOAT80
+// ISO C17 `strtold` API
+long double strtold(const char * restrict nptr,
+ char ** restrict endptr) {
+ union { long double d; unsigned char raw[10]; } result;
+ _ffpp_strtoencf80_l(result.raw, nptr, endptr, strtofp_current_locale());
+ return result.d;
+}
+#endif
+
+#if ENABLE_FLOAT80_SUPPORT && LONG_DOUBLE_IS_FLOAT80 && ENABLE_LOCALE_SUPPORT
+// ISO C17 `strtold` API
+long double strtold_l(const char * restrict nptr,
+ char ** restrict endptr,
+ strtofp_locale_t loc) {
+ union { long double d; unsigned char raw[10]; } result;
+ _ffpp_strtoencf80_l(result.raw, nptr, endptr, loc);
+ return result.d;
+}
+#endif
+
+// ================================================================
+// Wrappers for Binary128
+
+#if ENABLE_BINARY128_SUPPORT
+// TS 18661-3 `strtoencf128` API that can be supported on
+// every platform regardless of local FP
+void strtoencf128(unsigned char * restrict encptr,
+ const char * restrict nptr,
+ char ** restrict endptr) {
+ _ffpp_strtoencf128_l(encptr, nptr, endptr, strtofp_current_locale());
+}
+#endif
+
+#if ENABLE_BINARY128_SUPPORT && LONG_DOUBLE_IS_BINARY128
+// ISO C17 `strtold` API
+long double strtold(const char * restrict nptr,
+ char ** restrict endptr) {
+ union { long double d; unsigned char raw[16]; } result;
+ _ffpp_strtoencf128_l(result.raw, nptr, endptr, strtofp_current_locale());
+ return result.d;
+}
+#endif
+
+#if ENABLE_BINARY128_SUPPORT && LONG_DOUBLE_IS_BINARY128 && ENABLE_LOCALE_SUPPORT
+// ISO C17 `strtold_l` API
+long double strtold_l(const char * restrict nptr,
+ char ** restrict endptr,
+ strtofp_locale_t loc) {
+ union { long double d; unsigned char raw[16]; } result;
+ _ffpp_strtoencf128_l(result.raw, nptr, endptr, loc);
+ return result.d;
+}
+#endif