Loading...
--- Libc/Libc-1725.40.4/stdlib/strtofp.c
+++ /dev/null
@@ -1,3954 +0,0 @@
-/*
-* Copyright (c) 2021 Apple Inc. All rights reserved.
-*
-* @APPLE_LICENSE_HEADER_START@
-*
-* This file contains Original Code and/or Modifications of Original Code
-* as defined in and that are subject to the Apple Public Source License
-* Version 2.0 (the 'License'). You may not use this file except in
-* compliance with the License. Please obtain a copy of the License at
-* http://www.opensource.apple.com/apsl/ and read it before using this
-* file.
-*
-* The Original Code and all software distributed under the License are
-* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
-* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
-* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
-* Please see the License for the specific language governing rights and
-* limitations under the License.
-*
-* @APPLE_LICENSE_HEADER_END@
-*/
-
-// Implementation of `strtod` and related functions
-// Based on version 3 of "ffpp_strtofp"
-// Author: Tim Kientzle
-//
-// This file supports parsing floating-point values from ASCII/UTF8
-// text into the following commonly-used floating-point formats:
-// * IEEE 754/ISO 60559 binary16 ("half")
-// * IEEE 754/ISO 60559 binary32 ("float")
-// * IEEE 754/ISO 60559 binary64 ("double")
-// * Intel x87 80-bit extended format ("float80")
-// * IEEE 754/ISO 60559 binary128
-//
-// The core functionality consists of the following private functions,
-// which can be compiled for use on any platform regardless of the
-// local floating-point support:
-//
-// * _ffpp_strtoencf16_l (see footnote [1])
-// * _ffpp_strtoencf32_l
-// * _ffpp_strtoencf64_l
-// * _ffpp_strtoencf80_l (see footnote [2])
-// * _ffpp_strtoencf128_l
-//
-// The following standard functions are defined as wrappers around the
-// above to provide the APIs defined in the current C standardization
-// effort:
-//
-// * strtof (ISO C17)
-// * strtof_l (ISO C17)
-// * strtod (ISO C17)
-// * strtod_l (ISO C17)
-// * strtold (ISO C17, see footnote [3])
-// * strtold_l (ISO C17, see footnote [3])
-// * strtoencf16 (TS 18661-3)
-// * strtoencf32 (TS 18661-3)
-// * strtoencf64 (TS 18661-3)
-// * strtoencf64x (TS 18661-3, see footnote [2])
-// * strtoencf128 (TS 18661-3)
-//
-// TODO: The following wrappers can easily be added for
-// any platform that defines the required `_Float##` types.
-// * strtof16 (TS 18661-3)
-// * strtof32 (TS 18661-3)
-// * strtof64 (TS 18661-3)
-// * strtof64x (TS 18661-3, see footnote [2])
-// * strtof128 (TS 18661-3)
-//
-// IMPLEMENTATION NOTES
-// --------------------------------
-//
-// This is a new implementation that uses ideas from a number of
-// sources, including Clinger's 1990 paper, Gay's gdtoa library,
-// Lemire's fast_double_parser implementation, Google's abseil
-// library, as well as work I've done for the Swift standard library.
-//
-// All of the parsers use the same initial parsing logic and fall back
-// to the same arbitrary-precision integer code. In between these,
-// they use varying format-specific optimizations.
-//
-// First Step: Initial Parsing
-//
-// The initial parsing of the textual input is handled by
-// `fastParse64`. As the name suggests, this uses a fixed-size 64-bit
-// accumulator for speed and is heavily optimized assuming that the
-// input significand has at most 19 digits. Longer input will overflow
-// the accumulator, triggering an additional scan of the input. This
-// initial parse also detects Hex floats, nans, and "inf"/"infinity"
-// strings and dispatches those to specialized implementations.
-//
-// With the initial parse complete, the challenge is to compute
-// decimalSignificand * 10^decimalExponent
-// with precisely correct rounding as quickly as possible.
-//
-// Last Step: arbitrary-precision integer calculation (generalSlowpath)
-//
-// Specific formatters use a variety of optimized paths that provide
-// quick results for specific classes of input. But none of those
-// work for every input value. So we have a final fallback that uses
-// arbitrary-precision integer arithmetic to compute the exact results
-// with guaranteed accuracy for all inputs. Of course, the required
-// arbitrary-precision arithmetic can be significantly more expensive,
-// especially when the significand is very long or the exponent is
-// very large.
-//
-// Two optimizations are worth mentioning:
-//
-// Powers of 5: We break the power of 10 computation into a power of 5
-// and a power of 2. The latter can be simply folded into the final
-// FP exponent, so this effectively reduces the power of 10
-// computation to the computation of a power of 5, which is a
-// significantly smaller number. For very large exponents, the run
-// time is dominated by this power of 5 computation. (Up to 95% of
-// the CPU time for extreme binary128 values.)
-//
-// Limit on significand digits: I first saw this optimization in the
-// Abseil library. First, consider the exact decimal expansions for
-// all the exact midpoints between adjacent pairs of floating-point
-// values. There is some maximum number of significant digits
-// `maxDecimalMidpointDigits`. Following an argument from Clinger, we
-// only need to be able to distinguish whether we are above or below
-// any such midpoint. So it suffices to consider the first
-// `maxDecimalMidpointDigits`, appending a single digit that is
-// non-zero if the trailing digits are non-zero. This allows us to
-// limit the total size of the arithmetic used in this stage. In
-// particular, for double, this limits us to less than 1024 bytes of
-// total space, which can easily fit on the stack, allowing us to
-// parse any double input regardless of length without allocation.
-//
-// For binary128, the comparable limit is 11564 digits, which gives a
-// maximum work buffer size of nearly 10k. This seems a bit large for
-// the stack, but a buffer of 1536 bytes is big enough to process any
-// binary128 with less than 100 digits, regardless of exponent. TODO:
-// For a smaller range of exponents, we can limit
-// maxDecimalMidpointDigits further. That would allow us to process
-// any binary128 within a range of exponents regardless of number of
-// digits with the same 1536-byte buffer.
-//
-// Note: Compared to Clinger's AlgorithmR, this requires fewer
-// arbitrary-precision operations and gives the correct answer
-// directly without requiring a nearly-correct initial value.
-// Compared to Clinger's AlgorithmM, this takes advantage of the fact
-// that our integer arithmetic is occuring in the same base as used by
-// the final FP format. This means we can interpret the bits from a
-// simple calculation instead of doing additional work to abstractly
-// compute the target format.
-//
-// These first and last steps (`fastParse64` and `generalSlowpath`)
-// are sufficient to provide guaranteed correct results for any
-// format. The optimizations described next are accelerators that
-// allow us to provide a result more quickly for common cases where
-// the additional code complexity and testing cost can be justified.
-//
-// Optimization: Use a single Floating-point calculation
-//
-// Clinger(1990) observed that when converting to double, if the
-// significand and 10^k are both exactly representable by doubles,
-// then
-// (double)significand * (double)10^k
-// is always correct with a single double multiplication.
-// Similarly, if 10^-k is exactly representable, then
-// (double)significand / (double)10^(-k)
-// is always correct with a single double division.
-//
-// In particular, any significand of 15 digits or less can be exactly
-// represented by a double, as can any power of 10 up to 10^22.
-//
-// There are a few similar cases where we can provide exact inputs to
-// a single floating-point operation. Since a single FP operation
-// always produces correctly-rounded results (in the current rounding
-// mode), these always produce correct results for the corresponding
-// range of inputs. Since this relies on the hardware FPU, it is very
-// fast when it can be used.
-//
-// This optimization works especially well for hand-entered input,
-// which typically has few digits and a small exponent. It works less
-// well for JSON, as random double values in JSON are typically
-// presented with 16 or 17 digits. Fast FMA or mixed-precision
-// arithmetic can extend this technique further in certain
-// environments. In particular, FPU-supported multiplication and
-// division of binary128 arguments with a binary64 result would handle
-// the common JSON cases.
-//
-// Optimization: Interval calculation
-//
-// We can easily compute fixed-precision upper and lower bounds for
-// the power-of-10 value from a lookup table. Likewise, we can
-// construct bounds for an arbitrary-length significand by inspecting
-// just the first digits. From these bounds, we can compute upper and
-// lower bounds for the final result, all with fast fixed-precision
-// integer arithmetic. Depending on the precision, these upper and
-// lower bounds can coincide for more than 99% of all inputs,
-// guaranteeing the correct result in those cases. This also allows
-// us to use fast fixed-precision arithmetic for very long inputs,
-// only using the first digits of the significand in cases where the
-// additional digits do not affect the result.
-//
-// PERFORMANCE
-// --------------------------------
-//
-// This strtod is about 10x faster than the implementation used by
-// Apple's libc prior to 2022. On Lemire's `canada.txt` benchmark,
-// this implementation achieves over 700MB/s compared to 75MB/s for
-// the earlier implementation when parsing a large collection of
-// latitude/longitude values expressed as doubles with 15-17 digits.
-// That makes it about 70% the speed of Lemire's fast_double_parser
-// implementation, but still significantly faster than the other
-// implementations benchmarked by Lemire.
-//
-// Note: You can achieve speed similar to Lemire's implementation by
-// judiciously inlining supporting routines, disabling rounding mode
-// and locale support, and using a direct lookup into a fully-expanded
-// table instead of two table lookups and a multiplication.
-//
-// For other inputs, performance varies depending on the particular
-// optimization that gets used. Binary128 and Float80 parsing here
-// uses a more heavily-factored power-of-10 table that requires up to
-// 3 multiplications to produce a lower bound. Binary16 currently
-// has no format-specific optimizations since the arbitrary-precision
-// path is fast enough for such a small range of exponents.
-//
-// CORRECTNESS
-// --------------------------------
-//
-// Primary accuracy testing has relied on a modified form of the
-// "FFPP" test suite I developed for work on "SwiftDtoa.c". This includes
-// over 100 million computed test cases.
-//
-// This strtod has also been fuzz-tested to ensure that the behavior
-// matches the previous macOS libc implementation based on gdtoa
-// (except for known bugs in that code):
-// * It returns the same double result
-// * It updates `end` identically
-// * It updates `errno` identically
-// * For any locale
-// * For any standard FP rounding mode
-//
-// NOTES
-// --------------------------------
-//
-// General terminology note: I use "binary##" to refer to the IEEE 754
-// portable binary formats, and "float80" to refer to the Intel x87
-// 80-bit extended format.
-//
-// [1] TS18661-3 defines parsing functions that use the current
-// default locale, but it does not (as of early 2021) define any
-// variants that take an explicit locale argument. In order to provide
-// ISO C17 `strtod_l` functionality, I've found it useful to extend
-// the TS18661-3 suite of functions with `_l` variants.
-//
-// [2] The term "binary64x" or "f64x" in various standards refers
-// generally to any extended format, not specifically the Intel x87
-// 80-bit format. This makes it difficult to use the term `f64x` for
-// portable implementations. To keep things clear, I have
-// `strtoencf80_l` to specifically support the Intel float80 format
-// and define `strtoencf64x` as a wrapper that calls into either
-// `strtoencf64_l` or `strtoencf80_l` depending on the local system
-// conventions (using the same approach as used for `strtold`).
-//
-// [3] `strtold` is defined as a wrapper for whichever supported
-// format (binary64, float80, or binary128) corresponds to long double
-// on the local system. Search below for uses of `LONG_DOUBLE_IS_` to
-// see how this is done.
-//
-//
-// TODO:
-//
-// * Figure out how to test whether `_Float##` types are provided,
-// use that to define `strtof##` automatically on systems that
-// support TS18661-3.
-//
-// * Support big-endian systems. Much of the support for
-// binary16/32/64 should already work correctly since it builds the
-// result bitwise in an unsigned integer and uses memcpy() to copy
-// the bits. This works if ints and floats have the same
-// endianness, which I believe is universally true on all current
-// hardware. Other code builds up results assuming a particular
-// byte order and will need to be modified to support big-endian
-// environments.
-//
-
-#define _POSIX_C_SOURCE 200809L
-
-#include <assert.h>
-#include <ctype.h>
-#include <errno.h>
-#include <float.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <strings.h>
-
-// Check whether access to the floating-point environment is supported
-#if !defined(ENABLE_FENV_ACCESS)
-#if __has_include(<fenv.h>)
- #include <fenv.h>
-
- #define ENABLE_FENV_ACCESS 1
-#else
- #define ENABLE_FENV_ACCESS 0
-#endif
-#endif
-
-// Check whether precision/range of floating-point values is defined
-#if __has_include(<math.h>)
- #include <math.h>
-#else
-#if !defined(FLT_EVAL_METHOD)
- // Note that Clang's builtin float.h header does define this macro
- #define FLT_EVAL_METHOD -1
-#endif
-#endif
-
-// Check whether locales are supported
-#if !defined(ENABLE_LOCALE_SUPPORT)
-#if __has_include(<langinfo.h>) && __has_include(<locale.h>)
- #include <langinfo.h>
- #include <locale.h>
-#if defined(__APPLE__)
- #include <xlocale.h>
- #include <xlocale_private.h>
-#endif
-
- #define ENABLE_LOCALE_SUPPORT 1
-#else
- #define ENABLE_LOCALE_SUPPORT 0
-#endif
-#endif
-
-// ================================================================
-// Detect the floating-point formats supported on this platform
-// by testing the standard macros defined in float.h
-// ================================================================
-
-// Does "float" on this system use IEEE 754 binary32 format?
-// (Almost all modern systems do this.)
-#if (FLT_RADIX == 2) && (FLT_MANT_DIG == 24) && (FLT_MIN_EXP == -125) && (FLT_MAX_EXP == 128)
- #define FLOAT_IS_BINARY32 1
-#else
- #define FLOAT_IS_BINARY32 0
-#endif
-
-// Does "double" on this system use IEEE 754 binary64 format?
-// (Almost all modern systems do this.)
-#if (FLT_RADIX == 2) && (DBL_MANT_DIG == 53) && (DBL_MIN_EXP == -1021) && (DBL_MAX_EXP == 1024)
- #define DOUBLE_IS_BINARY64 1
-#else
- #define DOUBLE_IS_BINARY64 0
-#endif
-
-// Does "long double" on this system use IEEE 754 binary64 format?
-// (Example: Windows on all hardware, macOS on ARM.)
-#if (FLT_RADIX == 2) && (LDBL_MANT_DIG == 53) && (LDBL_MIN_EXP == -1021) && (LDBL_MAX_EXP == 1024)
- #define LONG_DOUBLE_IS_BINARY64 1
-#else
- #define LONG_DOUBLE_IS_BINARY64 0
-#endif
-
-// Is "long double" on this system the same as Float80?
-// (Example: macOS, Linux, and FreeBSD when running on x86 or x86_64 processors.)
-#if (FLT_RADIX == 2) && (LDBL_MANT_DIG == 64) && (LDBL_MIN_EXP == -16381) && (LDBL_MAX_EXP == 16384)
- #define LONG_DOUBLE_IS_FLOAT80 1
-#else
- #define LONG_DOUBLE_IS_FLOAT80 0
-#endif
-
-// Does "long double" on this system use IEEE 754 binary128 format?
-// (Example: Android on LP64 hardware.)
-#if (FLT_RADIX == 2) && (LDBL_MANT_DIG == 113) && (LDBL_MIN_EXP == -16381) && (LDBL_MAX_EXP == 16384)
- #define LONG_DOUBLE_IS_BINARY128 1
-#else
- #define LONG_DOUBLE_IS_BINARY128 0
-#endif
-
-// ================================================================
-// Detect/configure local platform arithmetic
-
-#if defined(__SIZEOF_INT128__)
- // We get a significant speed boost if we can use the __uint128_t
- // type that's present in GCC and Clang on 64-bit architectures.
- #define HAVE_UINT128_T 1
- #define MP_WORD_BITS 32 // Multi-precision ints use 32-bit words on 64-bit platforms
-#else
- #define HAVE_UINT128_T 0
- #define MP_WORD_BITS 16 // Multi-precision ints use 16-bit words on 32-bit platforms
-#endif
-
-// ================================================================
-// How to get locale-specific decimal point character
-// (This is more complicated than it should be, largely because
-// the standard functions are a lot slower than we'd like and
-// the standard mechanisms return an arbitrary-length string
-// for the decimal point. So we go to some lengths to avoid
-// calling the standard functions: We defer such calls until
-// we see a character that is not a digit or other known character,
-// and we handle the "C" locale specially.)
-
-// Define macros used for locale information below:
-//
-// strtofp_locale_decimal_point(loc)
-// - Return the decimal point string for the specified locale
-//
-// strtofp_locale_t
-// - Type of locale info: locale_t on systems that support locales, else void *
-//
-// strtofp_C_locale
-// - Constant that defines the "C" locale, used to bypass other locale checks
-//
-// strtofp_current_locale()
-// - Returns the default locale to use when no explicit locale is given
-//
-#if !ENABLE_LOCALE_SUPPORT
- // If no locale support, always use "." as the decimal point
- #define strtofp_locale_t void *
- #define strtofp_current_locale() (NULL)
- #define strtofp_C_locale (NULL)
- #define strtofp_locale_decimal_point(loc) ((const unsigned char *)("."))
-#elif defined(__linux__)
- #define strtofp_locale_t locale_t
- #define strtofp_current_locale() (uselocale(NULL))
- #define strtofp_C_locale (NULL)
- #define strtofp_locale_decimal_point(loc) \
- ((const unsigned char *)((loc) == LC_GLOBAL_LOCALE ? nl_langinfo(RADIXCHAR) : nl_langinfo_l(RADIXCHAR, (loc))))
-#elif defined(__APPLE__)
- #define strtofp_locale_t locale_t
- #define strtofp_C_locale (NULL)
- // For use inside of Libc: Use thread-specific locale if set, else global locale
- #define strtofp_current_locale() (__current_locale())
- // For use outside of libc
- // #define strtofp_current_locale() (uselocale(NULL))
- #define strtofp_locale_decimal_point(loc) ((const unsigned char *)(localeconv_l((loc))->decimal_point))
-#else
- #error Need definition for strtofp_locale_decimal_point and strtofp_current_locale for this platform
-#endif
-
-// ================================================================
-// Floating-point rounding mode
-
-#if ENABLE_FENV_ACCESS
- #pragma GCC diagnostic push
- #pragma GCC diagnostic ignored "-Wignored-pragmas"
- #pragma STDC FENV_ACCESS ON
- #pragma GCC diagnostic pop
-
- #define FENV_ROUNDING_MODE() fegetround()
-#else
- // By default, we use FE_TONEAREST, which rounds to the nearest representable value
- #define FENV_ROUNDING_MODE() 0
-#endif
-
-// In certain scenarios, we require FE_TOWARDZERO, which rounds towards zero,
-// but need to provide a sentinel value if it is not defined
-#ifndef FE_TOWARDZERO
- #define FE_TOWARDZERO 0xFE00
-#endif
-
-#if defined(FE_DOWNWARD) && FE_TOWARDZERO == FE_DOWNWARD
- #error "Definition of FE_TOWARDZERO conflicts with FE_DOWNWARD"
-#endif
-#if defined(FE_TONEAREST) && FE_TOWARDZERO == FE_TONEAREST
- #error "Definition of FE_TOWARDZERO conflicts with FE_TONEAREST"
-#endif
-#if defined(FE_UPWARD) && FE_TOWARDZERO == FE_UPWARD
- #error "Definition of FE_TOWARDZERO conflicts with FE_UPWARD"
-#endif
-
-// ================================================================
-//
-// Enable/disable particular formats
-//
-// ================================================================
-
-// Enable binary16/32/64 by default everywhere.
-#ifndef ENABLE_BINARY16_SUPPORT
- #define ENABLE_BINARY16_SUPPORT 1
-#endif
-#ifndef ENABLE_BINARY32_SUPPORT
- #define ENABLE_BINARY32_SUPPORT 1
-#endif
-#ifndef ENABLE_BINARY64_SUPPORT
- #define ENABLE_BINARY64_SUPPORT 1
-#endif
-// Enable float80 by default only if necessary to support long double
-#ifndef ENABLE_FLOAT80_SUPPORT
- #if LONG_DOUBLE_IS_FLOAT80
- #define ENABLE_FLOAT80_SUPPORT 1
- #else
- #define ENABLE_FLOAT80_SUPPORT 0
- #endif
-#endif
-// Enable binary128 by default only if necessary to support long double
-#ifndef ENABLE_BINARY128_SUPPORT
- #if LONG_DOUBLE_IS_BINARY128
- #define ENABLE_BINARY128_SUPPORT 1
- #else
- #define ENABLE_BINARY128_SUPPORT 0
- #endif
-#endif
-
-// Enable float80 interval optimization by default.
-// If you want to be able to tick off "float80" support but don't want to
-// pay for it, define this to 0. Then you'll just get the minimum.
-// Enabling this costs about 3.5k of code.
-// Note: The code is shared with binary128, so there's no point to enabling
-// it for one but not the other.
-#if ENABLE_FLOAT80_SUPPORT
- #ifndef ENABLE_FLOAT80_OPTIMIZATIONS
- #define ENABLE_FLOAT80_OPTIMIZATIONS 1
- #endif
-#else
- #define ENABLE_FLOAT80_OPTIMIZATIONS 0
-#endif
-
-// Enable binary128 interval optimization by default.
-#if ENABLE_BINARY128_SUPPORT
- #ifndef ENABLE_BINARY128_OPTIMIZATIONS
- #define ENABLE_BINARY128_OPTIMIZATIONS 1
- #endif
-#else
- #define ENABLE_BINARY128_OPTIMIZATIONS 0
-#endif
-
-// At least one format must be enabled
-#if !ENABLE_BINARY16_SUPPORT && !ENABLE_BINARY32_SUPPORT && !ENABLE_BINARY64_SUPPORT && !ENABLE_FLOAT80_SUPPORT && !ENABLE_BINARY128_SUPPORT
-#error At least one format must be enabled
-#endif
-
-// ================================================================
-// Look up tables
-
-static const unsigned char hexdigit[256] = {
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0, 1, 2, 3, 4, 5, 6, 7,
- 8, 9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 10, 11, 12, 13, 14, 15, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 10, 11, 12, 13, 14, 15, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
-};
-
-// ================================================================
-// ================================================================
-//
-// Power-of-10 tables
-//
-// ================================================================
-// ================================================================
-
-
-#define binaryExponentFor10ToThe(p) ((int)(((((int64_t)(p)) * 55732705) >> 24) + 1))
-
-// For binary32 only, we need a single 880-byte powersOf10_Float table below
-
-// For binary64 only, we need a 224-byte subset of the first table and
-// a 232-byte additional table for a total of 456 bytes
-
-// For both, we can overlap somewhat so we only need the 880-byte binary32
-// table and the auxiliary 232-byte table for binary64, for a total of 1112 bytes.
-
-#if ENABLE_BINARY32_SUPPORT || ENABLE_BINARY64_SUPPORT || ENABLE_FLOAT80_OPTIMIZATIONS || ENABLE_BINARY128_OPTIMIZATIONS
-// These are the 64-bit binary significands of the
-// largest binary floating-point value that is not greater
-// than the corresponding power of 10.
-
-// That is, when these are not exact, they are less than
-// the exact decimal value. This allows us to use these
-// to construct tight intervals around the true power
-// of ten value.
-static const uint64_t powersOf10_Float[] = {
-#if ENABLE_BINARY32_SUPPORT
- 0xb0af48ec79ace837ULL, // x 2^-232 ~= 10^-70
- 0xdcdb1b2798182244ULL, // x 2^-229 ~= 10^-69
- 0x8a08f0f8bf0f156bULL, // x 2^-225 ~= 10^-68
- 0xac8b2d36eed2dac5ULL, // x 2^-222 ~= 10^-67
- 0xd7adf884aa879177ULL, // x 2^-219 ~= 10^-66
- 0x86ccbb52ea94baeaULL, // x 2^-215 ~= 10^-65
- 0xa87fea27a539e9a5ULL, // x 2^-212 ~= 10^-64
- 0xd29fe4b18e88640eULL, // x 2^-209 ~= 10^-63
- 0x83a3eeeef9153e89ULL, // x 2^-205 ~= 10^-62
- 0xa48ceaaab75a8e2bULL, // x 2^-202 ~= 10^-61
- 0xcdb02555653131b6ULL, // x 2^-199 ~= 10^-60
- 0x808e17555f3ebf11ULL, // x 2^-195 ~= 10^-59
- 0xa0b19d2ab70e6ed6ULL, // x 2^-192 ~= 10^-58
- 0xc8de047564d20a8bULL, // x 2^-189 ~= 10^-57
- 0xfb158592be068d2eULL, // x 2^-186 ~= 10^-56
- 0x9ced737bb6c4183dULL, // x 2^-182 ~= 10^-55
- 0xc428d05aa4751e4cULL, // x 2^-179 ~= 10^-54
- 0xf53304714d9265dfULL, // x 2^-176 ~= 10^-53
- 0x993fe2c6d07b7fabULL, // x 2^-172 ~= 10^-52
- 0xbf8fdb78849a5f96ULL, // x 2^-169 ~= 10^-51
- 0xef73d256a5c0f77cULL, // x 2^-166 ~= 10^-50
- 0x95a8637627989aadULL, // x 2^-162 ~= 10^-49
- 0xbb127c53b17ec159ULL, // x 2^-159 ~= 10^-48
- 0xe9d71b689dde71afULL, // x 2^-156 ~= 10^-47
- 0x9226712162ab070dULL, // x 2^-152 ~= 10^-46
- 0xb6b00d69bb55c8d1ULL, // x 2^-149 ~= 10^-45
- 0xe45c10c42a2b3b05ULL, // x 2^-146 ~= 10^-44
- 0x8eb98a7a9a5b04e3ULL, // x 2^-142 ~= 10^-43
- 0xb267ed1940f1c61cULL, // x 2^-139 ~= 10^-42
- 0xdf01e85f912e37a3ULL, // x 2^-136 ~= 10^-41
- 0x8b61313bbabce2c6ULL, // x 2^-132 ~= 10^-40
- 0xae397d8aa96c1b77ULL, // x 2^-129 ~= 10^-39
- 0xd9c7dced53c72255ULL, // x 2^-126 ~= 10^-38
- 0x881cea14545c7575ULL, // x 2^-122 ~= 10^-37
- 0xaa242499697392d2ULL, // x 2^-119 ~= 10^-36
- 0xd4ad2dbfc3d07787ULL, // x 2^-116 ~= 10^-35
- 0x84ec3c97da624ab4ULL, // x 2^-112 ~= 10^-34
- 0xa6274bbdd0fadd61ULL, // x 2^-109 ~= 10^-33
- 0xcfb11ead453994baULL, // x 2^-106 ~= 10^-32
- 0x81ceb32c4b43fcf4ULL, // x 2^-102 ~= 10^-31
- 0xa2425ff75e14fc31ULL, // x 2^-99 ~= 10^-30
- 0xcad2f7f5359a3b3eULL, // x 2^-96 ~= 10^-29
- 0xfd87b5f28300ca0dULL, // x 2^-93 ~= 10^-28
- 0x9e74d1b791e07e48ULL, // x 2^-89 ~= 10^-27
- 0xc612062576589ddaULL, // x 2^-86 ~= 10^-26
- 0xf79687aed3eec551ULL, // x 2^-83 ~= 10^-25
- 0x9abe14cd44753b52ULL, // x 2^-79 ~= 10^-24
- 0xc16d9a0095928a27ULL, // x 2^-76 ~= 10^-23
- 0xf1c90080baf72cb1ULL, // x 2^-73 ~= 10^-22
- 0x971da05074da7beeULL, // x 2^-69 ~= 10^-21
- 0xbce5086492111aeaULL, // x 2^-66 ~= 10^-20
- 0xec1e4a7db69561a5ULL, // x 2^-63 ~= 10^-19
- 0x9392ee8e921d5d07ULL, // x 2^-59 ~= 10^-18
- 0xb877aa3236a4b449ULL, // x 2^-56 ~= 10^-17
- 0xe69594bec44de15bULL, // x 2^-53 ~= 10^-16
- 0x901d7cf73ab0acd9ULL, // x 2^-49 ~= 10^-15
- 0xb424dc35095cd80fULL, // x 2^-46 ~= 10^-14
- 0xe12e13424bb40e13ULL, // x 2^-43 ~= 10^-13
- 0x8cbccc096f5088cbULL, // x 2^-39 ~= 10^-12
- 0xafebff0bcb24aafeULL, // x 2^-36 ~= 10^-11
- 0xdbe6fecebdedd5beULL, // x 2^-33 ~= 10^-10
- 0x89705f4136b4a597ULL, // x 2^-29 ~= 10^-9
- 0xabcc77118461cefcULL, // x 2^-26 ~= 10^-8
- 0xd6bf94d5e57a42bcULL, // x 2^-23 ~= 10^-7
- 0x8637bd05af6c69b5ULL, // x 2^-19 ~= 10^-6
- 0xa7c5ac471b478423ULL, // x 2^-16 ~= 10^-5
- 0xd1b71758e219652bULL, // x 2^-13 ~= 10^-4
- 0x83126e978d4fdf3bULL, // x 2^-9 ~= 10^-3
- 0xa3d70a3d70a3d70aULL, // x 2^-6 ~= 10^-2
- 0xccccccccccccccccULL, // x 2^-3 ~= 10^-1
-#endif
- // These values are exact; we use them together with
- // _CoarseBinary64 below for binary64 format.
- 0x8000000000000000ULL, // x 2^1 == 10^0 exactly
- 0xa000000000000000ULL, // x 2^4 == 10^1 exactly
- 0xc800000000000000ULL, // x 2^7 == 10^2 exactly
- 0xfa00000000000000ULL, // x 2^10 == 10^3 exactly
- 0x9c40000000000000ULL, // x 2^14 == 10^4 exactly
- 0xc350000000000000ULL, // x 2^17 == 10^5 exactly
- 0xf424000000000000ULL, // x 2^20 == 10^6 exactly
- 0x9896800000000000ULL, // x 2^24 == 10^7 exactly
- 0xbebc200000000000ULL, // x 2^27 == 10^8 exactly
- 0xee6b280000000000ULL, // x 2^30 == 10^9 exactly
- 0x9502f90000000000ULL, // x 2^34 == 10^10 exactly
- 0xba43b74000000000ULL, // x 2^37 == 10^11 exactly
- 0xe8d4a51000000000ULL, // x 2^40 == 10^12 exactly
- 0x9184e72a00000000ULL, // x 2^44 == 10^13 exactly
- 0xb5e620f480000000ULL, // x 2^47 == 10^14 exactly
- 0xe35fa931a0000000ULL, // x 2^50 == 10^15 exactly
- 0x8e1bc9bf04000000ULL, // x 2^54 == 10^16 exactly
- 0xb1a2bc2ec5000000ULL, // x 2^57 == 10^17 exactly
- 0xde0b6b3a76400000ULL, // x 2^60 == 10^18 exactly
- 0x8ac7230489e80000ULL, // x 2^64 == 10^19 exactly
- 0xad78ebc5ac620000ULL, // x 2^67 == 10^20 exactly
- 0xd8d726b7177a8000ULL, // x 2^70 == 10^21 exactly
- 0x878678326eac9000ULL, // x 2^74 == 10^22 exactly
- 0xa968163f0a57b400ULL, // x 2^77 == 10^23 exactly
- 0xd3c21bcecceda100ULL, // x 2^80 == 10^24 exactly
- 0x84595161401484a0ULL, // x 2^84 == 10^25 exactly
- 0xa56fa5b99019a5c8ULL, // x 2^87 == 10^26 exactly
- 0xcecb8f27f4200f3aULL, // x 2^90 == 10^27 exactly
-#if ENABLE_BINARY32_SUPPORT
- 0x813f3978f8940984ULL, // x 2^94 ~= 10^28
- 0xa18f07d736b90be5ULL, // x 2^97 ~= 10^29
- 0xc9f2c9cd04674edeULL, // x 2^100 ~= 10^30
- 0xfc6f7c4045812296ULL, // x 2^103 ~= 10^31
- 0x9dc5ada82b70b59dULL, // x 2^107 ~= 10^32
- 0xc5371912364ce305ULL, // x 2^110 ~= 10^33
- 0xf684df56c3e01bc6ULL, // x 2^113 ~= 10^34
- 0x9a130b963a6c115cULL, // x 2^117 ~= 10^35
- 0xc097ce7bc90715b3ULL, // x 2^120 ~= 10^36
- 0xf0bdc21abb48db20ULL, // x 2^123 ~= 10^37
- 0x96769950b50d88f4ULL, // x 2^127 ~= 10^38
- 0xbc143fa4e250eb31ULL, // x 2^130 ~= 10^39
-#endif
-};
-#endif
-
-#if ENABLE_BINARY64_SUPPORT || ENABLE_FLOAT80_OPTIMIZATIONS || ENABLE_BINARY128_OPTIMIZATIONS
- #if ENABLE_BINARY32_SUPPORT
- static const uint64_t *powersOf10_Exact64 = powersOf10_Float + 70;
- #else
- static const uint64_t *powersOf10_Exact64 = powersOf10_Float;
- #endif
-#endif
-
-#if ENABLE_BINARY64_SUPPORT
-// Rounded-down values supporting the full range of binary64.
-// As above, when not exact, these are rounded down to the
-// nearest value lower than or equal to the exact power of 10.
-//
-// Table size: 232 bytes
-//
-// We only store every 28th power of ten here.
-// We can multiply by an exact 64-bit power of
-// ten from powersOf10_Exact64 above to reconstruct the
-// significand for any power of 10.
-static const uint64_t powersOf10_CoarseBinary64[30] = {
- 0xdd5a2c3eab3097cbULL, // x 2^-1395 ~= 10^-420
- 0xdf82365c497b5453ULL, // x 2^-1302 ~= 10^-392
- 0xe1afa13afbd14d6dULL, // x 2^-1209 ~= 10^-364
- 0xe3e27a444d8d98b7ULL, // x 2^-1116 ~= 10^-336
- 0xe61acf033d1a45dfULL, // x 2^-1023 ~= 10^-308
- 0xe858ad248f5c22c9ULL, // x 2^-930 ~= 10^-280
- 0xea9c227723ee8bcbULL, // x 2^-837 ~= 10^-252
- 0xece53cec4a314ebdULL, // x 2^-744 ~= 10^-224
- 0xef340a98172aace4ULL, // x 2^-651 ~= 10^-196
- 0xf18899b1bc3f8ca1ULL, // x 2^-558 ~= 10^-168
- 0xf3e2f893dec3f126ULL, // x 2^-465 ~= 10^-140
- 0xf64335bcf065d37dULL, // x 2^-372 ~= 10^-112
- 0xf8a95fcf88747d94ULL, // x 2^-279 ~= 10^-84
- 0xfb158592be068d2eULL, // x 2^-186 ~= 10^-56
- 0xfd87b5f28300ca0dULL, // x 2^-93 ~= 10^-28
- 0x8000000000000000ULL, // x 2^1 == 10^0 exactly
- 0x813f3978f8940984ULL, // x 2^94 == 10^28 exactly
- 0x82818f1281ed449fULL, // x 2^187 ~= 10^56
- 0x83c7088e1aab65dbULL, // x 2^280 ~= 10^84
- 0x850fadc09923329eULL, // x 2^373 ~= 10^112
- 0x865b86925b9bc5c2ULL, // x 2^466 ~= 10^140
- 0x87aa9aff79042286ULL, // x 2^559 ~= 10^168
- 0x88fcf317f22241e2ULL, // x 2^652 ~= 10^196
- 0x8a5296ffe33cc92fULL, // x 2^745 ~= 10^224
- 0x8bab8eefb6409c1aULL, // x 2^838 ~= 10^252
- 0x8d07e33455637eb2ULL, // x 2^931 ~= 10^280
- 0x8e679c2f5e44ff8fULL, // x 2^1024 ~= 10^308
- 0x8fcac257558ee4e6ULL, // x 2^1117 ~= 10^336
- 0x91315e37db165aa9ULL, // x 2^1210 ~= 10^364
- 0x929b7871de7f22b9ULL, // x 2^1303 ~= 10^392
-};
-#endif
-
-#if ENABLE_FLOAT80_OPTIMIZATIONS || ENABLE_BINARY128_OPTIMIZATIONS
-// Every 56th power of 10 across the range of Float80/Binary128
-//
-// Table size: 2880 bytes
-static const uint64_t powersOf10_Binary128[180] = {
- // Low-order ... high-order
- 0x0000000000000000ULL, 0x8000000000000000ULL, // x 2^1 == 10^0 exactly
- 0xbff8f10e7a8921a4ULL, 0x82818f1281ed449fULL, // x 2^187 ~= 10^56
- 0x03e2cf6bc604ddb0ULL, 0x850fadc09923329eULL, // x 2^373 ~= 10^112
- 0x90fb44d2f05d0842ULL, 0x87aa9aff79042286ULL, // x 2^559 ~= 10^168
- 0x82bd6b70d99aaa6fULL, 0x8a5296ffe33cc92fULL, // x 2^745 ~= 10^224
- 0xdb0b487b6423e1e8ULL, 0x8d07e33455637eb2ULL, // x 2^931 ~= 10^280
- 0x213a4f0aa5e8a7b1ULL, 0x8fcac257558ee4e6ULL, // x 2^1117 ~= 10^336
- 0x1c306f5d1b0b5fdfULL, 0x929b7871de7f22b9ULL, // x 2^1303 ~= 10^392
- 0xa7ea9c8838ce9437ULL, 0x957a4ae1ebf7f3d3ULL, // x 2^1489 ~= 10^448
- 0xbf1d49cacccd5e68ULL, 0x9867806127ece4f4ULL, // x 2^1675 ~= 10^504
- 0x655494c5c95d77f2ULL, 0x9b63610bb9243e46ULL, // x 2^1861 ~= 10^560
- 0x02e008393fd60b55ULL, 0x9e6e366733f85561ULL, // x 2^2047 ~= 10^616
- 0x55e04dba4b3bd4ddULL, 0xa1884b69ade24964ULL, // x 2^2233 ~= 10^672
- 0x44b222741eb1ebbfULL, 0xa4b1ec80f47c84adULL, // x 2^2419 ~= 10^728
- 0x1cf4a5c3bc09fa6fULL, 0xa7eb6799e8aec999ULL, // x 2^2605 ~= 10^784
- 0x3c4a575151b294dcULL, 0xab350c27feb90accULL, // x 2^2791 ~= 10^840
- 0x870a8d87239d8f35ULL, 0xae8f2b2ce3d5dbe9ULL, // x 2^2977 ~= 10^896
- 0xdd929f09c3eff5acULL, 0xb1fa17404a30e5e8ULL, // x 2^3163 ~= 10^952
- 0x1931b583a9431d7eULL, 0xb5762497dbf17a9eULL, // x 2^3349 ~= 10^1008
- 0xe30db03e0f8dd286ULL, 0xb903a90f561d25e2ULL, // x 2^3535 ~= 10^1064
- 0x9eb5cb19647508c5ULL, 0xbca2fc30cc19f090ULL, // x 2^3721 ~= 10^1120
- 0x24bd4c00042ad125ULL, 0xc054773d149bf26bULL, // x 2^3907 ~= 10^1176
- 0x7ea30dbd7ea479e3ULL, 0xc418753460cdcca9ULL, // x 2^4093 ~= 10^1232
- 0x764f4cf916b4deceULL, 0xc7ef52defe87b751ULL, // x 2^4279 ~= 10^1288
- 0xbeb7fbdc1cbe8b37ULL, 0xcbd96ed6466cf081ULL, // x 2^4465 ~= 10^1344
- 0xdce472c619aa3f63ULL, 0xcfd7298db6cb9672ULL, // x 2^4651 ~= 10^1400
- 0xe47defc14a406e4fULL, 0xd3e8e55c3c1f43d0ULL, // x 2^4837 ~= 10^1456
- 0xb7157c60a24a0569ULL, 0xd80f0685a81b2a81ULL, // x 2^5023 ~= 10^1512
- 0xfb0b98f6bbc4f0cbULL, 0xdc49f3445824e360ULL, // x 2^5209 ~= 10^1568
- 0xc6c6c1764e047e15ULL, 0xe09a13d30c2dba62ULL, // x 2^5395 ~= 10^1624
- 0x87e8dcfc09dbc33aULL, 0xe4ffd276eedce658ULL, // x 2^5581 ~= 10^1680
- 0xb1a3642a8da3cf4fULL, 0xe97b9b89d001dab3ULL, // x 2^5767 ~= 10^1736
- 0x2d4070f33b21ab7bULL, 0xee0ddd84924ab88cULL, // x 2^5953 ~= 10^1792
- 0xa2bf0c63a814e04eULL, 0xf2b70909cd3fd35cULL, // x 2^6139 ~= 10^1848
- 0x08f13995cf9c2747ULL, 0xf77790f0a48a45ceULL, // x 2^6325 ~= 10^1904
- 0x7a37993eb21444faULL, 0xfc4fea4fd590b40aULL, // x 2^6511 ~= 10^1960
- 0xb7b1ada9cdeba84dULL, 0x80a046447e3d49f1ULL, // x 2^6698 ~= 10^2016
- 0x0cc6866c5d69b2cbULL, 0x8324f8aa08d7d411ULL, // x 2^6884 ~= 10^2072
- 0x7fe2b4308dcbf1a3ULL, 0x85b64a659077660eULL, // x 2^7070 ~= 10^2128
- 0x1d73ef3eaac3c964ULL, 0x88547abb1d8e5bd9ULL, // x 2^7256 ~= 10^2184
- 0x1e34291b1ef566c7ULL, 0x8affca2bd1f88549ULL, // x 2^7442 ~= 10^2240
- 0x9e9383d73d486881ULL, 0x8db87a7c1e56d873ULL, // x 2^7628 ~= 10^2296
- 0x9cc5ee51962c011aULL, 0x907eceba168949b3ULL, // x 2^7814 ~= 10^2352
- 0x413407cfeeac9743ULL, 0x93530b43e5e2c129ULL, // x 2^8000 ~= 10^2408
- 0x7efa7d29c44e11b7ULL, 0x963575ce63b6332dULL, // x 2^8186 ~= 10^2464
- 0x5a848859645d1c6fULL, 0x9926556bc8defe43ULL, // x 2^8372 ~= 10^2520
- 0x51edea897b34601fULL, 0x9c25f29286e9ddb6ULL, // x 2^8558 ~= 10^2576
- 0xb50008d92529e91fULL, 0x9f3497244186fca4ULL, // x 2^8744 ~= 10^2632
- 0xf09e780bcc8238d9ULL, 0xa2528e74eaf101fcULL, // x 2^8930 ~= 10^2688
- 0x3a5828869701a165ULL, 0xa580255203f84b47ULL, // x 2^9116 ~= 10^2744
- 0x8b231a70eb5444ceULL, 0xa8bdaa0a0064fa44ULL, // x 2^9302 ~= 10^2800
- 0xfa1bde1f473556a4ULL, 0xac0b6c73d065f8ccULL, // x 2^9488 ~= 10^2856
- 0x7730e00421da4d55ULL, 0xaf69bdf68fc6a740ULL, // x 2^9674 ~= 10^2912
- 0x7f959cb702329d14ULL, 0xb2d8f1915ba88ca5ULL, // x 2^9860 ~= 10^2968
- 0x40c3a071220f5567ULL, 0xb6595be34f821493ULL, // x 2^10046 ~= 10^3024
- 0x11c48d02b8326bd3ULL, 0xb9eb5333aa272e9bULL, // x 2^10232 ~= 10^3080
- 0x566765461bd2f61bULL, 0xbd8f2f7a1ba47d6dULL, // x 2^10418 ~= 10^3136
- 0xb889018e4f6e9a52ULL, 0xc1454a673cb9b1ceULL, // x 2^10604 ~= 10^3192
- 0xf85333a94848659fULL, 0xc50dff6d30c3aefcULL, // x 2^10790 ~= 10^3248
- 0x1a1aeae7cf8a9d3dULL, 0xc8e9abc872eb2bc1ULL, // x 2^10976 ~= 10^3304
- 0x12e29f09d9061609ULL, 0xccd8ae88cf70ad84ULL, // x 2^11162 ~= 10^3360
- 0xdf7601457ca20b35ULL, 0xd0db689a89f2f9b1ULL, // x 2^11348 ~= 10^3416
- 0xcbdcd02f23cc7690ULL, 0xd4f23ccfb1916df5ULL, // x 2^11534 ~= 10^3472
- 0x44289dd21b589d7aULL, 0xd91d8fe9a3d019ccULL, // x 2^11720 ~= 10^3528
- 0x95aa118ec1d08317ULL, 0xdd5dc8a2bf27f3f7ULL, // x 2^11906 ~= 10^3584
- 0x72c4d2cad73b0a7bULL, 0xe1b34fb846321d04ULL, // x 2^12092 ~= 10^3640
- 0xe20a88f1134f906dULL, 0xe61e8ff47461cda9ULL, // x 2^12278 ~= 10^3696
- 0xc7c91d5c341ed39dULL, 0xea9ff638c54554e1ULL, // x 2^12464 ~= 10^3752
- 0xf659ede2159a45ecULL, 0xef37f1886f4b6690ULL, // x 2^12650 ~= 10^3808
- 0x78d946bab954b82fULL, 0xf3e6f313130ef0efULL, // x 2^12836 ~= 10^3864
- 0xc9b1474d8f89c269ULL, 0xf8ad6e3fa030bd15ULL, // x 2^13022 ~= 10^3920
- 0x6b1d2745340e7b14ULL, 0xfd8bd8b770cb469eULL, // x 2^13208 ~= 10^3976
- 0xf22e502fcdd4bca2ULL, 0x81415538ce493bd5ULL, // x 2^13395 ~= 10^4032
- 0x7c1735fc3b813c8cULL, 0x83c92edf425b292dULL, // x 2^13581 ~= 10^4088
- 0x0367500a8e9a178fULL, 0x865db7a9ccd2839eULL, // x 2^13767 ~= 10^4144
- 0xc9ac50475e25293aULL, 0x88ff2f2bade74531ULL, // x 2^13953 ~= 10^4200
- 0x0879b2e5f6ee8b1cULL, 0x8badd636cc48b341ULL, // x 2^14139 ~= 10^4256
- 0x2f33c652bd12fab7ULL, 0x8e69eee1f23f2be5ULL, // x 2^14325 ~= 10^4312
- 0xad6a6308a8e8b557ULL, 0x9133bc8f2a130fe5ULL, // x 2^14511 ~= 10^4368
- 0x9dbaa465efe141a0ULL, 0x940b83f23a55842aULL, // x 2^14697 ~= 10^4424
- 0x888c9ab2fc5b3437ULL, 0x96f18b1742aad751ULL, // x 2^14883 ~= 10^4480
- 0xba00864671d1053fULL, 0x99e6196979b978f1ULL, // x 2^15069 ~= 10^4536
- 0x61d59d402aae4feaULL, 0x9ce977ba0ce3a0bdULL, // x 2^15255 ~= 10^4592
- 0x803c1cd864033781ULL, 0x9ffbf04722750449ULL, // x 2^15441 ~= 10^4648
- 0xa28a151725a55e10ULL, 0xa31dcec2fef14b30ULL, // x 2^15627 ~= 10^4704
- 0x5b8452af2302fe13ULL, 0xa64f605b4e3352cdULL, // x 2^15813 ~= 10^4760
- 0x82b84cabc828bf93ULL, 0xa990f3c09110c544ULL, // x 2^15999 ~= 10^4816
- 0x8d29dd5122e4278dULL, 0xace2d92db0390b59ULL, // x 2^16185 ~= 10^4872
- 0x58f8fde02c03a6c6ULL, 0xb045626fb50a35e7ULL, // x 2^16371 ~= 10^4928
- 0xd950102978dbd0ffULL, 0xb3b8e2eda91a232dULL, // x 2^16557 ~= 10^4984
-};
-#endif
-
-// ================================================================
-// ================================================================
-//
-// Power-of-5 tables
-//
-// ================================================================
-// ================================================================
-
-// Integer powers of 5 used to drive the power-of-5 calculator used
-// in the slow path
-const static uint64_t powersOfFive[] = {1, 5, 25, 125, 625, 3125, 15625,
- 78125, 390625, 1953125, 9765625, 48828125UL, 244140625UL, 1220703125UL,
- 6103515625ULL, 30517578125ULL, 152587890625ULL, 762939453125ULL,
- 3814697265625ULL, 19073486328125ULL, 95367431640625ULL,
- 476837158203125ULL, 2384185791015625ULL, 11920928955078125ULL,
- 59604644775390625ULL, 298023223876953125ULL, 1490116119384765625ULL,
- 7450580596923828125ULL
-};
-
-//
-// Large integer powers of 5. These are used to accelerate the
-// power-of-5 calculator when computing very large powers needed for
-// float80 and binary128 in the slow path.
-//
-// TODO: Consider dropping these. If float80/binary128 optimizations
-// are enabled, then the interval optimization removes a lot of the
-// need for these. (I've already disabled these to save code for
-// anyone that prefers code size to performance for these formats.)
-
-#if ENABLE_FLOAT80_OPTIMIZATIONS || ENABLE_BINARY128_OPTIMIZATIONS
-// Useful python snippet to generate these constants:
-// a = hex(5 ** 2000)[2:]
-// while a:
-// print("0x" + a[-8:] + "UL,")
-// a = a[:-8]
-
-#if MP_WORD_BITS == 32
-// Broken down into 32-bit words for use on 64-bit processors.
-// Stored from least-significant to most-significant.
-
-const static uint32_t fiveToThe1000[] = {
- 0xe76e2661UL, 0xa75f463cUL, 0x8f9ac297UL, 0xb426e5d5UL, 0x8021f455UL,
- 0x484b538eUL, 0xd39bcb19UL, 0x32c52790UL, 0x69805e7cUL, 0x78612ddbUL,
- 0x84a48774UL, 0x4b7ad105UL, 0xa1bca9a9UL, 0x16d4cc76UL, 0x1b3bc79dUL,
- 0x22c38759UL, 0x4ec6864aUL, 0x670dbd77UL, 0xa93ad95bUL, 0xfd07b514UL,
- 0x185d4af8UL, 0x491981abUL, 0x7f6e513bUL, 0x1a8298bfUL, 0xccdd06fcUL,
- 0x9c225999UL, 0x7a000d77UL, 0x211ecc86UL, 0x678b85c6UL, 0xccb243d3UL,
- 0x8c2fb72dUL, 0xf95cada8UL, 0x9c82b396UL, 0x3ef53cd3UL, 0x9093cb88UL,
- 0xb5aff7d4UL, 0x7da0bc3eUL, 0x522aea78UL, 0xa0165d55UL, 0x3b34e560UL,
- 0x80a282e5UL, 0xf0c08162UL, 0x9d7e4698UL, 0xe0fe7772UL, 0x3d9d710eUL,
- 0x8d98ec4eUL, 0xb5954022UL, 0xa0e542c3UL, 0x3be09b67UL, 0x5da7d253UL,
- 0x86949f98UL, 0x68641b9eUL, 0xaa166a05UL, 0xdfb84e6bUL, 0x2955510aUL,
- 0xe62bf617UL, 0xe8c18fc6UL, 0x8afb443aUL, 0x22f12a84UL, 0x048f59a7UL,
- 0xaf620341UL, 0xd377548eUL, 0xefd7f15cUL, 0x2f1121d5UL, 0x9aa2146bUL,
- 0xf36453b5UL, 0xc7a3e8ecUL, 0xcab2ae32UL, 0xdb4abe87UL, 0x5a54e6f2UL,
- 0xb015e34aUL, 0xc7e774f6UL, 0x3ce36UL,
-};
-
-#elif MP_WORD_BITS == 16
-
-// Broken down into 16-bit words for use on 32-bit processors.
-// Stored from least-significant to most-significant.
-const static uint16_t fiveToThe1000[] = {
- 0x2661U, 0xe76eU, 0x463cU, 0xa75fU, 0xc297U, 0x8f9aU, 0xe5d5U,
- 0xb426U, 0xf455U, 0x8021U, 0x538eU, 0x484bU, 0xcb19U, 0xd39bU,
- 0x2790U, 0x32c5U, 0x5e7cU, 0x6980U, 0x2ddbU, 0x7861U, 0x8774U,
- 0x84a4U, 0xd105U, 0x4b7aU, 0xa9a9U, 0xa1bcU, 0xcc76U, 0x16d4U,
- 0xc79dU, 0x1b3bU, 0x8759U, 0x22c3U, 0x864aU, 0x4ec6U, 0xbd77U,
- 0x670dU, 0xd95bU, 0xa93aU, 0xb514U, 0xfd07U, 0x4af8U, 0x185dU,
- 0x81abU, 0x4919U, 0x513bU, 0x7f6eU, 0x98bfU, 0x1a82U, 0x06fcU,
- 0xccddU, 0x5999U, 0x9c22U, 0x0d77U, 0x7a00U, 0xcc86U, 0x211eU,
- 0x85c6U, 0x678bU, 0x43d3U, 0xccb2U, 0xb72dU, 0x8c2fU, 0xada8U,
- 0xf95cU, 0xb396U, 0x9c82U, 0x3cd3U, 0x3ef5U, 0xcb88U, 0x9093U,
- 0xf7d4U, 0xb5afU, 0xbc3eU, 0x7da0U, 0xea78U, 0x522aU, 0x5d55U,
- 0xa016U, 0xe560U, 0x3b34U, 0x82e5U, 0x80a2U, 0x8162U, 0xf0c0U,
- 0x4698U, 0x9d7eU, 0x7772U, 0xe0feU, 0x710eU, 0x3d9dU, 0xec4eU,
- 0x8d98U, 0x4022U, 0xb595U, 0x42c3U, 0xa0e5U, 0x9b67U, 0x3be0U,
- 0xd253U, 0x5da7U, 0x9f98U, 0x8694U, 0x1b9eU, 0x6864U, 0x6a05U,
- 0xaa16U, 0x4e6bU, 0xdfb8U, 0x510aU, 0x2955U, 0xf617U, 0xe62bU,
- 0x8fc6U, 0xe8c1U, 0x443aU, 0x8afbU, 0x2a84U, 0x22f1U, 0x59a7U,
- 0x048fU, 0x0341U, 0xaf62U, 0x548eU, 0xd377U, 0xf15cU, 0xefd7U,
- 0x21d5U, 0x2f11U, 0x146bU, 0x9aa2U, 0x53b5U, 0xf364U, 0xe8ecU,
- 0xc7a3U, 0xae32U, 0xcab2U, 0xbe87U, 0xdb4aU, 0xe6f2U, 0x5a54U,
- 0xe34aU, 0xb015U, 0x74f6U, 0xc7e7U, 0xce36U, 0x3U,
-};
-#endif
-#endif
-
-// ================================================================
-// ================================================================
-//
-// Fixed-width integer routines
-//
-// ================================================================
-// ================================================================
-
-#if HAVE_UINT128_T
-#define multiply64x64RoundingDown(lhs, rhs) (((__uint128_t)(lhs) * (rhs)) >> 64)
-#else
-static uint64_t multiply64x64RoundingDown(uint64_t lhs, uint64_t rhs) {
- uint64_t a = (lhs >> 32) * (rhs >> 32);
- uint64_t b = (lhs >> 32) * (rhs & UINT32_MAX);
- uint64_t c = (lhs & UINT32_MAX) * (rhs >> 32);
- uint64_t d = (lhs & UINT32_MAX) * (rhs & UINT32_MAX);
- b += (c & UINT32_MAX) + (d >> 32);
- return a + (b >> 32) + (c >> 32);
-}
-#endif
-
-// ================================================================
-// ================================================================
-//
-// Multi-Precision Integer Routines
-//
-// ================================================================
-// ================================================================
-
-// Configure our multi-precision integer machinery with appropriate types
-#if MP_WORD_BITS == 16
-typedef uint16_t mp_word_t; // Type of a single word in an MP integer
-typedef uint32_t mp_dword_t; // Type big enough to hold a two-word MP integer
-static const mp_word_t MP_WORD_MAX = UINT16_MAX;
-#elif MP_WORD_BITS == 32
-typedef uint32_t mp_word_t; // Type of a single word in an MP integer
-typedef uint64_t mp_dword_t; // Type big enough to hold a two-word MP integer
-static const mp_word_t MP_WORD_MAX = UINT32_MAX;
-#else
-#error MP_WORD_BITS undefined
-#endif
-static const int mp_word_bits = sizeof(mp_word_t) * 8;
-// __builtin_clz() takes an `unsigned` which may be different from mp_word_t
-// This adjusts the result accordingly.
-#define CLZ_WORD(word) ((unsigned)__builtin_clz((word)) + (mp_word_bits - sizeof(unsigned) * 8))
-
-// A multi-precision integer is represented as two pointers:
-// lsw - points to least-significant word (lowest address in memory)
-// msw - points to one beyond the most-significant word
-// In particular, the following are true:
-// number of words == msw - lsw
-// mp is empty/zero if msw == lsw
-// lsw[0] is least-significant word
-// lsw[1] is second-least-significant word
-// ...
-// msw[-2] is second-most-significant word
-// msw[-1] is most-significant word
-
-// Caveat: The MP routines in this file are optimized to just extend
-// the value in-place as necessary without any checking to see whether
-// the allocated memory is sufficient. That requires that the original
-// allocation be sufficient for the largest value that will occur.
-typedef struct { mp_word_t *lsw; mp_word_t *msw; } mp_t;
-
-// Shift the mpint left by the indicated number of
-// bits. This will extend the mpint at the msw end
-// as necessary.
-static void shiftLeftMP(mp_t *work, int shift) {
- int wordsShift = shift / mp_word_bits;
- int bitsShift = shift % mp_word_bits;
-
- if (wordsShift > 0) {
- memmove(work->lsw + wordsShift,
- work->lsw,
- (work->msw - work->lsw) * sizeof(mp_word_t));
- memset(work->lsw, 0, wordsShift * sizeof(mp_word_t));
- work->msw += wordsShift;
- }
-
- if (bitsShift > 0) {
- mp_dword_t t = 0;
- mp_word_t *p = work->lsw;
- for (; p < work->msw; p++) {
- t |= (mp_dword_t)*p << bitsShift;
- *p = (mp_word_t)t;
- t >>= mp_word_bits;
- }
- if (t != 0) {
- *p = (mp_word_t)t;
- work->msw = p + 1;
- }
- }
-}
-
-// Return the index of the most-significant set bit
-static int bitCountMP(mp_t work) {
- if (work.msw == work.lsw) {
- return 0;
- }
- assert(work.msw[-1] != 0); // High-order word cannot be zero
- return (int)(mp_word_bits - CLZ_WORD(work.msw[-1])) // Bits in high-order word
- + (int)(mp_word_bits * (work.msw - work.lsw - 1)); // Bits in all other words
-}
-
-// Add small integer to varint
-static void addToMP(mp_t *work, uint64_t addend) {
- uint64_t t = addend;
- mp_word_t *p = work->lsw;
- while (t > 0 && p < work->msw) {
- t += *p;
- *p = (mp_word_t)t;
- t >>= mp_word_bits;
- p += 1;
- }
- while (t > 0) {
- *p = (mp_word_t)t;
- t >>= mp_word_bits;
- p += 1;
- }
- if (p > work->msw) {
- work->msw = p;
- }
-}
-
-// Shift an MP right, rounding the result according to
-// the current FP rounding mode.
-static mp_t shiftRightMPWithRounding(mp_t work,
- int shift,
- int trailingNonZero,
- bool negative,
- int roundingMode) {
- (void)negative;
-
- if (shift == 0) {
- return work;
- }
- if (shift < 0) {
- shiftLeftMP(&work, -shift);
- return work;
- }
-
- mp_t result = work;
- int wordsShift = shift / mp_word_bits;
- int bitsShift = shift % mp_word_bits;
-
- if (bitsShift == 0) {
- // We don't really need to shift anything, just drop the low-order
- // words and possibly increment.
- result.lsw += wordsShift;
- switch (roundingMode) {
-#ifdef FE_TOWARDZERO
- case FE_TOWARDZERO:
- return result;
-#endif
-#ifdef FE_DOWNWARD
- case FE_DOWNWARD:
- // Upwards & downwards rounding are symmetric
- negative = !negative;
- // FALL THROUGH
-#endif
-#ifdef FE_UPWARD
- case FE_UPWARD:
- for (mp_word_t *p = work.lsw; p < result.lsw; p++) {
- trailingNonZero |= *p;
- }
- if (negative || !trailingNonZero) {
- return result;
- } else {
- break; // Increment and return
- }
-#endif
-#ifdef FE_TONEAREST
- case FE_TONEAREST:
-#endif
- default: {
- // shift is non-zero, so result.lsw[-1] is valid
- // and is the most-significant-word of the fraction:
- mp_word_t fractionMsw = result.lsw[-1];
- mp_word_t oneHalf = 1U << (mp_word_bits - 1);
- if (fractionMsw < oneHalf) {
- return result;
- } else if (fractionMsw > oneHalf) {
- break; // Increment and return
- } else {
- for (mp_word_t *p = work.lsw; p < result.lsw - 1; p++) {
- trailingNonZero |= *p;
- }
- if (trailingNonZero || ((result.msw > result.lsw) && (result.lsw[0] & 1))) {
- break; // Increment and return
- }
- return result;
- }
- }
- }
- // Increment and return
- addToMP(&result, 1);
- return result;
- }
-
- result.lsw += wordsShift;
- mp_word_t fraction = result.lsw[0] & ((1U << bitsShift) - 1);
-
- {
- mp_word_t *p = result.lsw;
- mp_dword_t t = *p++ >> bitsShift;
- for (; p < result.msw; p++) {
- t |= (mp_dword_t)*p << (mp_word_bits - bitsShift);
- p[-1] = (mp_word_t)t;
- t >>= mp_word_bits;
- }
- if (t == 0) {
- result.msw -= 1;
- } else {
- p[-1] = (mp_word_t)t;
- }
- }
-
- switch (roundingMode) {
-#ifdef FE_TOWARDZERO
- case FE_TOWARDZERO:
- return result;
-#endif
-#ifdef FE_DOWNWARD
- case FE_DOWNWARD:
- // Upwards & downwards rounding are symmetric
- negative = !negative;
- // FALL THROUGH
-#endif
-#ifdef FE_UPWARD
- case FE_UPWARD:
- trailingNonZero |= fraction;
- for (mp_word_t *p = work.lsw; p < result.lsw; p++) {
- trailingNonZero |= *p;
- }
- if (negative || !trailingNonZero) {
- return result;
- } else {
- break; // Increment and return
- }
-#endif
-#ifdef FE_TONEAREST
- case FE_TONEAREST:
-#endif
- default: {
- mp_word_t half = (mp_word_t)(1U << (bitsShift - 1));
- if (fraction < half) {
- return result;
- } else if (fraction > half) {
- break; // Increment and return
- } else { // First part of fraction is exact half..
- for (mp_word_t *p = work.lsw; p < result.lsw; p++) {
- trailingNonZero |= *p;
- }
- if (trailingNonZero || ((result.msw > result.lsw) && (result.lsw[0] & 1))) {
- break; // Increment and return
- } else {
- return result;
- }
- }
- }
- }
- // Increment and return
- addToMP(&result, 1);
- return result;
-}
-
-static mp_t shiftRightMPWithTruncation(mp_t work,
- int shift) {
- return shiftRightMPWithRounding(work, shift,
- 0, 0, FE_TOWARDZERO);
-}
-
-// Multiply varint by N
-static void multiplyMPByN(mp_t *work, uint32_t n) {
- uint64_t t = 0;
- for (mp_word_t *p = work->lsw; p < work->msw; p++) {
- t += *p * (uint64_t)n;
- *p = (mp_word_t)t;
- t >>= mp_word_bits;
- }
- while (t > 0) {
- work->msw[0] = (mp_word_t)t;
- t >>= mp_word_bits;
- work->msw += 1;
- }
-}
-
-static void multiplyByFiveToTheN(mp_t *dest, int power) {
-#if HAVE_UINT128_T
- // 128-bit arithmetic lets us multiply 32-bit words by 5^40 (93 bits).
- // For a double, this can loop up to 8 times.
- // Without the large-power optimization in fiveToTheN, this can
- // loop > 100 times for a binary128. :-/
- while (power > 40) {
- static const uint64_t fiveToThe20 = 95367431640625ULL;
- static const __uint128_t fiveToThe40 = (__uint128_t)fiveToThe20 * fiveToThe20;
- __uint128_t t = 0;
- mp_word_t *p = dest->lsw;
- while (p < dest->msw) {
- t += *p * fiveToThe40;
- *p++ = (mp_word_t)t;
- t >>= mp_word_bits;
- }
- while (t > 0) {
- *p++ = (mp_word_t)t;
- t >>= mp_word_bits;
- }
- dest->msw = p;
- power -= 40;
- }
-#endif
-
- while (power > 0) {
-#if MP_WORD_BITS == 16
- // Limit to 5^20 (47 bits) so we don't overflow
- // a 64-bit accumulator.
- const static int maxPower = 13; // For 64-bit
- uint64_t t = 0;
- uint64_t powerOfFive = powersOfFive[power > maxPower ? maxPower : power];
-#elif MP_WORD_BITS == 32
- // With a 128-bit accumulator, we can use the full table
- // of 64-bit powers (up to 5^27).
- const static int maxPower = 27;
- __uint128_t t = 0;
- __uint128_t powerOfFive = powersOfFive[power > maxPower ? maxPower : power];
-#endif
- mp_word_t *p = dest->lsw;
- while (p < dest->msw) {
- t += *p * powerOfFive;
- *p++ = (mp_word_t)t;
- t >>= mp_word_bits;
- }
- while (t > 0) {
- *p++ = (mp_word_t)t;
- t >>= mp_word_bits;
- }
- dest->msw = p;
- power -= maxPower;
- }
-}
-
-// Compute 5^N
-// This is _THE_ performance-critical function for the slow path
-// when handling float80 or binary128 with large exponents.
-// (Of course, that only applies to the few percent of inputs
-// that don't get handled by the optimized paths.)
-static void fiveToTheN(mp_t *dest, int power) {
-
-#if ENABLE_FLOAT80_OPTIMIZATIONS || ENABLE_BINARY128_OPTIMIZATIONS
- // Accelerate a very large power with a pre-computed initial value
- // Only for float80/binary128 since binary64 only goes up to 10^325
- if (power >= 1000) {
- memcpy(dest->lsw, fiveToThe1000, sizeof(fiveToThe1000));
- dest->msw = dest->lsw + sizeof(fiveToThe1000) / sizeof(fiveToThe1000[0]);
- power -= 1000;
- }
- else
-#endif
- {
- // Initialize with as large a power of 5 as we can from the standard table
- const static int maxTablePower = sizeof(powersOfFive) / sizeof(powersOfFive[0]) - 1;
- int thisPower = power > maxTablePower ? maxTablePower : power;
- uint64_t t = powersOfFive[thisPower];
- mp_word_t *p = dest->lsw;
- while (t > 0) {
- *p++ = (mp_word_t)t;
- t >>= mp_word_bits;
- }
- dest->msw = p;
- power -= thisPower;
- }
-
- multiplyByFiveToTheN(dest, power);
-}
-
-// Following "Algorithm D" from Knuth AOCP Section 4.3.1
-// Accepts:
-// Numerator
-// Denominator
-// *nonZeroRemainder: integer to hold remainder status
-// Returns:
-// quotient stored in numerator area
-// numerator is destroyed
-// *nonZeroRemainder set to non-zero iff remainder was non-zero
-static mp_t divideMPByMP(mp_t numerator, mp_t denominator, int *nonZeroRemainder) {
- // Make sure we haven't picked up a leading zero word anywhere...
- assert(numerator.msw > numerator.lsw);
- assert(denominator.msw > denominator.lsw);
- assert(numerator.msw[-1] != 0);
- assert(denominator.msw[-1] != 0);
-
- // Full long division algorithm assumes denominator is more than 1 word,
- // so we need to handle 1-word case separately.
- if (denominator.msw - denominator.lsw == 1) {
- mp_dword_t n = denominator.lsw[0];
- mp_dword_t t = 0;
- mp_word_t *p = numerator.msw;
- while (p > numerator.lsw) {
- p -= 1;
- t <<= mp_word_bits;
- t += *p;
- mp_word_t q0 = (mp_word_t)(t / n);
- *p = q0;
- t -= q0 * n;
- }
- *nonZeroRemainder = (t != 0);
- while (numerator.msw[-1] == 0) {
- numerator.msw -= 1;
- }
- return numerator;
- }
-
- // D1. Normalize: Multiply numerator and denominator by a power of 2
- // so that denominator has the most significant bit set in the
- // most significant word. This guarantees that qhat below
- // will always be very good.
- int shift = (int)CLZ_WORD(denominator.msw[-1]);
- shiftLeftMP(&denominator, shift);
- shiftLeftMP(&numerator, shift);
-
- // Add a high-order word to the numerator if necessary
- if (numerator.msw[-1] >= denominator.msw[-1]) {
- numerator.msw[0] = 0;
- numerator.msw += 1;
- }
-
- // D2. Iterate
- // Numerator and denominator must not be immediately adjacent in
- // memory, since we need an extra word for the quotient to fit in.
- // TODO: Rearrange this so that the quotient can exactly overlay
- // the numerator instead of going one word beyond. The requirement
- // for an empty word beyond the numerator is non-obvious and easy
- // to screw up.
- assert((numerator.msw < denominator.lsw) || (denominator.msw < numerator.lsw));
- mp_t quotient = {numerator.msw + 1, numerator.msw + 1}; // Quotient will overwrite numerator
- int iterations = (int)((numerator.msw - numerator.lsw) - (denominator.msw - denominator.lsw));
- for (int j = 0; j < iterations; ++j) {
- // D3. Trial division of high-order bits
- mp_word_t qhat;
- mp_dword_t numerator2 =
- ((mp_dword_t)numerator.msw[-1] << mp_word_bits)
- + numerator.msw[-2];
- if (numerator.msw[-1] == denominator.msw[-1]) {
- qhat = MP_WORD_MAX;
- } else {
- qhat = (mp_word_t)(numerator2 / denominator.msw[-1]);
- }
- while (1) {
- mp_dword_t r = numerator2 - qhat * (mp_dword_t)denominator.msw[-1];
- if (r <= MP_WORD_MAX
- && ((denominator.msw[-2] * (mp_dword_t)qhat) > (numerator.msw[-3] + (r << mp_word_bits)))) {
- qhat -= 1;
- } else {
- break;
- }
- }
-
- // D4. numerator -= qhat * denominator
- mp_dword_t t = 0;
- for (mp_word_t *den = denominator.lsw,
- *num = numerator.msw - (denominator.msw - denominator.lsw) - 1;
- den < denominator.msw;
- num += 1, den += 1) {
- t += qhat * (mp_dword_t)*den;
- unsigned borrow = *num < (mp_word_t)t;
- *num -= (mp_word_t)t;
- t >>= mp_word_bits;
- t += borrow;
- }
-
- // D5/D6. qhat may have been one too high; if so, correct for that
- // Per Knuth, this happens very infrequently
- if (numerator.msw[-1] < t) {
- qhat -= 1;
- t = 0;
- for (mp_word_t *den = denominator.lsw,
- *num = numerator.msw - (denominator.msw - denominator.lsw) - 1;
- den < denominator.msw;
- num += 1, den += 1) {
- t += *num + (mp_dword_t)*den;
- *num = (mp_word_t)t;
- t >>= mp_word_bits;
- }
- }
- --quotient.lsw;
- *quotient.lsw = qhat;
-
- // D7. Iterate
- numerator.msw -= 1;
- }
-
- // D8. Post-process the remainder
- mp_word_t remainderHash = 0;
- for (mp_word_t *num = numerator.lsw; num < numerator.msw; ++num) {
- remainderHash |= *num;
- }
- *nonZeroRemainder = remainderHash != 0;
-
- while (quotient.msw[-1] == 0) {
- quotient.msw -= 1;
- }
- return quotient;
-}
-
-// ================================================================
-// ================================================================
-//
-// Parse state
-//
-// ================================================================
-// ================================================================
-
-// To reduce argument-passing overhead, we store all the state
-// in this struct and pass around pointers to it.
-struct parseInfo {
- // ================================================================
- // Basic parameters for the target FP format
-
- // Number of significant bits (including hidden bit for IEEE formats)
- int sigBits;
- // Minimum/maximum binary exponents
- int minBinaryExp;
- int maxBinaryExp;
- // Total number of bytes in this format
- int bytes;
- // Approximate bounds on the decimal exponent, used for
- // early overflow/underflow checks.
- int minDecimalExp;
- int maxDecimalExp;
- // Maximum number of significant digits in the full
- // decimal representation of any exact midpoint.
- int maxDecimalMidpointDigits;
-
- // ================================================================
- // Inputs
-
- // Location where the value should be written
- unsigned char *dest;
- // First character of the provided string
- const char *start;
- // Where to put the address of the first unparsed character
- char **end;
- // Locale
- strtofp_locale_t loc;
-
- // ================================================================
- // Outputs from the initial parse
-
- // First 19 digits of significand as an integer
- uint64_t digits;
- // Total number of significand digits (ignoring decimal point)
- int digitCount;
- // Address of the 20th digit or NULL if digitCount < 20
- const unsigned char *firstUnparsedDigit;
- // Number of unparsed digits = max(digitCount - 19, 0)
- int unparsedDigitCount;
- // Decimal exponent, corrected for decimal point location
- int base10Exponent;
- // True if number is negative
- bool negative;
-};
-
-// ================================================================
-// ================================================================
-//
-// Over/Underflow
-//
-// ================================================================
-// ================================================================
-
-// Store a suitably-signed infinity to the destination
-static void infinity(struct parseInfo *info) {
- // 16/32/64-bit formats we can hardcode the full value
- // and memcpy() it. This is endian-safe (assuming that
- // integers and FP values have the same endianness).
- switch (info->bytes) {
-#if ENABLE_BINARY16_SUPPORT
- case 2: { // binary16
- uint16_t raw = info->negative ? 0xfc00 : 0x7c00;
- memcpy(info->dest, &raw, sizeof(raw));
- return;
- }
-#endif
-#if ENABLE_BINARY32_SUPPORT
- case 4: { // binary32
- uint32_t raw = info->negative ? 0xff800000UL : 0x7f800000UL;
- memcpy(info->dest, &raw, sizeof(raw));
- return;
- }
-#endif
-#if ENABLE_BINARY64_SUPPORT
- case 8: { // binary64
- uint64_t raw = info->negative ? 0xfff0000000000000ULL : 0x7ff0000000000000ULL;
- memcpy(info->dest, &raw, sizeof(raw));
- return;
- }
-#endif
- default:
- break;
- }
-
- // 80- and 128-bit formats we build up incrementally.
- // TODO: Support big-endian.
- memset(info->dest, 0, (size_t)info->bytes);
- switch(info->bytes) {
-#if ENABLE_FLOAT80_SUPPORT
- case 10: // float80
- info->dest[7] = 0x80;
- info->dest[8] = 0xff;
- info->dest[9] = info->negative ? 0xff : 0x7f;
-#endif
-#if ENABLE_BINARY128_SUPPORT
- case 16: // binary128
- info->dest[14] = 0xff;
- info->dest[15] = info->negative ? 0xff : 0x7f;
- break;
-#endif
- }
-}
-
-// Store the max normal value to the destination, suitably signed.
-static void max_value(struct parseInfo *info) {
- switch (info->bytes) {
-#if ENABLE_BINARY16_SUPPORT
- case 2: { // binary16
- uint16_t raw = info->negative ? 0xfbff : 0x7bff;
- memcpy(info->dest, &raw, sizeof(raw));
- return;
- }
-#endif
-#if ENABLE_BINARY32_SUPPORT
- case 4: { // binary32
- uint32_t raw = info->negative ? 0xff7fffffUL : 0x7f7fffffUL;
- memcpy(info->dest, &raw, sizeof(raw));
- return;
- }
-#endif
-#if ENABLE_BINARY64_SUPPORT
- case 8: { // binary64
- uint64_t raw = info->negative ? 0xffefffffffffffffULL : 0x7fefffffffffffffULL;
- memcpy(info->dest, &raw, sizeof(raw));
- return;
- }
-#endif
- default:
- break;
- }
-
- // 80- and 128-bit formats we build up incrementally.
- // TODO: Support big-endian.
- memset(info->dest, 0xff, info->bytes);
- switch(info->bytes) {
-#if ENABLE_FLOAT80_SUPPORT
- case 10: // float80
- info->dest[7] = 0xff;
- info->dest[8] = 0xfe;
- info->dest[9] = info->negative ? 0xff : 0x7f;
-#endif
-#if ENABLE_BINARY128_SUPPORT
- case 16: // binary128
- info->dest[14] = 0xfe;
- info->dest[15] = info->negative ? 0xff : 0x7f;
- break;
-#endif
- }
-}
-
-// This gets invoked for inputs whose magnitude is greater
-// than the max normal value + 1 ulp.
-// Gdtoa returns signed INF for such values regardless of the
-// rounding mode. Glibc rounds them correctly.
-static void overflow(struct parseInfo *info) {
- // Overflow is always an ERANGE error
- errno = ERANGE;
- int roundingMode = FENV_ROUNDING_MODE();
- int negative = info->negative;
-
- if (0
-#ifdef FE_TOWARDZERO
- || roundingMode == FE_TOWARDZERO
-#endif
-#ifdef FE_DOWNWARD
- || (roundingMode == FE_DOWNWARD && !negative)
-#endif
-#ifdef FE_UPWARD
- || (roundingMode == FE_UPWARD && negative)
-#endif
- ) {
- max_value(info);
- } else {
- infinity(info);
- }
-}
-
-// This gets invoked for inputs that are nonzero, but closer to zero
-// than to the least positive or negative subnormal. For IEEE754
-// formats (and Intel x87 Extended format), these get returned as
-// either +/- zero or the least subnormal, depending on the current
-// rounding mode. In all of these cases, the bit pattern is all zero
-// bits except possibly for the top bit (sign) and bottom bit (either zero for
-// zero or 1 for the least subnormal).
-static void underflow(struct parseInfo *info) {
- // Note: C17 allows implementations to set ERANGE for underflow or not.
- // Traditionally, ERANGE has been set somewhat inconsistently:
- // gdtoa sets it for subnormals expressed in decimal format but not
- // hexadecimal; glibc uses ERANGE as an inexact flag in various cases.
-
- // To generally match traditional usage, this implementation sets ERANGE
- // for any subnormal return.
- // It also sets it when a non-zero input is rounded to zero. This provides
- // a way for clients to distinguish a true zero (such as "0.0e0") from a
- // very small non-zero (such as "1e-999999").
- errno = ERANGE;
- uint8_t bottomBit = 0;
- int roundingMode = FENV_ROUNDING_MODE();
- if (0
-#ifdef FE_DOWNWARD
- || (roundingMode == FE_DOWNWARD && info->negative)
-#endif
-#ifdef FE_UPWARD
- || (roundingMode == FE_UPWARD && !info->negative)
-#endif
- ) {
- bottomBit = 1;
- }
-
- // 16/32/64-bit formats we can hardcode the full value
- // and memcpy() it. This is endian-safe (assuming that
- // integers and FP values have the same endianness).
- switch (info->bytes) {
-#if ENABLE_BINARY16_SUPPORT
- case 2: { // binary16
- uint16_t raw = (info->negative ? 0x8000 : 0) | bottomBit;
- memcpy(info->dest, &raw, sizeof(raw));
- break;
- }
-#endif
-#if ENABLE_BINARY32_SUPPORT
- case 4: { // binary32
- uint32_t raw = (info->negative ? 0x80000000UL : 0) | bottomBit;
- memcpy(info->dest, &raw, sizeof(raw));
- break;
- }
-#endif
-#if ENABLE_BINARY64_SUPPORT
- case 8: { // binary64
- uint64_t raw = (info->negative ? 0x8000000000000000ULL : 0) | bottomBit;
- memcpy(info->dest, &raw, sizeof(raw));
- break;
- }
-#endif
-#if ENABLE_FLOAT80_SUPPORT || ENABLE_BINARY128_SUPPORT
- case 10: case 16: {
- // TODO: Make this endian-safe
- memset(info->dest, 0, (size_t)info->bytes); // Initialize to +0
- info->dest[0] = bottomBit;
- info->dest[info->bytes - 1] = info->negative ? 0x80 : 0;
- break;
- }
-#endif
- }
-}
-
-// ================================================================
-// ================================================================
-//
-// General slow path
-//
-// ================================================================
-// ================================================================
-
-
-// Given a pointer to an ASCII digit string, initialize
-// the mpint with the decimal value of those digits.
-//
-// Any non-digit characters (e.g., decimal points) are ignored.
-//
-// If there are more than maxDecimalMidpointDigits, then
-// we stop when we've reached that many digits and then
-// add exactly one digit: zero if all the remaining digits
-// are zero, else one.
-//
-// Arguments:
-// work - pointer to the mpint
-// info - parseInfo struct
-static void initMPFromDigits(mp_t *work, struct parseInfo *info) {
- // Break first19 digits into words
- mp_word_t *component = work->lsw;
- uint64_t first19 = info->digits;
- while (first19 > 0) {
- *component = (mp_word_t)first19;
- first19 >>= mp_word_bits;
- component += 1;
- }
- work->msw = component;
-
- // Figure out how many more digits we should parse,
- // "extra" digits are the ones beyond maxDecimalMidpointDigits.
- int remainingDigitCount = info->unparsedDigitCount;
- int extraDigitCount = 0;
- if (info->digitCount > info->maxDecimalMidpointDigits) {
- remainingDigitCount
- = info->maxDecimalMidpointDigits - (info->digitCount - info->unparsedDigitCount);
- extraDigitCount = info->unparsedDigitCount - remainingDigitCount;
- }
-
- // Handle remaining digits in batches of <= 9 digits
- static const uint32_t powersOfTen[] = {1, 10, 100, 1000, 10000UL,
- 100000UL, 1000000UL, 10000000UL, 100000000UL, 1000000000UL};
- const unsigned char *p = info->firstUnparsedDigit;
- while (remainingDigitCount > 0) {
- int batchSize = remainingDigitCount > 9 ? 9 : remainingDigitCount;
- uint64_t batch = 0;
- for (int i = 0; i < batchSize; i++, p++) {
- unsigned t = (unsigned)(*p - '0');
- while (t > 9) {
- p += 1; // Skip non-digits (decimal point)
- t = (unsigned)(*p - '0');
- }
- batch = batch * 10 + t;
- }
- multiplyMPByN(work, powersOfTen[batchSize]);
- addToMP(work, batch);
- remainingDigitCount -= batchSize;
- }
-
- // Extra digits are summarized in a single digit:
- // zero if all the extra digits are zero, else one.
- if (extraDigitCount > 0) {
- multiplyMPByN(work, 10);
- while (extraDigitCount > 0) {
- if (*p == '0') {
- extraDigitCount -= 1;
- } else if (*p >= '1' && *p <= '9') {
- addToMP(work, 1);
- return;
- }
- // Note: Non-digit (decimal point) chars are ignored
- p += 1;
- }
- }
-}
-
-// This takes a significand and power of ten and returns the high-order
-// bits of the product (correctly rounded) using exact varint arithmetic.
-// This is the common slowpath used by all of the parsers below.
-
-// General strategy: For positive exponents, the significand is
-// multiplied by the power of 10 to form a large varint. The
-// high-order bits (suitably rounded) are returned as the binary
-// significand. The number of bits are returned as the
-// resultExponent. For negative exponents, the significand is divided
-// by the power of ten with suitable scaling to ensure the quotient
-// will have sufficient bits to return a correctly-rounded binary significand.
-
-// This takes a pointer to a stack work area; that allows each
-// format-specific implementation to pass a buffer that is
-// suitably-sized. For binary16/32/64, the passed-in buffer is sized
-// so that we will never have to allocate a larger buffer on the heap,
-// and those versions pass `false` for `heapAllocOK` to assert this.
-// For float80/binary128, the buffer is large enough for common
-// requests; larger inputs may require us to allocate a temporary
-// stack buffer for the duration of this call.
-
-// Arguments:
-// info - parseInfo struct with format details, original request
-// information, and parser output
-// roundingMode - result from fegetround()
-// stackWorkBuffer - pointer to an on-stack work area
-// stackWorkBufferBytes - size of work area in mp_word_t
-// heapAllocOK - if zero, we'll assert on any heap allocation
-static void generalSlowpath(struct parseInfo *info,
- int roundingMode,
- mp_word_t *stackWorkArea,
- int stackWorkAreaWords,
- bool heapAllocOK)
-{
- mp_t mpSignificand;
- int binaryExponent;
-
- // If stackWorkArea is not big enough, space will be allocated
- // on the heap that will need to be released before we return.
- mp_word_t *heapAlloc = NULL;
-
- // We'll process `digitCount` digits but no more than
- // maxDecimalMidpointDigits + 1. (See initMPWithDigits above)
- int significandDigits = info->digitCount;
- if (significandDigits > info->maxDecimalMidpointDigits) {
- significandDigits = info->maxDecimalMidpointDigits + 1;
- }
- int base10Exponent = info->base10Exponent - significandDigits + info->digitCount;
-
- // Figure out how many words we need for the significand and power of 5
- // 1701 / 512 is slightly bigger than log2(10) ~= 3.32
- // 1189 / 512 is slightly bigger than log2(5) ~= 2.32
- int significandBitsNeeded = (significandDigits * 1701) >> 9;
- int significandWordsNeeded = (significandBitsNeeded + (mp_word_bits - 1)) / mp_word_bits;
-
- int exponentBitsNeeded = (((base10Exponent < 0 ? -base10Exponent : base10Exponent) + 1) * 1189) >> 9;
- int exponentWordsNeeded = (exponentBitsNeeded + (mp_word_bits - 1)) / mp_word_bits;
-
- if (base10Exponent >= 0) {
- // ================================================================
- // Slow path for exponent > 0
- int totalWordsNeeded = significandWordsNeeded + exponentWordsNeeded;
- mp_t workMP;
- if (totalWordsNeeded <= stackWorkAreaWords) {
- memset(stackWorkArea, 0, (size_t)stackWorkAreaWords * sizeof(mp_word_t));
- workMP.lsw = workMP.msw = stackWorkArea;
- } else {
- assert(heapAllocOK);
- heapAlloc = (mp_word_t *)calloc((size_t)totalWordsNeeded, sizeof(mp_word_t));
- if (heapAlloc == NULL) {
- memset(info->dest, 0, (size_t)info->bytes);
- return;
- }
- workMP.lsw = workMP.msw = heapAlloc;
- }
-
- // Load workMP with digits * 5^n
- initMPFromDigits(&workMP, info);
- multiplyByFiveToTheN(&workMP, base10Exponent);
- assert((workMP.msw - workMP.lsw) <= totalWordsNeeded);
-
- // Bit count => binary exponent
- int bits = bitCountMP(workMP);
- binaryExponent = bits + base10Exponent; // Factor of 2^n from above
- // Leading bits => significand
- mpSignificand = shiftRightMPWithRounding(workMP,
- bits - info->sigBits, 0, info->negative, roundingMode);
- // Adjust the exponent if a round-up gave us one too many bits
- if (bitCountMP(mpSignificand) > info->sigBits) {
- binaryExponent += 1;
- mpSignificand = shiftRightMPWithTruncation(mpSignificand, 1);
- }
- if (binaryExponent > info->maxBinaryExp) {
- // Overflow.
- free(heapAlloc);
- overflow(info);
- return;
- }
- } else {
- // ================================================================
- // Slow path for exponent < 0
-
- // Basic idea: Since base10Exponent is negative, we can't work
- // directly with 10^base10Exponent (it's an infinite binary
- // fraction), but 10^-base10Exponent is an integer. So we use
- // varint arithmetic to compute
- // digits / 10^(-base10Exponent)
- // scaling the numerator so that the quotient will have at least
- // 53 bits. The tricky part is keeping the right information to do
- // accurate rounding of the result.
-
- // Widen numerator so the result after division will have enough bits to round
- int numeratorBitsNeeded = significandBitsNeeded;
- if (numeratorBitsNeeded < exponentBitsNeeded + info->sigBits + 2) {
- numeratorBitsNeeded = exponentBitsNeeded + info->sigBits + 2;
- }
- int numeratorWordsNeeded = (numeratorBitsNeeded + (mp_word_bits - 1)) / mp_word_bits + 2;
- int denominatorWordsNeeded = exponentWordsNeeded;
- int totalWordsNeeded = numeratorWordsNeeded + denominatorWordsNeeded;
-
- mp_word_t *work;
- if (totalWordsNeeded <= stackWorkAreaWords) {
- memset(stackWorkArea, 0, stackWorkAreaWords * sizeof(mp_word_t));
- work = stackWorkArea;
- } else {
- assert(heapAllocOK);
- heapAlloc = (mp_word_t *)calloc((size_t)totalWordsNeeded, sizeof(mp_word_t));
- if (heapAlloc == NULL) {
- memset(info->dest, 0, (size_t)info->bytes);
- return;
- }
- work = heapAlloc;
- }
-
- mp_t numerator = {work, work};
- mp_t denominator = {work + numeratorWordsNeeded, work + numeratorWordsNeeded};
-
- // Denominator holds power of 10^N
- // (Actually 5^N, the remaining factor of 2^N is handled later.)
- fiveToTheN(&denominator, -base10Exponent);
- assert((denominator.msw - denominator.lsw) <= denominatorWordsNeeded);
- assert(denominator.msw[-1] != 0);
-
- // Populate numerator with digits, widen it to ensure final
- // quotient has at least sigBits + 2 bits.
- initMPFromDigits(&numerator, info);
- assert(numerator.msw[-1] != 0);
- int numeratorShift = bitCountMP(denominator) - bitCountMP(numerator) + info->sigBits + 2;
- if (numeratorShift > 0) {
- shiftLeftMP(&numerator, numeratorShift);
- assert(numerator.msw[-1] != 0);
- assert((numerator.msw - numerator.lsw) < numeratorWordsNeeded);
- } else {
- numeratorShift = 0;
- }
-
- // Divide, compute exact binaryExponent
- // Note: division is destructive; overwrites numerator with quotient
- int remainderNonZero;
- mp_t quotient = divideMPByMP(numerator, denominator, &remainderNonZero);
- // Binary exponent starts from number of bits in quotient
- int quotientBits = bitCountMP(quotient);
- binaryExponent = quotientBits;
- // 2^base10Exponent was omitted from the 10^N denominator above
- binaryExponent += base10Exponent;
- // We multiplied by 2^numeratorShift above, so divide by
- // 2^numeratorShift to cancel it out.
- binaryExponent -= numeratorShift;
-
- if (binaryExponent > info->minBinaryExp) {
- // Normal decimal
- mpSignificand = shiftRightMPWithRounding(quotient,
- quotientBits - info->sigBits, remainderNonZero, info->negative, roundingMode);
- if (bitCountMP(mpSignificand) > info->sigBits) {
- binaryExponent += 1;
- mpSignificand = shiftRightMPWithTruncation(mpSignificand, 1);
- }
- if (binaryExponent > info->maxBinaryExp) {
- free(heapAlloc);
- overflow(info);
- return;
- }
- } else if (binaryExponent > info->minBinaryExp - info->sigBits) {
- // Subnormal decimal
- int bitsNeeded = binaryExponent - (info->minBinaryExp - info->sigBits + 1);
- binaryExponent = info->minBinaryExp;
- mpSignificand = shiftRightMPWithRounding(quotient,
- quotientBits - bitsNeeded, remainderNonZero, info->negative, roundingMode);
-
- // Usually, overflowing the expected number of bits doesn't
- // break anything; it just results in a significand 1 bit longer
- // than we expected.
-
- // Except when the significand overflows into the exponent.
- // Then we have a normal, so the extra overflow bit
- // will naturally get dropped, we just have to bump the
- // exponent.
- if (bitCountMP(mpSignificand) >= info->sigBits) {
- binaryExponent += 1;
- } else {
- errno = ERANGE; // This will be a true subnormal return, set ERANGE
- }
- } else {
- // Underflow.
- free(heapAlloc);
- underflow(info);
- return;
- }
- }
-
- // Zero-extend to sigBits and copy to dest
- size_t mpWords = (size_t)(mpSignificand.msw - mpSignificand.lsw);
- size_t expectedWords = (size_t)((info->sigBits + mp_word_bits - 1) / mp_word_bits);
- if (mpWords < expectedWords) {
- memset(mpSignificand.lsw + mpWords, 0, (expectedWords - mpWords) * sizeof(mp_word_t));
- }
- // TODO: Endianness. This only works for little-endian systems.
- memcpy(info->dest, mpSignificand.lsw, ((unsigned)info->sigBits + 7) / 8);
- // Free the heap work area (if any)
- free(heapAlloc);
-
- // Set the exponent & sign bits
- uint16_t exponentBits = (uint16_t)(binaryExponent - info->minBinaryExp);
- if (info->bytes <= 8) {
- // float80 and binary128 have 16 bit exponent+sign, so no shift needed
- exponentBits <<= 16 - (info->bytes * 8 - info->sigBits + 1);
- }
- exponentBits |= info->negative ? 0x8000 : 0;
-
- unsigned char *p = info->dest + info->bytes;
- switch (info->bytes) {
- case 2:
- p[-1] = (info->dest[1] & 0x03) | (unsigned char)(exponentBits >> 8);
- return;
- case 4:
- p[-2] = (info->dest[2] & 0x7f) | (unsigned char)exponentBits;
- break;
- case 8:
- p[-2] = (info->dest[6] & 0x0f) | (unsigned char)exponentBits;
- break;
- case 10:
- case 16:
- p[-2] = (unsigned char)exponentBits;
- break;
- }
- p[-1] = (unsigned char)(exponentBits >> 8);
-}
-
-// ================================================================
-// ================================================================
-//
-// Hex Float parsing
-//
-// ================================================================
-// ================================================================
-
-// This is called with `start` pointing to the `0x` that begins
-// the hex float. We assume the first two characters have already
-// been verified to be '0x'.
-static void
-hexFloat(const unsigned char *start, struct parseInfo *info) {
- const unsigned char *p = start + 2; // Skip leading '0x'
-
- // Two 64-bit ints that we use as a joint 128-bit accumulator
- uint64_t significand_lsw = 0, significand_msw = 0;
-
- // Digits before the decimal point
- const unsigned char *firstDigit = p;
- unsigned remainder = 0;
- int base2Exponent = 0;
- // Perf: Just use the lower 64 bits until it's full...
- while (hexdigit[*p] < 16 && significand_lsw < (uint64_t)1 << 60) {
- significand_lsw <<= 4;
- significand_lsw += hexdigit[*p];
- p += 1;
- }
- // ... then work with the full 128 bits until it's full ...
- while (hexdigit[*p] < 16 && significand_msw < (uint64_t)1 << 59) {
- significand_msw <<= 4;
- significand_msw |= (significand_lsw >> 60);
- significand_lsw <<= 4;
- significand_lsw += hexdigit[*p];
- p += 1;
- }
- // ... if there's more beyond that, just track whether it's non-zero.
- while (hexdigit[*p] < 16) {
- remainder |= hexdigit[*p];
- base2Exponent += 4;
- p += 1;
- }
- int digitCount = (int)(p - firstDigit);
-
- // Try to match decimal point
- if (info->loc == strtofp_C_locale) {
- if (*p == '.') {
- p += 1;
- } else {
- goto possible_exponent;
- }
- } else {
- const unsigned char *decimalPoint = strtofp_locale_decimal_point(info->loc);
- const unsigned char *startOfPotentialDecimalPoint = p;
- for (const unsigned char *d = decimalPoint; *d; d++) {
- if (*p != *d) {
- p = startOfPotentialDecimalPoint;
- goto possible_exponent;
- }
- p++;
- }
- }
-
- // Collect digits after decimal point
- const unsigned char *firstFractionDigit = p;
- if (significand_msw == 0) {
- while (hexdigit[*p] < 16 && significand_lsw < (uint64_t)1 << 60) {
- significand_lsw <<= 4;
- significand_lsw += hexdigit[*p];
- p += 1;
- }
- }
- while (hexdigit[*p] < 16 && significand_msw < (uint64_t)1 << 59) {
- significand_msw <<= 4;
- significand_msw |= (significand_lsw >> 60);
- significand_lsw <<= 4;
- significand_lsw += hexdigit[*p];
- p += 1;
- }
- // Initialize exponent from the number of digits after
- // the decimal point.
- base2Exponent -= 4 * (p - firstFractionDigit);
- // Any remaining digits may impact rounding...
- while (hexdigit[*p] < 16) {
- remainder |= hexdigit[*p];
- p += 1;
- }
- digitCount += p - firstFractionDigit;
-
- possible_exponent:
- if (*p == 'p' || *p == 'P') {
- const unsigned char *exponentPhrase = p;
- p += 1;
- int negativeExponent = 0;
- if (*p == '-') {
- negativeExponent = 1;
- p += 1;
- } else if (*p == '+') {
- p += 1;
- }
- if (*p < '0' || *p > '9') {
- // Ignore 'e' or 'E' not followed by number.
- p = exponentPhrase;
- } else {
- // Skip zeros in "0x1p+0000000000000000000000000001"
- int exp = 0;
- unsigned t = (unsigned)(*p - '0');
- while (t < 10) {
- if (exp > 99999999) {
- exp = 99999999;
- } else {
- exp = exp * 10 + (int)t;
- }
- p += 1;
- t = (unsigned)(*p - '0');
- }
- if (negativeExponent) {
- exp = -exp;
- }
- base2Exponent += exp;
- }
- }
-
- if (significand_lsw == 0 && significand_msw == 0) {
- if (digitCount == 0) {
- // Malformed hexfloat with no digits after '0x',
- // BUT '0x' is still a valid zero followed by non-parsed 'x'
- p = start + 1; // Address of 'x'
- } else {
- // Just a regular zero
- }
- base2Exponent = info->minBinaryExp;
- } else {
- // Normalize to 127 bits
- if (significand_msw == 0) {
- if ((significand_lsw >> 63) == 0) {
- significand_msw = significand_lsw;
- significand_lsw = 0;
- base2Exponent -= 64;
- } else {
- significand_msw = significand_lsw >> 1;
- significand_lsw <<= 63;
- base2Exponent -= 63;
- }
- }
- int normalizeShift = __builtin_clzll(significand_msw) - 1;
- if (normalizeShift > 0) {
- significand_msw <<= normalizeShift;
- significand_msw |= significand_lsw >> (64 - normalizeShift);
- significand_lsw <<= normalizeShift;
- base2Exponent -= normalizeShift;
- }
- base2Exponent += 127;
- if (remainder) significand_lsw |= 1;
-
- if (base2Exponent <= info->maxBinaryExp
- && base2Exponent >= info->minBinaryExp - info->sigBits + 1) {
- int fractionBits;
- uint64_t fraction;
- if (base2Exponent > info->minBinaryExp) {
- // Hexfloat normal
- fractionBits = 127 - info->sigBits;
- } else {
- fractionBits = 127 - (base2Exponent - info->minBinaryExp + info->sigBits - 1);
- base2Exponent = info->minBinaryExp;
- }
- if (fractionBits < 64) {
- fraction = significand_lsw << (64 - fractionBits);
- significand_lsw >>= fractionBits;
- significand_lsw |= significand_msw << (64 - fractionBits);
- significand_msw >>= fractionBits;
- } else {
- fraction = (significand_msw << (128 - fractionBits))
- | (significand_lsw >> (fractionBits - 64));
- if ((significand_lsw << (128 - fractionBits)) != 0) {
- fraction |= 1;
- }
- significand_lsw = significand_msw >> (fractionBits - 64);
- significand_msw = 0;
- }
-
- switch (FENV_ROUNDING_MODE()) {
-#ifdef FE_TOWARDZERO
- case FE_TOWARDZERO:
- break;
-#endif
-#ifdef FE_DOWNWARD
- case FE_DOWNWARD:
- if (info->negative && (fraction != 0)) {
- significand_lsw += 1;
- if (significand_lsw == 0) {
- significand_msw += 1;
- }
- }
- break;
-#endif
-#ifdef FE_UPWARD
- case FE_UPWARD:
- if (!info->negative && (fraction != 0)) {
- significand_lsw += 1;
- if (significand_lsw == 0) {
- significand_msw += 1;
- }
- }
- break;
-#endif
-#ifdef FE_TONEAREST
- case FE_TONEAREST:
-#endif
- default: {
- const uint64_t oneHalf = (uint64_t)1 << 63;
- if (fraction > oneHalf
- || (fraction == oneHalf && (significand_lsw & 1))) {
- significand_lsw += 1;
- if (significand_lsw == 0) {
- significand_msw += 1;
- }
- }
- break;
- }
- }
-
- // Rounding up may have caused us to overflow:
- // For subnormals, overflow to sigBits converts this to a normal
- // For normal, overflow just needs to be renormalized
- int overflowBits = (base2Exponent == info->minBinaryExp) ? (info->sigBits - 1) : info->sigBits;
- if (((overflowBits > 64) && ((significand_msw >> (overflowBits - 64)) != 0))
- || ((overflowBits == 64) && (significand_msw != 0))
- || ((overflowBits < 64) && ((significand_msw != 0) || ((significand_lsw >> overflowBits) != 0)))) {
- if (base2Exponent > info->minBinaryExp) {
- significand_lsw >>= 1;
- significand_lsw |= significand_msw << 63;
- significand_msw >>= 1;
- }
- base2Exponent += 1;
- } else if (base2Exponent == info->minBinaryExp // Subnormal
- && fraction != 0) { // Not exact
- errno = ERANGE;
- }
- }
- }
-
- if (info->end) *info->end = (char *)(uintptr_t)p;
- if (base2Exponent > info->maxBinaryExp) {
- overflow(info);
- } else if (base2Exponent < info->minBinaryExp - info->sigBits + 1) {
- underflow(info);
- } else {
- switch (info->bytes) {
-#if ENABLE_BINARY16_SUPPORT
- case 2: {
- uint16_t exponentBits = (uint16_t)(base2Exponent - info->minBinaryExp);
- uint16_t raw = (uint16_t)
- ((info->negative ? 0x8000U : 0U)
- | (uint16_t)(exponentBits << 10)
- | (significand_lsw & 0x3ffU));
- memcpy(info->dest, &raw, sizeof(raw));
- break;
- }
-#endif
-#if ENABLE_BINARY32_SUPPORT
- case 4: {
- uint32_t exponentBits = (uint32_t)(base2Exponent - info->minBinaryExp);
- uint32_t raw = (uint32_t)
- ((info->negative ? 0x80000000UL : 0UL)
- | (exponentBits << 23U)
- | (uint32_t)(significand_lsw & 0x7fffffULL));
- memcpy(info->dest, &raw, sizeof(raw));
- break;
- }
-#endif
-#if ENABLE_BINARY64_SUPPORT
- case 8: {
- uint64_t exponentBits = (uint64_t)(base2Exponent - info->minBinaryExp);
- uint64_t raw = (uint64_t)
- ((info->negative ? 0x8000000000000000ULL : 0UL)
- | (exponentBits << 52U)
- | (significand_lsw & 0xfffffffffffffULL));
- memcpy(info->dest, &raw, sizeof(raw));
- break;
- }
-#endif
-#if ENABLE_FLOAT80_SUPPORT
- case 10: {
- // TODO: Support big-endian
- uint16_t exponentBits = (uint16_t)(base2Exponent - info->minBinaryExp);
- memcpy(info->dest, &significand_lsw, sizeof(significand_lsw));
- info->dest[8] = exponentBits & 0xff;
- info->dest[9] = (exponentBits >> 8) | (info->negative ? 0x80 : 0);
- break;
- }
-#endif
-#if ENABLE_BINARY128_SUPPORT
- case 16: {
- // TODO: Support big-endian
- uint16_t exponentBits = (uint16_t)(base2Exponent - info->minBinaryExp);
- memcpy(info->dest, &significand_lsw, sizeof(significand_lsw));
- memcpy(info->dest + 8, &significand_msw, sizeof(significand_msw));
- info->dest[14] = exponentBits & 0xff;
- info->dest[15] = (exponentBits >> 8) | (info->negative ? 0x80 : 0);
- break;
- }
-#endif
- }
- }
-}
-
-// ================================================================
-// ================================================================
-//
-// NaN parsing
-//
-// ================================================================
-// ================================================================
-
-// Parse a NaN.
-// This implements the same logic as Apple's previous libc strtod. It
-// recognizes an optional payload between parentheses and uses
-// that to construct a valid NaN return value.
-
-static void parseNan(const unsigned char *start, struct parseInfo *info) {
- const unsigned char *p = start + 3; // Skip "nan"
- unsigned char _Alignas(sizeof(mp_word_t)) stackWorkArea[20];
- memset(stackWorkArea, 0, sizeof(stackWorkArea));
- const unsigned char *endNan = p;
- if (*p == '(') {
- p += 1;
- unsigned base = 10;
- if (*p == '0') {
- p += 1;
- if (*p == 'x') {
- base = 16;
- p += 1;
- } else {
- base = 8;
- }
- }
- mp_t stackMP = { (mp_word_t *)stackWorkArea,
- (mp_word_t *)(stackWorkArea + 16) };
- mp_t payload = stackMP;
- while (hexdigit[*p] < base) {
- multiplyMPByN(&payload, base);
- addToMP(&payload, hexdigit[*p]);
- payload.msw = stackMP.msw; // Prune off excess bits
- p += 1;
- }
- if (*p == ')') {
- p += 1;
- } else {
- memset(stackWorkArea, 0, sizeof(stackWorkArea));
- while (*p != '\0' && *p != ')') {
- p += 1;
- }
- if (*p == ')') {
- p += 1;
- } else {
- p = endNan;
- }
- }
- }
- // TODO: Endianness. This only works for little-endian.
- memcpy(info->dest, stackWorkArea, (size_t)info->bytes);
- switch (info->bytes) {
-#if ENABLE_BINARY16_SUPPORT
- case 2: {
- info->dest[1] = info->dest[1] | (info->negative ? 0xfe : 0x7e);
- break;
- }
-#endif
-#if ENABLE_BINARY32_SUPPORT
- case 4: {
- info->dest[2] |= 0xc0; // Set quiet bit and low-order exponent bit
- info->dest[3] = info->negative ? 0xff : 0x7f; // exponent and sign bit
- break;
- }
-#endif
-#if ENABLE_BINARY64_SUPPORT
- case 8: {
- info->dest[6] |= 0xf8; // Set quiet bit and low-order exponent bits
- info->dest[7] = info->negative ? 0xff : 0x7f; // exponent and sign bit
- break;
- }
-#endif
-#if ENABLE_FLOAT80_SUPPORT
- case 10: {
- info->dest[7] |= 0xc0; // Set bit 63 and quiet bit
- info->dest[8] = 0xff;
- info->dest[9] = info->negative ? 0xff : 0x7f;
- break;
- }
-#endif
-#if ENABLE_BINARY128_SUPPORT
- case 16: {
- info->dest[13] |= 0x80; // Set quiet bit
- info->dest[14] = 0xff;
- info->dest[15] = info->negative ? 0xff : 0x7f;
- break;
- }
-#endif
- }
- if (info->end) *info->end = (char *)(uintptr_t)p;
-}
-
-// This is used as the initial parse for all formats.
-//
-// It verifies the format and handles accordingly:
-// * hexFloat is parsed by calling hexFloat() above
-// * NaN payloads are parse by calling parseNan() above
-// * Infinity and NaN w/o payload are parsed directly
-// * True zero is parsed directly
-// The above are fully handled within this function and
-// it returns zero (false) to flag that there's nothing
-// more to do.
-//
-// For a decimal input, it collects the digits into a 64-bit
-// accumulator and returns that and the parsed exponent.
-// If there are <= 19 digits, this is enough to generate
-// a final result directly.
-//
-// If there are more than 19 digits, this logic first lets the
-// accumulator overflow. After we detect the overflow, we re-parse
-// the first 19 digits and return those. Callers who need more digits
-// will have to use firstUnparsedDigit and digitCount to re-parse the
-// significand (they can ignore the decimal point, though, since that
-// has already been factored into the base10Exponent).
-static int
-fastParse64(struct parseInfo *info) {
- const unsigned char *p = (const unsigned char *)info->start;
-
- uint64_t digits = 0;
- int digitCount = 0;
- int base10Exponent = 0;
- info->negative = 0;
-
- // Skip leading whitespace. Stop at a +/-/digit or other character.
- // This is a little oddly phrased in order to avoid a pointless call
- // to `isspace` for common cases.
- while (1) {
- if (*p >= '0' && *p <= '9') {
- break;
- } else if (*p == '-') {
- info->negative = 1;
- p += 1;
- break;
- } else if (*p == '+') {
- p += 1;
- break;
- } else if (*p == ' ' || isspace(*p)) {
- p += 1;
- } else {
- break;
- }
- }
-
- if (*p == '0') {
- if (p[1] == 'x' || p[1] == 'X') {
- hexFloat(p, info);
- return 0;
- }
- while (*p == '0') {
- p += 1;
- }
- } else if (*p == 'i' || *p == 'I') {
- if ((p[1] == 'n' || p[1] == 'N')
- && (p[2] == 'f' || p[2] == 'F')) {
- if ((p[3] == 'i' || p[3] == 'I')
- && (p[4] == 'n' || p[4] == 'N')
- && (p[5] == 'i' || p[5] == 'I')
- && (p[6] == 't' || p[6] == 'T')
- && (p[7] == 'y' || p[7] == 'Y')) {
- p += 8;
- } else {
- p += 3;
- }
- // Matched 'inf' or 'infinity' case-insensitive
- if (info->end) *info->end = (char *)(uintptr_t)p;
- infinity(info);
- return 0;
- }
- goto fail;
- } else if (*p == 'n' || *p == 'N') {
- if ((p[1] == 'a' || p[1] == 'A')
- && (p[2] == 'n' || p[2] == 'N')) {
- parseNan(p, info);
- return 0;
- }
- goto fail;
- } else if (*p < '0' || *p > '9') {
- // If this isn't a hexfloat, nan, or infinity and does
- // start with a digit, it must start with a decimal point,
- // and that decimal point must be immediately followed
- // by a digit.
- if (info->loc == strtofp_C_locale) {
- // Decimal point is '.' in C locale, avoid calling localeconv_l
- if (*p == '.') {
- p += 1;
- if (*p >= '0' && *p <= '9') {
- goto parseFraction;
- }
- }
- } else {
- // Look up decimal point in locale
- const unsigned char *decimalPoint = strtofp_locale_decimal_point(info->loc);
- if (decimalPoint[1] == '\0') {
- if (decimalPoint[0] == *p) {
- p += 1;
- if (*p >= '0' && *p <= '9') {
- goto parseFraction;
- }
- }
- } else {
- // Multi-byte decimal point
- int matchedDecimalPoint = (1 == 1);
- for (const unsigned char *d = decimalPoint; *d; d++) {
- matchedDecimalPoint &= (*p == *d);
- p++;
- }
- if (matchedDecimalPoint && *p >= '0' && *p <= '9') {
- goto parseFraction;
- }
- }
- }
- goto fail;
- }
-
- // Collect digits before the decimal point
- const unsigned char *firstUnparsedDigit = p;
- {
- uint8_t t = (uint8_t)(*p - '0');
- if (t < 10) {
- digits = t;
- p += 1;
- t = (uint8_t)(*p - '0');
- while(t < 10) {
- digits = 10 * digits + t;
- p += 1;
- t = (uint8_t)(*p - '0');
- }
- digitCount = (int)(p - firstUnparsedDigit);
- }
- }
-
- // Try to match an optional decimal point
- if (info->loc == strtofp_C_locale) {
- // Decimal point is '.' in C locale, avoid calling localeconv_l
- if (*p == '.') {
- p += 1;
- goto parseFraction;
- }
- goto maybeParseExponent;
- } else if (*p == ' ' || *p == '\0' || *p == 'e' || *p == 'E') {
- // Don't call localeconv_l if we know the next character
- // cannot possibly be a decimal point.
- goto maybeParseExponent;
- } else {
- // Look up decimal point in locale
- const unsigned char *decimalPoint = strtofp_locale_decimal_point(info->loc);
- if (decimalPoint[1] == '\0') {
- if (decimalPoint[0] == *p) {
- p += 1;
- goto parseFraction;
- }
- } else {
- // Multi-byte decimal point
- const unsigned char *startOfPotentialDecimalPoint = p;
- int matchedDecimalPoint = (1 == 1);
- for (const unsigned char *d = decimalPoint; *d; d++) {
- matchedDecimalPoint &= (*p == *d);
- p++;
- }
- if (matchedDecimalPoint) {
- goto parseFraction;
- } else {
- p = startOfPotentialDecimalPoint;
- goto maybeParseExponent;
- }
- }
- }
-
- parseFraction:
- {
- const unsigned char *firstDigitAfterDecimalPoint = p;
- if (digitCount == 0) {
- while (*p == '0') {
- p += 1;
- }
- // "0.000000001234" has 4 digits
- firstUnparsedDigit = p;
- unsigned t = (unsigned)(*p - '0');
- if (t < 10) {
- p += 1;
- digits = t;
- t = (unsigned)(*p - '0');
- while (t < 10) {
- digits = 10 * digits + t;
- p += 1;
- t = (unsigned)(*p - '0');
- }
- }
- digitCount = (int)(p - firstUnparsedDigit);
- } else {
- // Perf: For canada.txt benchmark, this loop is ~30% of total runtime
- unsigned t = (unsigned)(*p - '0');
- if (t < 10) {
- p += 1;
- digits = 10 * digits + t;
- t = (unsigned)(*p - '0');
- while (t < 10) {
- p += 1;
- digits = 10 * digits + t;
- t = (unsigned)(*p - '0');
- }
- }
- digitCount += p - firstDigitAfterDecimalPoint;
- }
- base10Exponent = (int)(firstDigitAfterDecimalPoint - p);
- }
-
- // ================================================================
- // Step 1e: Parse the optional exponent
- maybeParseExponent:
- if (*p == 'e' || *p == 'E') {
- const unsigned char *exponentPhraseStart = p;
- p += 1;
- int negativeExponent = 1;
- if (*p == '-') {
- negativeExponent = -1;
- p += 1;
- } else if (*p == '+') {
- p += 1;
- }
- uint8_t t = (uint8_t)(*p - '0');
- if (t < 10) {
- unsigned exp = t;
- p += 1;
- t = (uint8_t)(*p - '0');
- while (t < 10) {
- p += 1;
- exp = 10 * exp + t;
- t = (uint8_t)(*p - '0');
- }
- if (p - exponentPhraseStart > 9) {
- // The exponent text was unusually long... re-parse
- // it more carefully to see if it really should overflow.
- const unsigned char *q = exponentPhraseStart + 1;
- if (*q == '-' || *q == '+') {
- q += 1;
- }
- while (*q == '0') {
- q += 1;
- }
- if (p - q > 8) {
- // If there were more than 8 digits with leading zeros
- // excluded, we've definitely overflowed.
- exp = 99999999;
- }
- }
- base10Exponent += (int)exp * negativeExponent;
- } else {
- p = exponentPhraseStart;
- }
- }
-
- if (info->end) *info->end = (char *)(uintptr_t)p;
-
- // No non-zero digits, must be an explicit zero:
- // "0", ".000", "0.0", "0e0", "0.0e999", etc.
- if (digitCount == 0) {
- memset(info->dest, 0, info->bytes);
- info->dest[info->bytes - 1] = info->negative ? 0x80 : 0;
- return 0;
- }
-
- // Coarse over/underflow check
- if (base10Exponent + digitCount < info->minDecimalExp) {
- underflow(info);
- return 0;
- }
- if (base10Exponent + digitCount > info->maxDecimalExp) {
- overflow(info);
- return 0;
- }
-
- int unparsedDigitCount = 0;
- if (digitCount > 19) {
- digits = 0;
- int i = 0;
- const unsigned char *q = firstUnparsedDigit;
- while (i < 19) {
- // Note: Skip non-digit chars (e.g., decimal point)
- unsigned t = (unsigned)(*q - '0');
- if (t < 10) {
- digits = digits * 10 + t;
- i += 1;
- }
- q += 1;
- }
- firstUnparsedDigit = q;
- unparsedDigitCount = digitCount - 19;
- } else {
- firstUnparsedDigit = NULL;
- }
-
- info->digitCount = digitCount;
- info->firstUnparsedDigit = firstUnparsedDigit;
- info->unparsedDigitCount = unparsedDigitCount;
- info->digits = digits;
- info->base10Exponent = base10Exponent;
-
- return 1; // Regular decimal case...
-
- fail:
- if (info->end) *info->end = (char *)(uintptr_t)info->start;
- memset(info->dest, 0, info->bytes);
- return 0;
-}
-
-// ================================================================
-// ================================================================
-//
-// Parse an IEEE 754 Binary16 (aka "Half")
-//
-// ================================================================
-// ================================================================
-
-#if ENABLE_BINARY16_SUPPORT
-static void _ffpp_strtoencf16_l(unsigned char *dest, const char *start, char **end, strtofp_locale_t loc) {
- static const int bytes = 2;
- static const int sigBits = 11;
- static const int minBinaryExp = -14;
- static const int maxBinaryExp = 16;
- static const int minDecimalExp = -7;
- static const int maxDecimalExp = 5;
- static const int maxDecimalMidpointDigits = 22;
- struct parseInfo info = {
- .bytes = bytes,
- .sigBits = sigBits,
- .minBinaryExp = minBinaryExp,
- .maxBinaryExp = maxBinaryExp,
- .minDecimalExp = minDecimalExp,
- .maxDecimalExp = maxDecimalExp,
- .maxDecimalMidpointDigits = maxDecimalMidpointDigits,
- .dest = dest,
- .start = start,
- .end = end,
- .loc = loc,
- };
-
- // ================================================================
- // Parse the input (mostly)
- // ================================================================
- if (!fastParse64(&info)) {
- return;
- }
-
- // TODO: Someday, we might implement fast paths for binary16
- // But the range of binary16 is so small that the varint slow
- // path is actually reasonably fast.
-
- // ================================================================
- // Slow Path (varint calculation)
- // ================================================================
- char _Alignas(sizeof(mp_word_t)) stackWorkArea[32];
- static const size_t stackWorkAreaWords = sizeof(stackWorkArea) / sizeof(mp_word_t);
- generalSlowpath(&info, FENV_ROUNDING_MODE(), (mp_word_t *)stackWorkArea, stackWorkAreaWords, false);
-}
-#endif
-
-// ================================================================
-// ================================================================
-//
-// Parse an IEEE 754 Binary32 (aka "Float" aka "Single")
-//
-// ================================================================
-// ================================================================
-#if ENABLE_BINARY32_SUPPORT
-static void _ffpp_strtoencf32_l(unsigned char *dest, const char *start, char **end, strtofp_locale_t loc) {
- static const int bytes = 4;
- static const int sigBits = 24;
- static const int minBinaryExp = -126;
- static const int maxBinaryExp = 128;
- static const int minDecimalExp = -46;
- static const int maxDecimalExp = 40;
- static const int maxDecimalMidpointDigits = 113;
- struct parseInfo info = {
- .bytes = bytes,
- .sigBits = sigBits,
- .minBinaryExp = minBinaryExp,
- .maxBinaryExp = maxBinaryExp,
- .minDecimalExp = minDecimalExp,
- .maxDecimalExp = maxDecimalExp,
- .maxDecimalMidpointDigits = maxDecimalMidpointDigits,
- .dest = dest,
- .start = start,
- .end = end,
- .loc = loc,
- };
-
- // ================================================================
- // Parse the input (mostly)
- // ================================================================
- if (!fastParse64(&info)) {
- return;
- }
-
-#if FLOAT_IS_BINARY32
- // ================================================================
- // Use a single float operation
- // ================================================================
- const static float floatPowerOf10[] = {1.0f, 10.0f, 100.0f,
- 1e3f, 1e4f, 1e5f, 1e6f, 1e7f, 1e8f, 1e9f, 1e10f};
- if (info.base10Exponent > -11 && info.base10Exponent < 11 && info.digitCount < 8) {
- int32_t sdigits = info.negative ? -(int32_t)info.digits : (int32_t)info.digits;
- if (info.base10Exponent < 0) {
- float result = (float)sdigits / floatPowerOf10[-info.base10Exponent];
- memcpy(info.dest, &result, sizeof(result));
- return;
- } else {
- float result = (float)sdigits * floatPowerOf10[info.base10Exponent];
- memcpy(info.dest, &result, sizeof(result));
- return;
- }
- }
-#endif
-
- int roundingMode = FENV_ROUNDING_MODE();
-
-#if 1
- // ================================================================
- // Fixed-precision interval arithmetic
- // ================================================================
- // The idea: Use 64-bit fixed-precision arithmetic to compute
- // upper/lower bounds for the correct answer. If those bounds
- // agree, then we can return the result.
- int16_t exp10;
- unsigned upperBoundOffset;
- if (info.digitCount <= 19) {
- exp10 = (int16_t)info.base10Exponent;
- upperBoundOffset = 4;
- } else {
- exp10 = (int16_t)(info.base10Exponent + info.digitCount - 19);
- upperBoundOffset = 36;
- }
-
- // Powers in the table are rounded so that
- // powerOfTenRoundedDown <= true value <= powerOfTenRoundedDown + 1
- const uint64_t powerOfTenRoundedDown = (powersOf10_Float + 70)[exp10];
-
- // Binary exponent for the power-of-10 product
- const int powerOfTenExponent = binaryExponentFor10ToThe(exp10);
-
- // Normalize the digits, adjust binary exponent
- int normalizeDigits = __builtin_clzll(info.digits); // 0 <= normalizeDigits <= 4
- assert(normalizeDigits <= 4 || info.digitCount < 20);
- uint64_t d = info.digits << normalizeDigits;
- // For <= 19 digits, the upper bound for d is just d
- // For > 19 digits, the upper bound is 1 << normalizeDigits <= 16
- int binaryExponent = powerOfTenExponent - normalizeDigits + 64;
-
- // A 64-bit lower bound on the binary significand
- uint64_t l = multiply64x64RoundingDown(powerOfTenRoundedDown, d);
- // An upper bound:
- // <= 19 digits: (powerOfTenRoundedDown + 1) * d == l128 + d
- // > 19 digits: (powerOfTenRoundedDown + 1) * (d + 16)
- // == l128 + d + 16 * powerOfTenRoundedDown + 16
- // For <=19 digits, upper bound is l + 2
- // For >19 digits, upper bound is l + 18
-
- // Normalize the product, adjust binary exponent
- // (In particular, this lets us shift by a constant below.)
- int normalizeProduct = __builtin_clzll(l); // 0 <= normalizeProduct <= 1
- assert(normalizeProduct <= 1);
- // Upper bound is <= (l + 2) << 1 or (l + 18) << 1
- l <<= normalizeProduct;
- // Upper bound is <= l + 4 or l + 36
- binaryExponent -= normalizeProduct;
-
- // Upper/lower bounds for the 24-bit significand
- uint64_t u = l + upperBoundOffset;
- uint32_t lowerSignificand, upperSignificand;
- switch (roundingMode) {
-#ifdef FE_TOWARDZERO
- case FE_TOWARDZERO:
- lowerSignificand = (l) >> 40;
- upperSignificand = (u) >> 40;
- break;
-#endif
-#ifdef FE_DOWNWARD
- case FE_DOWNWARD:
- if (info.negative) {
- lowerSignificand = (l + 0x0ffffffffff) >> 40;
- upperSignificand = (u + 0x10000000000) >> 40;
- } else {
- lowerSignificand = (l) >> 40;
- upperSignificand = (l + 4) >> 40;
- }
- break;
-#endif
-#ifdef FE_UPWARD
- case FE_UPWARD:
- if (!info.negative) {
- lowerSignificand = (l + 0x0ffffffffff) >> 40;
- upperSignificand = (u + 0x10000000000) >> 40;
- } else {
- lowerSignificand = (l) >> 40;
- upperSignificand = (u) >> 40;
- }
- break;
-#endif
-#ifdef FE_TONEAREST
- case FE_TONEAREST:
-#endif
- default:
- // Instead of worrying about exact ties-round-even, round lower
- // down (adding 0x7ff...ff) and upper up (adding 0x800...00) so
- // that exact ties fall through to be handled elsewhere.
- lowerSignificand = (l + 0x7fffffffff) >> 40;
- upperSignificand = (u + 0x8000000000) >> 40;
- }
- if (lowerSignificand == 0) { // lowerSignificand wrapped...
- binaryExponent += 1;
- }
- if (binaryExponent > maxBinaryExp) {
- overflow(&info);
- return;
- } else if (binaryExponent <= minBinaryExp) {
- if (binaryExponent <= minBinaryExp - sigBits) {
- underflow(&info);
- return;
- }
- // TODO: ... Subnormal? ...
- } else if (upperSignificand == lowerSignificand) {
- uint32_t exponentBits = (uint32_t)(binaryExponent - minBinaryExp) << (sigBits - 1);
- uint32_t significandMask = (((uint32_t)1 << (sigBits - 1)) - 1);
- uint32_t significandBits = lowerSignificand & significandMask;
- uint32_t signbit = info.negative ? 0x80000000UL : 0ULL;
- uint32_t raw = signbit | exponentBits | significandBits;
- memcpy(info.dest, &raw, sizeof(raw));
- return;
- }
-#endif
-
- // ================================================================
- // Slow Path (varint calculation)
- // ================================================================
- char _Alignas(sizeof(mp_word_t)) stackWorkArea[128];
- static const size_t stackWorkAreaWords = sizeof(stackWorkArea) / sizeof(mp_word_t);
- generalSlowpath(&info, FENV_ROUNDING_MODE(), (mp_word_t *)stackWorkArea, stackWorkAreaWords, false);
-}
-#endif
-
-// ================================================================
-// ================================================================
-//
-// Parse an IEEE 754 Binary64 (Double)
-//
-// ================================================================
-// ================================================================
-
-#if ENABLE_BINARY64_SUPPORT
-static void _ffpp_strtoencf64_l(unsigned char *dest, const char *start, char **end, strtofp_locale_t loc) {
- static const int bytes = 8;
- static const int sigBits = 53;
- static const int minBinaryExp = -1022;
- static const int maxBinaryExp = 1024;
- static const int minDecimalExp = -325;
- static const int maxDecimalExp = 310;
- static const int maxDecimalMidpointDigits = 768;
- struct parseInfo info = {
- .bytes = bytes,
- .sigBits = sigBits,
- .minBinaryExp = minBinaryExp,
- .maxBinaryExp = maxBinaryExp,
- .minDecimalExp = minDecimalExp,
- .maxDecimalExp = maxDecimalExp,
- .maxDecimalMidpointDigits = maxDecimalMidpointDigits,
- .dest = dest,
- .start = start,
- .end = end,
- .loc = loc,
- };
-
- // ================================================================
- // Parse the input (mostly)
- // ================================================================
- if (!fastParse64(&info)) {
- return;
- }
-
- // If digitCount <= 19, then the result we want is:
- // (info.negative ? -1 : 1) * info.digits * 10^info.base10Exponent
-
- // The rest of this function consists of several different methods
- // of computing this product with varying trade-offs of input range
- // and performance. The first ones are fast but only work for
- // certain inputs; the later ones are slower and more general.
-
- // ================================================================
- // Floating-point Calculation
- // ================================================================
- // Note: This optimization relies on the host platform `double` type
- // supporting true IEEE754 binary64 arithmetic.
-#if DOUBLE_IS_BINARY64 && (FLT_EVAL_METHOD == 0 || FLT_EVAL_METHOD == 1)
-
- const static double doublePowerOf10[] = { 1.0, 10.0, 100.0, 1e3, 1e4, 1e5,
- 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16,
- 1e17, 1e18, 1e19, 1e20, 1e21, 1e22 };
-
- if (info.base10Exponent >= -22 && info.base10Exponent <= 18) {
- if (info.base10Exponent < 0) {
- // We can give exact inputs to a division operation if
- // we have <= 15 digits and base10Exponent is in [-22..0]
- // For example, this handles 1.23456789012345
- if (info.digitCount <= 15) {
- int64_t sdigits = info.negative ? -(int64_t)info.digits : (int64_t)info.digits;
- double result = (double)sdigits / doublePowerOf10[-info.base10Exponent];
- memcpy(info.dest, &result, sizeof(result));
- return;
- }
- // TODO: I really want to handle 35.678912345678934 here
- // (17 digits and negative exponent). This case is the common
- // case for Lemire's "canada.txt" benchmark (which is derived from
- // latitude/longitude data).
- } else if (info.base10Exponent == 0) {
- if (info.digitCount <= 19) {
- if (!info.negative) {
- // We can use HW conversion of unsigned (positive) int
- double result = (double)info.digits;
- memcpy(info.dest, &result, sizeof(result));
- return;
- } else if (info.digits <= INT64_MAX) {
- // We can use HW conversion of signed (negative) int
- double result = (double)(-(int64_t)info.digits);
- memcpy(info.dest, &result, sizeof(result));
- return;
- }
- }
- } else {
- if (info.digitCount <= 19) {
- // We can give exact inputs to fma if we have <= 19 digits
- // and base10Exponent is in [1..18].
- uint64_t lowMask = 0x7ff;
- uint64_t highMask = ~lowMask;
- // digits has <= 64 bits (19 digits)
- double highDigits = (double)(info.digits & highMask); // <= 53 bits
- double lowDigits = (double)(info.digits & lowMask); // 11 bits
- double p10 = doublePowerOf10[info.base10Exponent]; // Exact (10^18 has 42 bits)
- if (info.negative) p10 = -p10;
- double u = lowDigits * p10; // Exact (11 bits + 42 bits)
- // Inputs to fma are all exact, so result is correctly rounded
- double result = __builtin_fma(highDigits, p10, u);
- memcpy(info.dest, &result, sizeof(result));
- return;
- }
- }
- }
-#endif
-
- int roundingMode = FENV_ROUNDING_MODE();
-
-#if 1
- // ================================================================
- // Fixed-width interval arithmetic
- // ================================================================
- int16_t exp10;
- // Total possible inaccuracy in our calculations below
- unsigned intervalWidth;
- if (info.digitCount <= 19) {
- exp10 = (int16_t)info.base10Exponent;
- // We have all the digits, so our upper/lower bounds
- // on the significand are the same, which makes the final
- // bounds fairly tight.
- intervalWidth = 12;
- } else {
- // We're going to treat the input as if it were:
- // (first 19 digits).(remaining digits) * 10^p
- // In this form, (first 19 digits) is a lower bound for the decimal
- // significand and (first 19 digits) + 1 is an upper bound. We also
- // have upper/lower bounds for 10^p.
- // info.digits already has the first 19 digits, we just need to
- // adjust the effective exponent.
- exp10 = (int16_t)(info.base10Exponent + info.digitCount - 19);
- // The non-trivial significand bounds lead to a wider final interval.
- // This still gives us success ~96% of the time.
- intervalWidth = 80;
- }
-
- // Multiply an exact value for 10^{0..27} times a rounded value 10^{n * 28}
- const int16_t coarseIndex = (exp10 * 585 + 256) >> 14; // exp10 / 28;
- const int16_t coarsePower = (coarseIndex * 32) - (coarseIndex * 4); // coarseIndex * 28
- const int16_t exactPower = exp10 - coarsePower; // exp10 % 28
- const uint64_t exact = powersOf10_Exact64[exactPower]; // Exact
- const uint64_t coarse = (powersOf10_CoarseBinary64 + 15)[coarseIndex];
-
- // Powers in the coarse table are never rounded up, so
- // coarse <= true value <= coarse + 1
- const uint64_t powerOfTenRoundedDown = multiply64x64RoundingDown(coarse, exact);
- // And powerOfTenRoundedUp
- // <= ((coarse + 1) * exact + UINT64_MAX) >> 64
- // <= (coarse * exact + exact + UINT64_MAX) >> 64
- // <= powerOfTenRoundedDown + 2
-
- // Binary exponent for the power-of-10 product
- const int powerOfTenExponent = binaryExponentFor10ToThe(coarsePower)
- + binaryExponentFor10ToThe(exactPower);
-
- // Normalize the digits, adjust binary exponent
- int normalizeDigits = __builtin_clzll(info.digits);
- assert(normalizeDigits <= 4 || info.digitCount < 20);
- uint64_t d = info.digits << normalizeDigits;
- int binaryExponent = powerOfTenExponent - normalizeDigits + 64;
- // The upper bound for d is:
- // exactly d (for <= 19 digit case)
- // d + 16 (for > 19 digit case)
-
- // A 64-bit lower bound on the binary significand
- uint64_t l = multiply64x64RoundingDown(powerOfTenRoundedDown, d);
- // Corresponding upper bound:
- // <= 19 digits: (powerOfTenRoundedDown + 2) * d == l128 + d + d
- // > 19 digits: (powerOfTenRoundedDown + 2) * (d + 16)
- // l is a lower bound for the 64-bit binary significand
- // An upper bound for <= 19 digit case:
- // (l128 + d + d + UINT64_MAX) >> 64 <= l + 3
- // For >19 digits, we similarly get l + 20 as an upper bound.
-
- // Normalize the product, adjust binary exponent
- // (In particular, this lets us shift by a constant below.)
- int normalizeProduct = __builtin_clzll(l); // 0 <= normalizeProduct <= 2
- assert(normalizeProduct <= 2);
- // Upper bound is (l + 3) or (l + 20)
- l <<= normalizeProduct;
- // Upper bound is (l + 12) or (l + 80)
- binaryExponent -= normalizeProduct;
-
- // Upper/lower bounds for the 53-bit significand, with rounding
- //
- // We round by adding an offset and then truncating the bottom 11
- // bits. For example, to precisely round away from zero, we
- // would ideally want to add 0x7ff.ffffffffffffffff. Rounding that
- // value down/up to the nearest integer gives us 0x7ff, 0x800.
- unsigned lowerRoundOffset, upperRoundOffset;
- {
- bool negative = info.negative;
- switch (roundingMode) {
-#ifdef FE_UPWARD
- case FE_UPWARD:
- negative = !negative;
- // FALL THROUGH
-#endif
-#ifdef FE_DOWNWARD
- case FE_DOWNWARD:
- if (negative) {
- lowerRoundOffset = 0x7ff; upperRoundOffset = 0x800 + intervalWidth;
- break;
- }
- // FALL THROUGH
-#endif
-#ifdef FE_TOWARDZERO
- case FE_TOWARDZERO:
- lowerRoundOffset = 0; upperRoundOffset = intervalWidth;
- break;
-#endif
-#ifdef FE_TONEAREST
- case FE_TONEAREST:
-#endif
- default:
- (void)negative;
- lowerRoundOffset = 0x3ff; upperRoundOffset = 0x400 + intervalWidth;
- break;
- }
- }
- uint64_t lowerSignificand = (l + lowerRoundOffset) >> 11;
- uint64_t upperSignificand = (l + upperRoundOffset) >> 11;
-
- int adjustedBinaryExponent = binaryExponent;
- if (lowerSignificand == 0) { // lowerSignificand wrapped...
- adjustedBinaryExponent += 1;
- }
- if (adjustedBinaryExponent > maxBinaryExp) {
- overflow(&info);
- return;
- } else if (adjustedBinaryExponent <= minBinaryExp) {
- if (adjustedBinaryExponent <= minBinaryExp - sigBits) {
- underflow(&info);
- return;
- }
- // Might be subnormal; we need to re-round to the appropriate
- // number of bits, which requires recomputing the shift amount and
- // the rounding offsets. This is a generalized version of the
- // code above that's also slightly slower due to the variable shifts.
- int shift = 65 - binaryExponent + (minBinaryExp - sigBits);
- if (shift < 64) {
- assert(0 < shift && shift < 64);
- uint64_t lro, uro;
- bool negative = info.negative;
- switch (roundingMode) {
-#ifdef FE_UPWARD
- case FE_UPWARD:
- negative = !negative;
- // FALL THROUGH
-#endif
-#ifdef FE_DOWNWARD
- case FE_DOWNWARD:
- if (negative) {
- lro = (1ULL << (shift)) - 1; uro = lro + intervalWidth; break;
- }
- // FALL THROUGH
-#endif
-#ifdef FE_TOWARDZERO
- case FE_TOWARDZERO:
- lro = 0; uro = intervalWidth; break;
-#endif
-#ifdef FE_TONEAREST
- case FE_TONEAREST:
-#endif
- default:
- (void)negative;
- lro = (1ULL << (shift - 1)) - 1; uro = (1ULL << (shift - 1)) + intervalWidth; break;
- }
- lowerSignificand = (l + lro) >> shift;
- upperSignificand = (l + uro) >> shift;
- if (upperSignificand == lowerSignificand) {
- if (lowerSignificand == 0) { // lowerSignificand wrapped...
- lowerSignificand = 1ULL << (64 - shift);
- }
- // Subnormal
- uint64_t signbit = info.negative ? 0x8000000000000000ULL : 0ULL;
- uint64_t raw = signbit | lowerSignificand;
- if (raw != 0x0010000000000000ULL) {
- errno = ERANGE;
- }
- memcpy(info.dest, &raw, sizeof(raw));
- return;
- }
- }
- } else if (upperSignificand == lowerSignificand) {
- // Normal
- uint64_t exponentBits = (uint64_t)(adjustedBinaryExponent - minBinaryExp) << (sigBits - 1);
- uint64_t significandMask = (((uint64_t)1 << (sigBits - 1)) - 1);
- uint64_t significandBits = lowerSignificand & significandMask;
- uint64_t signbit = info.negative ? 0x8000000000000000ULL : 0ULL;
- uint64_t raw = signbit | exponentBits | significandBits;
- memcpy(info.dest, &raw, sizeof(raw));
- return;
- }
- // TODO: If we fall through, should we try refining the
- // estimate? If we can extend `l` to 128 bits with very
- // little work, it might be worth the effort.
-#endif
-
- // ================================================================
- // Slow Path (varint calculation)
- // ================================================================
- // Random testing suggests this step only runs about 3% of the
- // time, so we focus here on optimizing for code size rather than perf.
- char _Alignas(sizeof(mp_word_t)) stackWorkArea[656];
- static const size_t stackWorkAreaWords = sizeof(stackWorkArea) / sizeof(mp_word_t);
- generalSlowpath(&info, roundingMode, (mp_word_t *)stackWorkArea, stackWorkAreaWords, false);
-}
-#endif
-
-// ================================================================
-// ================================================================
-//
-// 128-bit interval calculation for Float80 and Binary128
-//
-// Unlike binary32/64 above (where we optimize for performance),
-// this logic has been generalized to support both float80 and
-// binary128 with common code. It's a little slower, but these
-// formats are less commonly used, so the code savings are worth
-// it. (And we're still about 80x faster than gdtoa even with this,
-// so I doubt anyone will complain about the performance here!)
-//
-// ================================================================
-// ================================================================
-
-#if ENABLE_FLOAT80_OPTIMIZATIONS || ENABLE_BINARY128_OPTIMIZATIONS
-#if HAVE_UINT128_T
-typedef __uint128_t my_uint128_t;
-#define create128FromHighLow64(high,low) ((low) + ((__uint128_t)(high) << 64))
-#define multiply128xInt(lhs, rhs) ((lhs) * (rhs))
-#define fullMultiply64x64(lhs, rhs) ((__uint128_t)(lhs) * (rhs))
-#define add128x64(lhs, rhs) ((lhs) + (rhs))
-#define add128x128(lhs, rhs) ((lhs) + (rhs))
-#define extractLow64(x) ((uint64_t)(x))
-#define extractHigh64(x) ((uint64_t)((x) >> 64))
-#define isZero(x) ((x) == 0)
-#define isEqual(lhs, rhs) ((lhs) == (rhs))
-#define shiftLeft(lhs, rhs) ((lhs) << (rhs))
-#define shiftRight(lhs, rhs) ((lhs) >> (rhs))
-#else
-typedef struct { uint64_t low; uint64_t high; } my_uint128_t;
-#define extractLow64(x) ((x).low)
-#define extractHigh64(x) ((x).high)
-#define isZero(x) ((x).low == 0 && (x).high == 0)
-#define isEqual(lhs, rhs) ((lhs).low == (rhs).low && ((lhs).high == (rhs).high))
-static my_uint128_t shiftLeft(my_uint128_t lhs, int rhs) {
- if (rhs > 64) {
- lhs.high = lhs.low << (rhs - 64);
- lhs.low = 0;
- } else if (rhs == 64) {
- lhs.high = lhs.low;
- lhs.low = 0;
- } else if (rhs > 0) {
- lhs.high = (lhs.high << rhs) + (lhs.low >> (64 - rhs));
- lhs.low <<= rhs;
- }
- return lhs;
-}
-static my_uint128_t shiftRight(my_uint128_t lhs, int rhs) {
- if (rhs > 64) {
- lhs.low = lhs.high >> (rhs - 64);
- lhs.high = 0;
- } else if (rhs == 64) {
- lhs.low = lhs.high;
- lhs.high = 0;
- } else if (rhs > 0) {
- lhs.low = (lhs.low >> rhs) + (lhs.high << (64 - rhs));
- lhs.high >>= rhs;
- }
- return lhs;
-}
-static my_uint128_t create128FromHighLow64(uint64_t high, uint64_t low) {
- my_uint128_t result = {low, high};
- return result;
-}
-static my_uint128_t add128x64(my_uint128_t lhs, uint64_t rhs) {
- if (lhs.low > UINT64_MAX - rhs) {
- lhs.high += 1;
- }
- lhs.low += rhs;
- return lhs;
-}
-static my_uint128_t add128x128(my_uint128_t lhs, my_uint128_t rhs) {
- if (lhs.low > UINT64_MAX - rhs.low) {
- lhs.high += 1;
- }
- lhs.low += rhs.low;
- lhs.high += rhs.high;
- return lhs;
-}
-static my_uint128_t fullMultiply64x64(uint64_t lhs, uint64_t rhs) {
- uint64_t a = (lhs >> 32) * (rhs >> 32);
- uint64_t b = (lhs >> 32) * (rhs & UINT32_MAX);
- uint64_t c = (lhs & UINT32_MAX) * (rhs >> 32);
- uint64_t d = (lhs & UINT32_MAX) * (rhs & UINT32_MAX);
- b += (c & UINT32_MAX) + (d >> 32);
- return create128FromHighLow64(a + (b >> 32) + (c >> 32),
- (b << 32) + (d & UINT32_MAX));
-}
-static my_uint128_t multiply128xInt(my_uint128_t lhs, int rhs) {
- uint64_t a = (lhs.low & UINT32_MAX) * (uint64_t)rhs;
- uint64_t b = (lhs.low >> 32) * (uint64_t)rhs;
- b += (a >> 32);
- lhs.high = (lhs.high * (uint64_t)rhs) + (b >> 32);
- lhs.low = (a & UINT32_MAX) + (b << 32);
- return lhs;
-}
-#endif
-static my_uint128_t multiply128x128RoundingDown(my_uint128_t lhs, my_uint128_t rhs) {
- my_uint128_t a = fullMultiply64x64(extractHigh64(lhs), extractHigh64(rhs));
- my_uint128_t b = fullMultiply64x64(extractHigh64(lhs), extractLow64(rhs));
- my_uint128_t c = fullMultiply64x64(extractLow64(lhs), extractHigh64(rhs));
- my_uint128_t d = fullMultiply64x64(extractLow64(lhs), extractLow64(rhs));
- b = add128x64(b, extractLow64(c));
- b = add128x64(b, extractHigh64(d));
- a = add128x64(a, extractHigh64(b));
- a = add128x64(a, extractHigh64(c));
- return a;
-}
-
-static my_uint128_t getPowerOfTenRoundedDown(int p, int *exponent) {
- my_uint128_t result;
- int e;
-
- // If power is < 0, multiply by 10^-5040 and adjust p
- // That lets us use a table with only positive powers (half the size)
- if (p < 0) {
- result = create128FromHighLow64(0xb2d31bf022977fd8ULL, 0xbf034c011f5000deULL);
- p += 5040;
- e = binaryExponentFor10ToThe(-5040);
- } else {
- result = create128FromHighLow64(1ULL << 63, 0);
- e = 1;
- }
-
- int finePower = p % 56;
- my_uint128_t fine;
- p -= finePower;
- if (finePower <= 27) {
- fine = create128FromHighLow64(powersOf10_Exact64[finePower], 0);
- } else if (finePower <= 54) {
- fine = fullMultiply64x64(powersOf10_Exact64[finePower - 27],
- 0xcecb8f27f4200f3aULL); // 10^27
- if ((extractHigh64(fine) >> 63) == 0) {
- fine = shiftLeft(fine, 1);
- }
- } else {
- fine = create128FromHighLow64(0xd0cf4b50cfe20765ULL, 0xfff4b4e3f741cf6dULL);
- }
- e += binaryExponentFor10ToThe(finePower);
- result = multiply128x128RoundingDown(result, fine);
-
- int coarseIndex = p / 56;
- const uint64_t *c = powersOf10_Binary128 + coarseIndex * 2;
- my_uint128_t coarse = create128FromHighLow64(c[1], c[0]);
- e += binaryExponentFor10ToThe(coarseIndex * 56);
- result = multiply128x128RoundingDown(result, coarse);
-
- *exponent = e;
- return result;
-}
-
-static int highPrecisionIntervalPath(struct parseInfo *info, int roundingMode) {
- int16_t exp10;
- unsigned upperBoundOffset;
- if (info->digitCount <= 38) {
- exp10 = (int16_t)info->base10Exponent;
- upperBoundOffset = 16; // FIXME
- } else {
- exp10 = (int16_t)(info->base10Exponent + info->digitCount - 38);
- upperBoundOffset = 272; // FIXME
- }
-
- my_uint128_t digits = create128FromHighLow64(0, info->digits);
- int normalizeDigits;
- if (info->digitCount <= 19) {
- normalizeDigits = __builtin_clzll(info->digits) + 64;
- } else {
- int remaining = info->digitCount - 19;
- if (remaining > 19) { remaining = 19; }
- const unsigned char *p = info->firstUnparsedDigit;
- for (int i = 0; i < remaining; i++) {
- // Skip decimal point (depends on locale, can be any non-digit!)
- while (*p < '0' || *p > '9') {
- p++;
- }
- digits = multiply128xInt(digits, 10);
- digits = add128x64(digits, (uint64_t)(*p - '0'));
- p += 1;
- }
- if (extractHigh64(digits) != 0) {
- normalizeDigits = __builtin_clzll(extractHigh64(digits));
- } else {
- normalizeDigits = __builtin_clzll(extractLow64(digits)) + 64;
- }
- }
- assert(normalizeDigits <= 5 || info->digitCount <= 38);
- digits = shiftLeft(digits, normalizeDigits);
- int binaryExponent = 128 - normalizeDigits;
- // For <= 38 digits, the upper bound for d is just d
- // For > 38 digits, the upper bound is 1 << normalizeDigits <= 32
-
- int powerOfTenExponent;
- const my_uint128_t powerOfTenRoundedDown = getPowerOfTenRoundedDown(exp10, &powerOfTenExponent);
- binaryExponent += powerOfTenExponent;
- // powerOfTenRoundedDown <= true value <= powerOfTenRoundedDown + 2
-
- // A 128-bit lower bound on the binary significand
- my_uint128_t l = multiply128x128RoundingDown(powerOfTenRoundedDown, digits);
-
- // (In particular, this lets us shift by a constant below.)
- int normalizeProduct = __builtin_clzll(extractHigh64(l)); // 0 <= normalizeProduct <= 1
- // Upper bound is <= (l + 2) or (l + 34)
- assert(normalizeProduct <= 3);
- l = shiftLeft(l, normalizeProduct);
- // Upper bound is <= l + 16 or l + 272
- binaryExponent -= normalizeProduct;
-
- // Upper/lower bounds for the 64-bit significand
- my_uint128_t u = add128x64(l, upperBoundOffset);
- bool negative = info->negative;
- switch (roundingMode) {
-#ifdef FE_DOWNWARD
- case FE_DOWNWARD:
- negative = !negative;
- // FALL THROUGH
-#endif
-#ifdef FE_UPWARD
- case FE_UPWARD:
- if (!negative) {
- my_uint128_t offset = create128FromHighLow64(UINT64_MAX, UINT64_MAX);
- offset = shiftRight(offset, info->sigBits);
- l = add128x128(l, offset);
- u = add128x128(u, offset);
- u = add128x64(u, 1);
- break;
- }
- // FALL THROUGH
-#endif
-#ifdef FE_TOWARDZERO
- case FE_TOWARDZERO:
- break;
-#endif
-#ifdef FE_TONEAREST
- case FE_TONEAREST:
-#endif
- default: {
- (void)negative;
- my_uint128_t offset = create128FromHighLow64(UINT64_MAX, UINT64_MAX);
- offset = shiftRight(offset, info->sigBits + 1);
- l = add128x128(l, offset);
- u = add128x128(u, offset);
- u = add128x64(u, 1);
- }
- }
- my_uint128_t lowerSignificand = shiftRight(l, (128 - info->sigBits));
- my_uint128_t upperSignificand = shiftRight(u, (128 - info->sigBits));
- if (isZero(lowerSignificand)) { // lowerSignificand wrapped...
- binaryExponent += 1;
- lowerSignificand = create128FromHighLow64(1ULL << 63, 0);
- lowerSignificand = shiftRight(lowerSignificand, 128 - info->sigBits);
- }
- if (binaryExponent > info->maxBinaryExp) {
- overflow(info);
- return 1;
- } else if (binaryExponent <= info->minBinaryExp) {
- if (binaryExponent <= info->minBinaryExp - info->sigBits) {
- underflow(info);
- return 1;
- }
- // TODO: ... Subnormal? ...
- } else if (isEqual(upperSignificand, lowerSignificand)) {
- switch (info->bytes) {
- case 10: {
- uint16_t signbit = info->negative ? 0x8000U : 0U;
- uint16_t exponentBits = signbit | (uint16_t)(binaryExponent - info->minBinaryExp);
- uint64_t significandBits = extractLow64(lowerSignificand);
- memcpy(info->dest, &significandBits, sizeof(significandBits));
- memcpy(info->dest + 8, &exponentBits, sizeof(exponentBits));
- return 1;
- }
- case 16: {
- uint16_t signbit = info->negative ? 0x8000U : 0U;
- uint16_t exponentBits = signbit | (uint16_t)(binaryExponent - info->minBinaryExp);
- memcpy(info->dest, &lowerSignificand, sizeof(lowerSignificand));
- memcpy(info->dest + 14, &exponentBits, sizeof(exponentBits));
- return 1;
- }
- }
- }
- return 0;
-}
-#endif
-
-// ================================================================
-// ================================================================
-//
-// Parse an Intel x87 80-bit extended format value
-//
-// ================================================================
-// ================================================================
-
-#if ENABLE_FLOAT80_SUPPORT
-static void _ffpp_strtoencf80_l(unsigned char *dest, const char *start, char **end, strtofp_locale_t loc) {
- static const int bytes = 10;
- static const int sigBits = 64;
- static const int minBinaryExp = -16382;
- static const int maxBinaryExp = 16384;
- static const int minDecimalExp = -5000;
- static const int maxDecimalExp = 5000;
- static const int maxDecimalMidpointDigits = 11515;
- struct parseInfo info = {
- .bytes = bytes,
- .sigBits = sigBits,
- .minBinaryExp = minBinaryExp,
- .maxBinaryExp = maxBinaryExp,
- .minDecimalExp = minDecimalExp,
- .maxDecimalExp = maxDecimalExp,
- .maxDecimalMidpointDigits = maxDecimalMidpointDigits,
- .dest = dest,
- .start = start,
- .end = end,
- .loc = loc,
- };
-
- // ================================================================
- // Parse the input (mostly)
- // ================================================================
- if (!fastParse64(&info)) {
- return;
- }
-
-#if ENABLE_FLOAT80_OPTIMIZATIONS && LONG_DOUBLE_IS_FLOAT80
- // ================================================================
- // Use a single float80 operation when we can
- // ================================================================
- const static long double longDoublePowerOf10[] = {1.0L, 10.0L, 100.0L,
- 1e3L, 1e4L, 1e5L, 1e6L, 1e7L, 1e8L, 1e9L, 1e10L, 1e11L, 1e12L,
- 1e13L, 1e14L, 1e15L, 1e16L, 1e17L, 1e18L, 1e19L, 1e20L, 1e21L,
- 1e22L, 1e23L, 1e24L, 1e25L, 1e26L, 1e27L};
- if (info.base10Exponent > -28 && info.base10Exponent < 28 && info.digitCount <= 19) {
- if (info.base10Exponent < 0) {
- long double p = longDoublePowerOf10[-info.base10Exponent];
- if (info.negative) p = -p;
- long double result = (long double)info.digits / p;
- memcpy(info.dest, &result, sizeof(result));
- return;
- } else {
- long double p = longDoublePowerOf10[info.base10Exponent];
- if (info.negative) p = -p;
- long double result = (long double)info.digits * p;
- memcpy(info.dest, &result, sizeof(result));
- return;
- }
- }
-#endif
-
- int roundingMode = FENV_ROUNDING_MODE();
-
-#if ENABLE_FLOAT80_OPTIMIZATIONS
- if (highPrecisionIntervalPath(&info, roundingMode)) {
- return;
- }
-#endif
-
- // ================================================================
- // Slow Path (varint calculation)
- // ================================================================
- char _Alignas(sizeof(mp_word_t)) stackWorkArea[1536];
- static const size_t stackWorkAreaWords = sizeof(stackWorkArea) / sizeof(mp_word_t);
- generalSlowpath(&info, roundingMode, (mp_word_t *)stackWorkArea, stackWorkAreaWords, true);
-}
-#endif
-
-// ================================================================
-// ================================================================
-//
-// Parse an IEEE 754 Binary128
-//
-// ================================================================
-// ================================================================
-
-#if ENABLE_BINARY128_SUPPORT
-static void _ffpp_strtoencf128_l(unsigned char *dest, const char *start, char **end, strtofp_locale_t loc) {
- static const int bytes = 16;
- static const int sigBits = 113;
- static const int minBinaryExp = -16382;
- static const int maxBinaryExp = 16384;
- static const int minDecimalExp = -5000;
- static const int maxDecimalExp = 5000;
- static const int maxDecimalMidpointDigits = 11564;
- struct parseInfo info = {
- .bytes = bytes,
- .sigBits = sigBits,
- .minBinaryExp = minBinaryExp,
- .maxBinaryExp = maxBinaryExp,
- .minDecimalExp = minDecimalExp,
- .maxDecimalExp = maxDecimalExp,
- .maxDecimalMidpointDigits = maxDecimalMidpointDigits,
- .dest = dest,
- .start = start,
- .end = end,
- .loc = loc,
- };
-
- // ================================================================
- // Parse the input (mostly)
- // ================================================================
- if (!fastParse64(&info)) {
- return;
- }
-
-#if LONG_DOUBLE_IS_BINARY128 && ENABLE_BINARY128_OPTIMIZATIONS
- // ================================================================
- // Use a single binary128 operation when we can
- // ================================================================
- const static long double longDoublePowerOf10[] = {
- 1.0L, 10.0L, 100.0L, 1e3L, 1e4L, 1e5L, 1e6L, 1e7L, 1e8L, 1e9L,
- 1e10L, 1e11L, 1e12L, 1e13L, 1e14L, 1e15L, 1e16L, 1e17L, 1e18L, 1e19L,
- 1e20L, 1e21L, 1e22L, 1e23L, 1e24L, 1e25L, 1e26L, 1e27L, 1e28L, 1e29L,
- 1e30L, 1e31L, 1e32L, 1e33L, 1e34L, 1e35L, 1e36L, 1e37L, 1e38L, 1e39L,
- 1e40L, 1e41L, 1e42L, 1e43L, 1e44L, 1e45L, 1e46L, 1e47L, 1e48L};
- if (info.base10Exponent > -49 && info.base10Exponent < 49 && info.digitCount <= 19) {
- if (info.base10Exponent < 0) {
- long double p = longDoublePowerOf10[-info.base10Exponent];
- if (info.negative) p = -p;
- long double result = (long double)info.digits / p;
- memcpy(info.dest, &result, sizeof(result));
- return;
- } else {
- long double p = longDoublePowerOf10[info.base10Exponent];
- if (info.negative) p = -p;
- long double result = (long double)info.digits * p;
- memcpy(info.dest, &result, sizeof(result));
- return;
- }
- }
-#endif
-
- int roundingMode = FENV_ROUNDING_MODE();
-
-#if ENABLE_BINARY128_OPTIMIZATIONS
- if (highPrecisionIntervalPath(&info, roundingMode)) {
- return;
- }
-#endif
-
- // ================================================================
- // Slow Path (varint calculation)
- // ================================================================
- char _Alignas(sizeof(mp_word_t)) stackWorkArea[1536];
- static const size_t stackWorkAreaWords = sizeof(stackWorkArea) / sizeof(mp_word_t);
- generalSlowpath(&info, roundingMode, (mp_word_t *)stackWorkArea, stackWorkAreaWords, true);
-}
-#endif
-
-// ================================================================
-// ================================================================
-//
-// Public APIs
-//
-// The public functions exported from this file are all defined
-// in terms of the private `_ffpp_strtoencf**_l` functions defined
-// above.
-//
-// ================================================================
-// ================================================================
-
-// ================================================================
-// Wrappers for Binary16
-#if ENABLE_BINARY16_SUPPORT
-// TS 18661-3 `strtoencf16` API that can be supported on
-// every platform regardless of local FP
-void strtoencf16(unsigned char * restrict encptr,
- const char * restrict nptr,
- char ** restrict endptr) {
- _ffpp_strtoencf16_l(encptr, nptr, endptr, strtofp_current_locale());
-}
-#endif
-
-// ================================================================
-// Wrappers for Binary32
-
-#if ENABLE_BINARY32_SUPPORT
-// TS 18661-3 `strtoencf32` API that can be supported on
-// every platform regardless of local FP
-void strtoencf32(unsigned char * restrict encptr,
- const char * restrict nptr,
- char ** restrict endptr) {
- _ffpp_strtoencf32_l(encptr, nptr, endptr, strtofp_current_locale());
-}
-#endif
-
-#if ENABLE_BINARY32_SUPPORT && FLOAT_IS_BINARY32
-// ISO C17 `strtof` API
-float strtof(const char * restrict nptr,
- char ** restrict endptr) {
- union { float d; unsigned char raw[4]; } result;
- _ffpp_strtoencf32_l(result.raw, nptr, endptr, strtofp_current_locale());
- return result.d;
-}
-#endif
-
-#if ENABLE_BINARY32_SUPPORT && FLOAT_IS_BINARY32 && ENABLE_LOCALE_SUPPORT
-// ISO C17 `strtof_l` API
-float strtof_l(const char * restrict nptr,
- char ** restrict endptr,
- strtofp_locale_t loc) {
- union { float d; unsigned char raw[4]; } result;
- _ffpp_strtoencf32_l(result.raw, nptr, endptr, loc);
- return result.d;
-}
-#endif
-
-// ================================================================
-// Wrappers for Binary64
-
-#if ENABLE_BINARY64_SUPPORT
-// TS 18661-3 `strtoencf64` API that can be supported on
-// every platform regardless of local FP
-void strtoencf64(unsigned char * restrict encptr,
- const char * restrict nptr,
- char ** restrict endptr) {
- _ffpp_strtoencf64_l(encptr, nptr, endptr, strtofp_current_locale());
-}
-#endif
-
-#if ENABLE_BINARY64_SUPPORT && LONG_DOUBLE_IS_BINARY64
-// TS 18661-3 `strtoencf64x` API
-// If `long double` is binary64, we assume Float64x is binary64
-void strtoencf64x(unsigned char * restrict encptr,
- const char * restrict nptr,
- char ** restrict endptr) {
- _ffpp_strtoencf64_l(encptr, nptr, endptr, strtofp_current_locale());
-}
-#endif
-
-#if ENABLE_BINARY64_SUPPORT && DOUBLE_IS_BINARY64
-// ISO C17 `strtod` API
-double strtod(const char * restrict nptr,
- char ** restrict endptr) {
- union { double d; unsigned char raw[8]; } result;
- _ffpp_strtoencf64_l(result.raw, nptr, endptr, strtofp_current_locale());
- return result.d;
-}
-#endif
-
-#if ENABLE_BINARY64_SUPPORT && DOUBLE_IS_BINARY64 && ENABLE_LOCALE_SUPPORT
-// ISO C17 `strtod_l` API
-double strtod_l(const char * restrict nptr,
- char ** restrict endptr,
- strtofp_locale_t loc) {
- union { double d; unsigned char raw[8]; } result;
- _ffpp_strtoencf64_l(result.raw, nptr, endptr, loc);
- return result.d;
-}
-#endif
-
-#if ENABLE_BINARY64_SUPPORT && LONG_DOUBLE_IS_BINARY64
-// ISO C17 `strtold` API
-long double strtold(const char * restrict nptr,
- char ** restrict endptr) {
- union { long double d; unsigned char raw[8]; } result;
- _ffpp_strtoencf64_l(result.raw, nptr, endptr, strtofp_current_locale());
- return result.d;
-}
-#endif
-
-#if ENABLE_BINARY64_SUPPORT && LONG_DOUBLE_IS_BINARY64 && ENABLE_LOCALE_SUPPORT
-// ISO C17 `strtold_l` API
-long double strtold_l(const char * restrict nptr,
- char ** restrict endptr,
- strtofp_locale_t loc) {
- union { long double d; unsigned char raw[8]; } result;
- _ffpp_strtoencf64_l(result.raw, nptr, endptr, loc);
- return result.d;
-}
-#endif
-
-// ================================================================
-// Wrappers for Float80
-
-#if ENABLE_FLOAT80_SUPPORT && ENABLE_LOCALE_SUPPORT
-// Non-standard but helpful for testing.
-void strtoencf80_l(unsigned char * restrict encptr,
- const char * restrict nptr,
- char ** restrict endptr,
- strtofp_locale_t loc) {
- _ffpp_strtoencf80_l(encptr, nptr, endptr, loc);
-}
-#endif
-
-#if ENABLE_FLOAT80_SUPPORT && LONG_DOUBLE_IS_FLOAT80
-// TS 18661-3 `strtoencf64x` API
-// If `long double` is float80, assume `Float64x` is float80
-void strtoencf64x(unsigned char * restrict encptr,
- const char * restrict nptr,
- char ** restrict endptr) {
- _ffpp_strtoencf80_l(encptr, nptr, endptr, strtofp_current_locale());
-}
-#endif
-
-#if ENABLE_FLOAT80_SUPPORT && LONG_DOUBLE_IS_FLOAT80
-// ISO C17 `strtold` API
-long double strtold(const char * restrict nptr,
- char ** restrict endptr) {
- union { long double d; unsigned char raw[10]; } result;
- _ffpp_strtoencf80_l(result.raw, nptr, endptr, strtofp_current_locale());
- return result.d;
-}
-#endif
-
-#if ENABLE_FLOAT80_SUPPORT && LONG_DOUBLE_IS_FLOAT80 && ENABLE_LOCALE_SUPPORT
-// ISO C17 `strtold` API
-long double strtold_l(const char * restrict nptr,
- char ** restrict endptr,
- strtofp_locale_t loc) {
- union { long double d; unsigned char raw[10]; } result;
- _ffpp_strtoencf80_l(result.raw, nptr, endptr, loc);
- return result.d;
-}
-#endif
-
-// ================================================================
-// Wrappers for Binary128
-
-#if ENABLE_BINARY128_SUPPORT
-// TS 18661-3 `strtoencf128` API that can be supported on
-// every platform regardless of local FP
-void strtoencf128(unsigned char * restrict encptr,
- const char * restrict nptr,
- char ** restrict endptr) {
- _ffpp_strtoencf128_l(encptr, nptr, endptr, strtofp_current_locale());
-}
-#endif
-
-#if ENABLE_BINARY128_SUPPORT && LONG_DOUBLE_IS_BINARY128
-// ISO C17 `strtold` API
-long double strtold(const char * restrict nptr,
- char ** restrict endptr) {
- union { long double d; unsigned char raw[16]; } result;
- _ffpp_strtoencf128_l(result.raw, nptr, endptr, strtofp_current_locale());
- return result.d;
-}
-#endif
-
-#if ENABLE_BINARY128_SUPPORT && LONG_DOUBLE_IS_BINARY128 && ENABLE_LOCALE_SUPPORT
-// ISO C17 `strtold_l` API
-long double strtold_l(const char * restrict nptr,
- char ** restrict endptr,
- strtofp_locale_t loc) {
- union { long double d; unsigned char raw[16]; } result;
- _ffpp_strtoencf128_l(result.raw, nptr, endptr, loc);
- return result.d;
-}
-#endif