/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2007-2013 Bruce D. Evans
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
/**
* Implementation of the natural logarithm of x for 128-bit format.
*
* First decompose x into its base 2 representation:
*
* log(x) = log(X * 2**k), where X is in [1, 2)
* = log(X) + k * log(2).
*
* Let X = X_i + e, where X_i is the center of one of the intervals
* [-1.0/256, 1.0/256), [1.0/256, 3.0/256), .... [2.0-1.0/256, 2.0+1.0/256)
* and X is in this interval. Then
*
* log(X) = log(X_i + e)
* = log(X_i * (1 + e / X_i))
* = log(X_i) + log(1 + e / X_i).
*
* The values log(X_i) are tabulated below. Let d = e / X_i and use
*
* log(1 + d) = p(d)
*
* where p(d) = d - 0.5*d*d + ... is a special minimax polynomial of
* suitably high degree.
*
* To get sufficiently small roundoff errors, k * log(2), log(X_i), and
* sometimes (if |k| is not large) the first term in p(d) must be evaluated
* and added up in extra precision. Extra precision is not needed for the
* rest of p(d). In the worst case when k = 0 and log(X_i) is 0, the final
* error is controlled mainly by the error in the second term in p(d). The
* error in this term itself is at most 0.5 ulps from the d*d operation in
* it. The error in this term relative to the first term is thus at most
* 0.5 * |-0.5| * |d| < 1.0/1024 ulps. We aim for an accumulated error of
* at most twice this at the point of the final rounding step. Thus the
* final error should be at most 0.5 + 1.0/512 = 0.5020 ulps. Exhaustive
* testing of a float variant of this function showed a maximum final error
* of 0.5008 ulps. Non-exhaustive testing of a double variant of this
* function showed a maximum final error of 0.5078 ulps (near 1+1.0/256).
*
* We made the maximum of |d| (and thus the total relative error and the
* degree of p(d)) small by using a large number of intervals. Using
* centers of intervals instead of endpoints reduces this maximum by a
* factor of 2 for a given number of intervals. p(d) is special only
* in beginning with the Taylor coefficients 0 + 1*d, which tends to happen
* naturally. The most accurate minimax polynomial of a given degree might
* be different, but then we wouldn't want it since we would have to do
* extra work to avoid roundoff error (especially for P0*d instead of d).
*/
/*
* -0.005280 < d < 0.004838. In particular, the infinite-
* precision |d| is <= 2**-7. Rounding of G(i) to 8 bits
* ensures that d is representable without extra precision for
* this bound on |d| (since when this calculation is expressed
* as x*G(i)-1, the multiplication needs as many extra bits as
* G(i) has and the subtraction cancels 8 bits). But for
* most i (107 cases out of 129), the infinite-precision |d|
* is <= 2**-8. G(i) is rounded to 9 bits for such i to give
* better accuracy (this works by improving the bound on |d|,
* which in turn allows rounding to 9 bits in more cases).
* This is only important when the original x is near 1 -- it
* lets us avoid using a special method to give the desired
* accuracy for such x.
*/
if (0)
d = x * G(i) - 1;
else {
#ifdef USE_UTAB
d = (x - H(i)) * G(i) + E(i);
#else
long double x_hi;
double x_lo;
/*
* Split x into x_hi + x_lo to calculate x*G(i)-1 exactly.
* G(i) has at most 9 bits, so the splitting point is not
* critical.
*/
INSERT_LDBL128_WORDS(x_hi, 0x3fff, lx,
llx & 0xffffffffff000000ULL);
x_lo = x - x_hi;
d = x_hi * G(i) - 1 + x_lo * G(i);
#endif
}
/*
* Our algorithm depends on exact cancellation of F_lo(i) and
* F_hi(i) with dk*ln_2_lo and dk*ln2_hi when k is -1 and i is
* at the end of the table. This and other technical complications
* make it difficult to avoid the double scaling in (dk*ln2) *
* log(base) for base != e without losing more accuracy and/or
* efficiency than is gained.
*/
/*
* Use double precision operations wherever possible, since
* long double operations are emulated and were very slow on
* the old sparc64 and unknown on the newer aarch64 and riscv
* machines. Also, don't try to improve parallelism by
* increasing the number of operations, since any parallelism
* on such machines is needed for the emulation. Horner's
* method is good for this, and is also good for accuracy.
* Horner's method doesn't handle the `lo' term well, either
* for efficiency or accuracy. However, for accuracy we
* evaluate d * d * P2 separately to take advantage of by P2
* being exact, and this gives a good place to sum the 'lo'
* term too.
*/
dd = (double)d;
val_lo = d * d * d * (P3 +
d * (P4 + d * (P5 + d * (P6 + d * (P7 + d * (P8 +
dd * (P9 + dd * (P10 + dd * (P11 + dd * (P12 + dd * (P13 +
dd * P14))))))))))) + (F_lo(i) + dk * ln2_lo) + d * d * P2;
val_hi = d;
#ifdef DEBUG
if (fetestexcept(FE_UNDERFLOW))
breakpoint();
#endif
/*
* x*G(i)-1 (with a reduced x) can be represented exactly, as
* above, but now we need to evaluate the polynomial on d =
* (x+f_lo)*G(i)-1 and extra precision is needed for that.
* Since x+x_lo is a hi+lo decomposition and subtracting 1
* doesn't lose too many bits, an inexact calculation for
* f_lo*G(i) is good enough.
*/
if (0)
d_hi = x * G(i) - 1;
else {
#ifdef USE_UTAB
d_hi = (x - H(i)) * G(i) + E(i);
#else
long double x_hi;
double x_lo;
/*
* This is _2sumF(d_hi, d_lo) inlined. The condition
* (d_hi == 0 || |d_hi| >= |d_lo|) for using _2sumF() is not
* always satisifed, so it is not clear that this works, but
* it works in practice. It works even if it gives a wrong
* normalized d_lo, since |d_lo| > |d_hi| implies that i is
* nonzero and d is tiny, so the F(i) term dominates d_lo.
* In float precision:
* (By exhaustive testing, the worst case is d_hi = 0x1.bp-25.
* And if d is only a little tinier than that, we would have
* another underflow problem for the P3 term; this is also ruled
* out by exhaustive testing.)
*/
d = d_hi + d_lo;
d_lo = d_hi - d + d_lo;
d_hi = d;
dd = (double)d;
val_lo = d * d * d * (P3 +
d * (P4 + d * (P5 + d * (P6 + d * (P7 + d * (P8 +
dd * (P9 + dd * (P10 + dd * (P11 + dd * (P12 + dd * (P13 +
dd * P14))))))))))) + (F_lo(i) + dk * ln2_lo + d_lo) + d * d * P2;
val_hi = d_hi;
#ifdef DEBUG
if (fetestexcept(FE_UNDERFLOW))
breakpoint();
#endif
/*
* 29+113 bit decompositions. The bits are distributed so that the products
* of the hi terms are exact in double precision. The types are chosen so
* that the products of the hi terms are done in at least double precision,
* without any explicit conversions. More natural choices would require a
* slow long double precision multiplication.
*/
static const double
invln10_hi = 4.3429448176175356e-1, /* 0x1bcb7b15000000.0p-54 */
invln2_hi = 1.4426950402557850e0; /* 0x17154765000000.0p-52 */
static const long double
invln10_lo = 1.41498268538580090791605082294397000e-10L, /* 0x137287195355baaafad33dc323ee3.0p-145L */
invln2_lo = 6.33178418956604368501892137426645911e-10L, /* 0x15c17f0bbbe87fed0691d3e88eb57.0p-143L */
invln10_lo_plus_hi = invln10_lo + invln10_hi,
invln2_lo_plus_hi = invln2_lo + invln2_hi;
long double
log10l(long double x)
{
struct ld r;
long double hi, lo;
ENTERI();
k_logl(x, &r);
if (!r.lo_set)
RETURNI(r.hi);
_2sumF(r.hi, r.lo);
hi = (float)r.hi;
lo = r.lo + (r.hi - hi);
RETURNI(invln10_hi * hi + (invln10_lo_plus_hi * lo + invln10_lo * hi));
}
long double
log2l(long double x)
{
struct ld r;
long double hi, lo;
ENTERI();
k_logl(x, &r);
if (!r.lo_set)
RETURNI(r.hi);
_2sumF(r.hi, r.lo);
hi = (float)r.hi;
lo = r.lo + (r.hi - hi);
RETURNI(invln2_hi * hi + (invln2_lo_plus_hi * lo + invln2_lo * hi));
}