/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2007-2013 Bruce D. Evans
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
/**
* Implementation of the natural logarithm of x for Intel 80-bit format.
*
* First decompose x into its base 2 representation:
*
* log(x) = log(X * 2**k), where X is in [1, 2)
* = log(X) + k * log(2).
*
* Let X = X_i + e, where X_i is the center of one of the intervals
* [-1.0/256, 1.0/256), [1.0/256, 3.0/256), .... [2.0-1.0/256, 2.0+1.0/256)
* and X is in this interval. Then
*
* log(X) = log(X_i + e)
* = log(X_i * (1 + e / X_i))
* = log(X_i) + log(1 + e / X_i).
*
* The values log(X_i) are tabulated below. Let d = e / X_i and use
*
* log(1 + d) = p(d)
*
* where p(d) = d - 0.5*d*d + ... is a special minimax polynomial of
* suitably high degree.
*
* To get sufficiently small roundoff errors, k * log(2), log(X_i), and
* sometimes (if |k| is not large) the first term in p(d) must be evaluated
* and added up in extra precision. Extra precision is not needed for the
* rest of p(d). In the worst case when k = 0 and log(X_i) is 0, the final
* error is controlled mainly by the error in the second term in p(d). The
* error in this term itself is at most 0.5 ulps from the d*d operation in
* it. The error in this term relative to the first term is thus at most
* 0.5 * |-0.5| * |d| < 1.0/1024 ulps. We aim for an accumulated error of
* at most twice this at the point of the final rounding step. Thus the
* final error should be at most 0.5 + 1.0/512 = 0.5020 ulps. Exhaustive
* testing of a float variant of this function showed a maximum final error
* of 0.5008 ulps. Non-exhaustive testing of a double variant of this
* function showed a maximum final error of 0.5078 ulps (near 1+1.0/256).
*
* We made the maximum of |d| (and thus the total relative error and the
* degree of p(d)) small by using a large number of intervals. Using
* centers of intervals instead of endpoints reduces this maximum by a
* factor of 2 for a given number of intervals. p(d) is special only
* in beginning with the Taylor coefficients 0 + 1*d, which tends to happen
* naturally. The most accurate minimax polynomial of a given degree might
* be different, but then we wouldn't want it since we would have to do
* extra work to avoid roundoff error (especially for P0*d instead of d).
*/
/*
* -0.005280 < d < 0.004838. In particular, the infinite-
* precision |d| is <= 2**-7. Rounding of G(i) to 8 bits
* ensures that d is representable without extra precision for
* this bound on |d| (since when this calculation is expressed
* as x*G(i)-1, the multiplication needs as many extra bits as
* G(i) has and the subtraction cancels 8 bits). But for
* most i (107 cases out of 129), the infinite-precision |d|
* is <= 2**-8. G(i) is rounded to 9 bits for such i to give
* better accuracy (this works by improving the bound on |d|,
* which in turn allows rounding to 9 bits in more cases).
* This is only important when the original x is near 1 -- it
* lets us avoid using a special method to give the desired
* accuracy for such x.
*/
if (/*CONSTCOND*/0)
/*NOTREACHED*/
d = x * G(i) - 1;
else {
#ifdef USE_UTAB
d = (x - H(i)) * G(i) + E(i);
#else
long double x_hi, x_lo;
float fx_hi;
/*
* Split x into x_hi + x_lo to calculate x*G(i)-1 exactly.
* G(i) has at most 9 bits, so the splitting point is not
* critical.
*/
SET_FLOAT_WORD(fx_hi, (lx >> 40) | 0x3f800000);
x_hi = fx_hi;
x_lo = x - x_hi;
d = x_hi * G(i) - 1 + x_lo * G(i);
#endif
}
/*
* Our algorithm depends on exact cancellation of F_lo(i) and
* F_hi(i) with dk*ln_2_lo and dk*ln2_hi when k is -1 and i is
* at the end of the table. This and other technical complications
* make it difficult to avoid the double scaling in (dk*ln2) *
* log(base) for base != e without losing more accuracy and/or
* efficiency than is gained.
*/
z = d * d;
val_lo = z * d * z * (z * (d * P8 + P7) + (d * P6 + P5)) +
(F_lo(i) + dk * ln2_lo + z * d * (d * P4 + P3)) + z * P2;
val_hi = d;
#ifdef DEBUG
if (fetestexcept(FE_UNDERFLOW))
breakpoint();
#endif
/*
* x*G(i)-1 (with a reduced x) can be represented exactly, as
* above, but now we need to evaluate the polynomial on d =
* (x+f_lo)*G(i)-1 and extra precision is needed for that.
* Since x+x_lo is a hi+lo decomposition and subtracting 1
* doesn't lose too many bits, an inexact calculation for
* f_lo*G(i) is good enough.
*/
if (/*CONSTCOND*/0)
/*NOTREACHED*/
d_hi = x * G(i) - 1;
else {
#ifdef USE_UTAB
d_hi = (x - H(i)) * G(i) + E(i);
#else
long double x_hi, x_lo;
float fx_hi;
/*
* This is _2sumF(d_hi, d_lo) inlined. The condition
* (d_hi == 0 || |d_hi| >= |d_lo|) for using _2sumF() is not
* always satisifed, so it is not clear that this works, but
* it works in practice. It works even if it gives a wrong
* normalized d_lo, since |d_lo| > |d_hi| implies that i is
* nonzero and d is tiny, so the F(i) term dominates d_lo.
* In float precision:
* (By exhaustive testing, the worst case is d_hi = 0x1.bp-25.
* And if d is only a little tinier than that, we would have
* another underflow problem for the P3 term; this is also ruled
* out by exhaustive testing.)
*/
d = d_hi + d_lo;
d_lo = d_hi - d + d_lo;
d_hi = d;
z = d * d;
val_lo = z * d * z * (z * (d * P8 + P7) + (d * P6 + P5)) +
(F_lo(i) + dk * ln2_lo + d_lo + z * d * (d * P4 + P3)) + z * P2;
val_hi = d_hi;
#ifdef DEBUG
if (fetestexcept(FE_UNDERFLOW))
breakpoint();
#endif