mirror of
https://review.haiku-os.org/buildtools
synced 2025-02-07 22:44:46 +01:00
b58ddff026
* merged mpfr 3.0.0 and gmp 5.0.1 in buildtools trunk git-svn-id: file:///srv/svn/repos/haiku/buildtools/trunk@37378 a95241bf-73f2-0310-859d-f6bbb57e9c96
366 lines
13 KiB
C
366 lines
13 KiB
C
/* mpfr_pow_z -- power function x^z with z a MPZ
|
|
|
|
Copyright 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
|
|
Contributed by the Arenaire and Cacao projects, INRIA.
|
|
|
|
This file is part of the GNU MPFR Library.
|
|
|
|
The GNU MPFR Library is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU Lesser General Public License as published by
|
|
the Free Software Foundation; either version 3 of the License, or (at your
|
|
option) any later version.
|
|
|
|
The GNU MPFR Library is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
|
License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public License
|
|
along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
|
|
http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
|
|
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
|
|
|
|
#define MPFR_NEED_LONGLONG_H
|
|
#include "mpfr-impl.h"
|
|
|
|
/* y <- x^|z| with z != 0
|
|
if cr=1: ensures correct rounding of y
|
|
if cr=0: does not ensure correct rounding, but avoid spurious overflow
|
|
or underflow, and uses the precision of y as working precision (warning,
|
|
y and x might be the same variable). */
|
|
static int
|
|
mpfr_pow_pos_z (mpfr_ptr y, mpfr_srcptr x, mpz_srcptr z, mpfr_rnd_t rnd, int cr)
|
|
{
|
|
mpfr_t res;
|
|
mpfr_prec_t prec, err;
|
|
int inexact;
|
|
mpfr_rnd_t rnd1, rnd2;
|
|
mpz_t absz;
|
|
mp_size_t size_z;
|
|
MPFR_ZIV_DECL (loop);
|
|
MPFR_BLOCK_DECL (flags);
|
|
|
|
MPFR_LOG_FUNC (("x[%#R]=%R z=? rnd=%d cr=%d", x, x, rnd, cr),
|
|
("y[%#R]=%R inexact=%d", y, y, inexact));
|
|
|
|
MPFR_ASSERTD (mpz_sgn (z) != 0);
|
|
|
|
if (MPFR_UNLIKELY (mpz_cmpabs_ui (z, 1) == 0))
|
|
return mpfr_set (y, x, rnd);
|
|
|
|
absz[0] = z[0];
|
|
SIZ (absz) = ABS(SIZ(absz)); /* Hack to get abs(z) */
|
|
MPFR_MPZ_SIZEINBASE2 (size_z, z);
|
|
|
|
/* round toward 1 (or -1) to avoid spurious overflow or underflow,
|
|
i.e. if an overflow or underflow occurs, it is a real exception
|
|
and is not just due to the rounding error. */
|
|
rnd1 = (MPFR_EXP(x) >= 1) ? MPFR_RNDZ
|
|
: (MPFR_IS_POS(x) ? MPFR_RNDU : MPFR_RNDD);
|
|
rnd2 = (MPFR_EXP(x) >= 1) ? MPFR_RNDD : MPFR_RNDU;
|
|
|
|
if (cr != 0)
|
|
prec = MPFR_PREC (y) + 3 + size_z + MPFR_INT_CEIL_LOG2 (MPFR_PREC (y));
|
|
else
|
|
prec = MPFR_PREC (y);
|
|
mpfr_init2 (res, prec);
|
|
|
|
MPFR_ZIV_INIT (loop, prec);
|
|
for (;;)
|
|
{
|
|
unsigned int inexmul; /* will be non-zero if res may be inexact */
|
|
mp_size_t i = size_z;
|
|
|
|
/* now 2^(i-1) <= z < 2^i */
|
|
/* see below (case z < 0) for the error analysis, which is identical,
|
|
except if z=n, the maximal relative error is here 2(n-1)2^(-prec)
|
|
instead of 2(2n-1)2^(-prec) for z<0. */
|
|
MPFR_ASSERTD (prec > (mpfr_prec_t) i);
|
|
err = prec - 1 - (mpfr_prec_t) i;
|
|
|
|
MPFR_BLOCK (flags,
|
|
inexmul = mpfr_mul (res, x, x, rnd2);
|
|
MPFR_ASSERTD (i >= 2);
|
|
if (mpz_tstbit (absz, i - 2))
|
|
inexmul |= mpfr_mul (res, res, x, rnd1);
|
|
for (i -= 3; i >= 0 && !MPFR_BLOCK_EXCEP; i--)
|
|
{
|
|
inexmul |= mpfr_mul (res, res, res, rnd2);
|
|
if (mpz_tstbit (absz, i))
|
|
inexmul |= mpfr_mul (res, res, x, rnd1);
|
|
});
|
|
if (MPFR_LIKELY (inexmul == 0 || cr == 0
|
|
|| MPFR_OVERFLOW (flags) || MPFR_UNDERFLOW (flags)
|
|
|| MPFR_CAN_ROUND (res, err, MPFR_PREC (y), rnd)))
|
|
break;
|
|
/* Can't decide correct rounding, increase the precision */
|
|
MPFR_ZIV_NEXT (loop, prec);
|
|
mpfr_set_prec (res, prec);
|
|
}
|
|
MPFR_ZIV_FREE (loop);
|
|
|
|
/* Check Overflow */
|
|
if (MPFR_OVERFLOW (flags))
|
|
{
|
|
MPFR_LOG_MSG (("overflow\n", 0));
|
|
inexact = mpfr_overflow (y, rnd, mpz_odd_p (absz) ?
|
|
MPFR_SIGN (x) : MPFR_SIGN_POS);
|
|
}
|
|
/* Check Underflow */
|
|
else if (MPFR_UNDERFLOW (flags))
|
|
{
|
|
MPFR_LOG_MSG (("underflow\n", 0));
|
|
if (rnd == MPFR_RNDN)
|
|
{
|
|
mpfr_t y2, zz;
|
|
|
|
/* We cannot decide now whether the result should be rounded
|
|
toward zero or +Inf. So, let's use the general case of
|
|
mpfr_pow, which can do that. But the problem is that the
|
|
result can be exact! However, it is sufficient to try to
|
|
round on 2 bits (the precision does not matter in case of
|
|
underflow, since MPFR does not have subnormals), in which
|
|
case, the result cannot be exact due to previous filtering
|
|
of trivial cases. */
|
|
MPFR_ASSERTD (mpfr_cmp_si_2exp (x, MPFR_SIGN (x),
|
|
MPFR_EXP (x) - 1) != 0);
|
|
mpfr_init2 (y2, 2);
|
|
mpfr_init2 (zz, ABS (SIZ (z)) * GMP_NUMB_BITS);
|
|
inexact = mpfr_set_z (zz, z, MPFR_RNDN);
|
|
MPFR_ASSERTN (inexact == 0);
|
|
inexact = mpfr_pow_general (y2, x, zz, rnd, 1,
|
|
(mpfr_save_expo_t *) NULL);
|
|
mpfr_clear (zz);
|
|
mpfr_set (y, y2, MPFR_RNDN);
|
|
mpfr_clear (y2);
|
|
__gmpfr_flags = MPFR_FLAGS_INEXACT | MPFR_FLAGS_UNDERFLOW;
|
|
}
|
|
else
|
|
{
|
|
inexact = mpfr_underflow (y, rnd, mpz_odd_p (absz) ?
|
|
MPFR_SIGN (x) : MPFR_SIGN_POS);
|
|
}
|
|
}
|
|
else
|
|
inexact = mpfr_set (y, res, rnd);
|
|
|
|
mpfr_clear (res);
|
|
return inexact;
|
|
}
|
|
|
|
/* The computation of y = pow(x,z) is done by
|
|
* y = set_ui(1) if z = 0
|
|
* y = pow_ui(x,z) if z > 0
|
|
* y = pow_ui(1/x,-z) if z < 0
|
|
*
|
|
* Note: in case z < 0, we could also compute 1/pow_ui(x,-z). However, in
|
|
* case MAX < 1/MIN, where MAX is the largest positive value, i.e.,
|
|
* MAX = nextbelow(+Inf), and MIN is the smallest positive value, i.e.,
|
|
* MIN = nextabove(+0), then x^(-z) might produce an overflow, whereas
|
|
* x^z is representable.
|
|
*/
|
|
|
|
int
|
|
mpfr_pow_z (mpfr_ptr y, mpfr_srcptr x, mpz_srcptr z, mpfr_rnd_t rnd)
|
|
{
|
|
int inexact;
|
|
mpz_t tmp;
|
|
MPFR_SAVE_EXPO_DECL (expo);
|
|
|
|
MPFR_LOG_FUNC (("x[%#R]=%R z=? rnd=%d", x, x, rnd),
|
|
("y[%#R]=%R inexact=%d", y, y, inexact));
|
|
|
|
/* x^0 = 1 for any x, even a NaN */
|
|
if (MPFR_UNLIKELY (mpz_sgn (z) == 0))
|
|
return mpfr_set_ui (y, 1, rnd);
|
|
|
|
if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
|
|
{
|
|
if (MPFR_IS_NAN (x))
|
|
{
|
|
MPFR_SET_NAN (y);
|
|
MPFR_RET_NAN;
|
|
}
|
|
else if (MPFR_IS_INF (x))
|
|
{
|
|
/* Inf^n = Inf, (-Inf)^n = Inf for n even, -Inf for n odd */
|
|
/* Inf ^(-n) = 0, sign = + if x>0 or z even */
|
|
if (mpz_sgn (z) > 0)
|
|
MPFR_SET_INF (y);
|
|
else
|
|
MPFR_SET_ZERO (y);
|
|
if (MPFR_UNLIKELY (MPFR_IS_NEG (x) && mpz_odd_p (z)))
|
|
MPFR_SET_NEG (y);
|
|
else
|
|
MPFR_SET_POS (y);
|
|
MPFR_RET (0);
|
|
}
|
|
else /* x is zero */
|
|
{
|
|
MPFR_ASSERTD (MPFR_IS_ZERO(x));
|
|
if (mpz_sgn (z) > 0)
|
|
/* 0^n = +/-0 for any n */
|
|
MPFR_SET_ZERO (y);
|
|
else
|
|
/* 0^(-n) if +/- INF */
|
|
MPFR_SET_INF (y);
|
|
if (MPFR_LIKELY (MPFR_IS_POS (x) || mpz_even_p (z)))
|
|
MPFR_SET_POS (y);
|
|
else
|
|
MPFR_SET_NEG (y);
|
|
MPFR_RET(0);
|
|
}
|
|
}
|
|
|
|
/* detect exact powers: x^-n is exact iff x is a power of 2
|
|
Do it if n > 0 too as this is faster and this filtering is
|
|
needed in case of underflow. */
|
|
if (MPFR_UNLIKELY (mpfr_cmp_si_2exp (x, MPFR_SIGN (x),
|
|
MPFR_EXP (x) - 1) == 0))
|
|
{
|
|
mpfr_exp_t expx = MPFR_EXP (x); /* warning: x and y may be the same
|
|
variable */
|
|
|
|
MPFR_LOG_MSG (("x^n with x power of two\n", 0));
|
|
mpfr_set_si (y, mpz_odd_p (z) ? MPFR_INT_SIGN(x) : 1, rnd);
|
|
MPFR_ASSERTD (MPFR_IS_FP (y));
|
|
mpz_init (tmp);
|
|
mpz_mul_si (tmp, z, expx - 1);
|
|
MPFR_ASSERTD (MPFR_GET_EXP (y) == 1);
|
|
mpz_add_ui (tmp, tmp, 1);
|
|
inexact = 0;
|
|
if (MPFR_UNLIKELY (mpz_cmp_si (tmp, __gmpfr_emin) < 0))
|
|
{
|
|
MPFR_LOG_MSG (("underflow\n", 0));
|
|
/* |y| is a power of two, thus |y| <= 2^(emin-2), and in
|
|
rounding to nearest, the value must be rounded to 0. */
|
|
if (rnd == MPFR_RNDN)
|
|
rnd = MPFR_RNDZ;
|
|
inexact = mpfr_underflow (y, rnd, MPFR_SIGN (y));
|
|
}
|
|
else if (MPFR_UNLIKELY (mpz_cmp_si (tmp, __gmpfr_emax) > 0))
|
|
{
|
|
MPFR_LOG_MSG (("overflow\n", 0));
|
|
inexact = mpfr_overflow (y, rnd, MPFR_SIGN (y));
|
|
}
|
|
else
|
|
MPFR_SET_EXP (y, mpz_get_si (tmp));
|
|
mpz_clear (tmp);
|
|
MPFR_RET (inexact);
|
|
}
|
|
|
|
MPFR_SAVE_EXPO_MARK (expo);
|
|
|
|
if (mpz_sgn (z) > 0)
|
|
{
|
|
inexact = mpfr_pow_pos_z (y, x, z, rnd, 1);
|
|
MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
|
|
}
|
|
else
|
|
{
|
|
/* Declaration of the intermediary variable */
|
|
mpfr_t t;
|
|
mpfr_prec_t Nt; /* Precision of the intermediary variable */
|
|
mpfr_rnd_t rnd1;
|
|
mp_size_t size_z;
|
|
MPFR_ZIV_DECL (loop);
|
|
|
|
MPFR_MPZ_SIZEINBASE2 (size_z, z);
|
|
|
|
/* initial working precision */
|
|
Nt = MPFR_PREC (y);
|
|
Nt = Nt + size_z + 3 + MPFR_INT_CEIL_LOG2 (Nt);
|
|
/* ensures Nt >= bits(z)+2 */
|
|
|
|
/* initialise of intermediary variable */
|
|
mpfr_init2 (t, Nt);
|
|
|
|
/* We will compute rnd(rnd1(1/x) ^ (-z)), where rnd1 is the rounding
|
|
toward sign(x), to avoid spurious overflow or underflow. */
|
|
rnd1 = MPFR_EXP (x) < 1 ? MPFR_RNDZ :
|
|
(MPFR_SIGN (x) > 0 ? MPFR_RNDU : MPFR_RNDD);
|
|
|
|
MPFR_ZIV_INIT (loop, Nt);
|
|
for (;;)
|
|
{
|
|
MPFR_BLOCK_DECL (flags);
|
|
|
|
/* compute (1/x)^(-z), -z>0 */
|
|
/* As emin = -emax, an underflow cannot occur in the division.
|
|
And if an overflow occurs, then this means that x^z overflows
|
|
too (since we have rounded toward 1 or -1). */
|
|
MPFR_BLOCK (flags, mpfr_ui_div (t, 1, x, rnd1));
|
|
MPFR_ASSERTD (! MPFR_UNDERFLOW (flags));
|
|
/* t = (1/x)*(1+theta) where |theta| <= 2^(-Nt) */
|
|
if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags)))
|
|
goto overflow;
|
|
MPFR_BLOCK (flags, mpfr_pow_pos_z (t, t, z, rnd, 0));
|
|
/* Now if z=-n, t = x^z*(1+theta)^(2n-1) where |theta| <= 2^(-Nt),
|
|
with theta maybe different from above. If (2n-1)*2^(-Nt) <= 1/2,
|
|
which is satisfied as soon as Nt >= bits(z)+2, then we can use
|
|
Lemma \ref{lemma_graillat} from algorithms.tex, which yields
|
|
t = x^z*(1+theta) with |theta| <= 2(2n-1)*2^(-Nt), thus the
|
|
error is bounded by 2(2n-1) ulps <= 2^(bits(z)+2) ulps. */
|
|
if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags)))
|
|
{
|
|
overflow:
|
|
MPFR_ZIV_FREE (loop);
|
|
mpfr_clear (t);
|
|
MPFR_SAVE_EXPO_FREE (expo);
|
|
MPFR_LOG_MSG (("overflow\n", 0));
|
|
return mpfr_overflow (y, rnd,
|
|
mpz_odd_p (z) ? MPFR_SIGN (x) :
|
|
MPFR_SIGN_POS);
|
|
}
|
|
if (MPFR_UNLIKELY (MPFR_UNDERFLOW (flags)))
|
|
{
|
|
MPFR_ZIV_FREE (loop);
|
|
mpfr_clear (t);
|
|
MPFR_LOG_MSG (("underflow\n", 0));
|
|
if (rnd == MPFR_RNDN)
|
|
{
|
|
mpfr_t y2, zz;
|
|
|
|
/* We cannot decide now whether the result should be
|
|
rounded toward zero or away from zero. So, like
|
|
in mpfr_pow_pos_z, let's use the general case of
|
|
mpfr_pow in precision 2. */
|
|
MPFR_ASSERTD (mpfr_cmp_si_2exp (x, MPFR_SIGN (x),
|
|
MPFR_EXP (x) - 1) != 0);
|
|
mpfr_init2 (y2, 2);
|
|
mpfr_init2 (zz, ABS (SIZ (z)) * GMP_NUMB_BITS);
|
|
inexact = mpfr_set_z (zz, z, MPFR_RNDN);
|
|
MPFR_ASSERTN (inexact == 0);
|
|
inexact = mpfr_pow_general (y2, x, zz, rnd, 1,
|
|
(mpfr_save_expo_t *) NULL);
|
|
mpfr_clear (zz);
|
|
mpfr_set (y, y2, MPFR_RNDN);
|
|
mpfr_clear (y2);
|
|
MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_UNDERFLOW);
|
|
goto end;
|
|
}
|
|
else
|
|
{
|
|
MPFR_SAVE_EXPO_FREE (expo);
|
|
return mpfr_underflow (y, rnd, mpz_odd_p (z) ?
|
|
MPFR_SIGN (x) : MPFR_SIGN_POS);
|
|
}
|
|
}
|
|
if (MPFR_LIKELY (MPFR_CAN_ROUND (t, Nt - size_z - 2, MPFR_PREC (y),
|
|
rnd)))
|
|
break;
|
|
/* actualisation of the precision */
|
|
MPFR_ZIV_NEXT (loop, Nt);
|
|
mpfr_set_prec (t, Nt);
|
|
}
|
|
MPFR_ZIV_FREE (loop);
|
|
|
|
inexact = mpfr_set (y, t, rnd);
|
|
mpfr_clear (t);
|
|
}
|
|
|
|
end:
|
|
MPFR_SAVE_EXPO_FREE (expo);
|
|
return mpfr_check_range (y, inexact, rnd);
|
|
}
|