mirror of
https://review.haiku-os.org/buildtools
synced 2025-02-12 08:47:41 +01:00
Change-Id: Iae65e95cfa0d92091b8b0a424ae36d88efa76aa9 Reviewed-on: https://review.haiku-os.org/c/buildtools/+/3020 Reviewed-by: Adrien Destugues <pulkomandy@gmail.com>
196 lines
4.6 KiB
C
196 lines
4.6 KiB
C
/* mpn_broot -- Compute hensel sqrt
|
|
|
|
Contributed to the GNU project by Niels Möller
|
|
|
|
THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
|
|
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
|
|
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
|
|
|
|
Copyright 2012 Free Software Foundation, Inc.
|
|
|
|
This file is part of the GNU MP Library.
|
|
|
|
The GNU MP Library is free software; you can redistribute it and/or modify
|
|
it under the terms of either:
|
|
|
|
* the GNU Lesser General Public License as published by the Free
|
|
Software Foundation; either version 3 of the License, or (at your
|
|
option) any later version.
|
|
|
|
or
|
|
|
|
* the GNU General Public License as published by the Free Software
|
|
Foundation; either version 2 of the License, or (at your option) any
|
|
later version.
|
|
|
|
or both in parallel, as here.
|
|
|
|
The GNU MP Library is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
for more details.
|
|
|
|
You should have received copies of the GNU General Public License and the
|
|
GNU Lesser General Public License along with the GNU MP Library. If not,
|
|
see https://www.gnu.org/licenses/. */
|
|
|
|
#include "gmp-impl.h"
|
|
|
|
/* Computes a^e (mod B). Uses right-to-left binary algorithm, since
|
|
typical use will have e small. */
|
|
static mp_limb_t
|
|
powlimb (mp_limb_t a, mp_limb_t e)
|
|
{
|
|
mp_limb_t r = 1;
|
|
mp_limb_t s = a;
|
|
|
|
for (r = 1, s = a; e > 0; e >>= 1, s *= s)
|
|
if (e & 1)
|
|
r *= s;
|
|
|
|
return r;
|
|
}
|
|
|
|
/* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.
|
|
|
|
Iterates
|
|
|
|
r' <-- r - r * (a^{k-1} r^k - 1) / n
|
|
|
|
If
|
|
|
|
a^{k-1} r^k = 1 (mod 2^m),
|
|
|
|
then
|
|
|
|
a^{k-1} r'^k = 1 (mod 2^{2m}),
|
|
|
|
Compute the update term as
|
|
|
|
r' = r - (a^{k-1} r^{k+1} - r) / k
|
|
|
|
where we still have cancellation of low limbs.
|
|
|
|
*/
|
|
void
|
|
mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
|
|
{
|
|
mp_size_t sizes[GMP_LIMB_BITS * 2];
|
|
mp_ptr akm1, tp, rnp, ep;
|
|
mp_limb_t a0, r0, km1, kp1h, kinv;
|
|
mp_size_t rn;
|
|
unsigned i;
|
|
|
|
TMP_DECL;
|
|
|
|
ASSERT (n > 0);
|
|
ASSERT (ap[0] & 1);
|
|
ASSERT (k & 1);
|
|
ASSERT (k >= 3);
|
|
|
|
TMP_MARK;
|
|
|
|
akm1 = TMP_ALLOC_LIMBS (4*n);
|
|
tp = akm1 + n;
|
|
|
|
km1 = k-1;
|
|
/* FIXME: Could arrange the iteration so we don't need to compute
|
|
this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
|
|
that we can use wraparound also for a*r, since the low half is
|
|
unchanged from the previous iteration. Or possibly mulmid. Also,
|
|
a r = a^{1/k}, so we get that value too, for free? */
|
|
mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */
|
|
|
|
a0 = ap[0];
|
|
binvert_limb (kinv, k);
|
|
|
|
/* 4 bits: a^{1/k - 1} (mod 16):
|
|
|
|
a % 8
|
|
1 3 5 7
|
|
k%4 +-------
|
|
1 |1 1 1 1
|
|
3 |1 9 9 1
|
|
*/
|
|
r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
|
|
r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
|
|
r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
|
|
r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
|
|
#if GMP_NUMB_BITS > 32
|
|
{
|
|
unsigned prec = 32;
|
|
do
|
|
{
|
|
r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
|
|
prec *= 2;
|
|
}
|
|
while (prec < GMP_NUMB_BITS);
|
|
}
|
|
#endif
|
|
|
|
rp[0] = r0;
|
|
if (n == 1)
|
|
{
|
|
TMP_FREE;
|
|
return;
|
|
}
|
|
|
|
/* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
|
|
kp1h = k/2 + 1;
|
|
|
|
/* FIXME: Special case for two limb iteration. */
|
|
rnp = TMP_ALLOC_LIMBS (2*n + 1);
|
|
ep = rnp + n;
|
|
|
|
/* FIXME: Possible to this on the fly with some bit fiddling. */
|
|
for (i = 0; n > 1; n = (n + 1)/2)
|
|
sizes[i++] = n;
|
|
|
|
rn = 1;
|
|
|
|
while (i-- > 0)
|
|
{
|
|
/* Compute x^{k+1}. */
|
|
mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
|
|
final iteration. */
|
|
mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);
|
|
|
|
/* Multiply by a^{k-1}. Can use wraparound; low part equals r. */
|
|
|
|
mpn_mullo_n (ep, rnp, akm1, sizes[i]);
|
|
ASSERT (mpn_cmp (ep, rp, rn) == 0);
|
|
|
|
ASSERT (sizes[i] <= 2*rn);
|
|
mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
|
|
mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
|
|
rn = sizes[i];
|
|
}
|
|
TMP_FREE;
|
|
}
|
|
|
|
/* Computes a^{1/k} (mod B^n). Both a and k must be odd. */
|
|
void
|
|
mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
|
|
{
|
|
mp_ptr tp;
|
|
TMP_DECL;
|
|
|
|
ASSERT (n > 0);
|
|
ASSERT (ap[0] & 1);
|
|
ASSERT (k & 1);
|
|
|
|
if (k == 1)
|
|
{
|
|
MPN_COPY (rp, ap, n);
|
|
return;
|
|
}
|
|
|
|
TMP_MARK;
|
|
tp = TMP_ALLOC_LIMBS (n);
|
|
|
|
mpn_broot_invm1 (tp, ap, n, k);
|
|
mpn_mullo_n (rp, tp, ap, n);
|
|
|
|
TMP_FREE;
|
|
}
|