/* mpn_mu_bdiv_qr(qp,rp,np,nn,dp,dn,tp) -- Compute {np,nn} / {dp,dn} mod B^qn,
   where qn = nn-dn, storing the result in {qp,qn}.  Overlap allowed between Q
   and N; all other overlap disallowed.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright 2005-2007, 2009, 2010, 2012, 2017 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


/*
   The idea of the algorithm used herein is to compute a smaller inverted value
   than used in the standard Barrett algorithm, and thus save time in the
   Newton iterations, and pay just a small price when using the inverted value
   for developing quotient bits.  This algorithm was presented at ICMS 2006.
*/
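
/* Orientation sketch (added note, not part of the upstream GMP comments):
   both branches below iterate a Hensel-style (2-adic) block step.  With
   ip = D^{-1} mod B^in, as produced by mpn_binvert, each step computes

     q_block = (R * ip) mod B^in           (mpn_mullo_n)
     R       = (R - q_block * D) / B^in    (exact; the low `in' limbs cancel)

   so `in' quotient limbs are produced per iteration, least significant
   limbs first.  */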

#include "gmp-impl.h"


/* N = {np,nn}
   D = {dp,dn}

   Requirements: N >= D
                 D >= 1
                 D odd
                 dn >= 2
                 nn >= 2
                 scratch space as determined by mpn_mu_bdiv_qr_itch(nn,dn).

   Write quotient to Q = {qp,nn-dn}.

   FIXME: When iterating, perhaps do the small step before loop, not after.
   FIXME: Try to avoid the scalar divisions when computing inverse size.
   FIXME: Trim allocation for (qn > dn) case, 3*dn might be possible.  In
          particular, when dn==in, tp and rp could use the same space.
*/
static mp_limb_t
mpn_mu_bdiv_qr_old (mp_ptr qp,
                    mp_ptr rp,
                    mp_srcptr np, mp_size_t nn,
                    mp_srcptr dp, mp_size_t dn,
                    mp_ptr scratch)
{
  mp_size_t qn;
  mp_size_t in;
  mp_limb_t cy, c0;
  mp_size_t tn, wn;

  qn = nn - dn;

  ASSERT (dn >= 2);
  ASSERT (qn >= 2);

  if (qn > dn)
    {
      mp_size_t b;

      /* |_______________________|   dividend
                        |________|   divisor  */

#define ip           scratch            /* in */
#define tp           (scratch + in)     /* dn+in or next_size(dn) or rest >= binvert_itch(in) */
#define scratch_out  (scratch + in + tn) /* mulmod_bnm1_itch(next_size(dn)) */

      /* Compute an inverse size that is a nice partition of the quotient.  */
      b = (qn - 1) / dn + 1;    /* ceil(qn/dn), number of blocks */
      in = (qn - 1) / b + 1;    /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
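
      /* Worked example (illustrative note, not from the upstream source):
         for qn = 10 and dn = 4 this gives b = (10-1)/4+1 = 3 blocks and
         in = (10-1)/3+1 = 4, so the quotient is produced in blocks of
         4, 4 and 2 limbs, each block no wider than the dn-limb divisor.  */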

      /* Some notes on allocation:

         When in = dn, R dies when mpn_mullo returns, if in < dn the low in
         limbs of R dies at that point.  We could save memory by letting T live
         just under R, and let the upper part of T expand into R.  These changes
         should reduce itch to perhaps 3dn.
       */

      mpn_binvert (ip, dp, in, tp);

      MPN_COPY (rp, np, dn);
      np += dn;
      cy = 0;

      while (qn > in)
        {
          mpn_mullo_n (qp, rp, ip, in);

          if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
            mpn_mul (tp, dp, dn, qp, in);       /* mulhi, need tp[dn+in-1...in] */
          else
            {
              tn = mpn_mulmod_bnm1_next_size (dn);
              mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
              wn = dn + in - tn;                /* number of wrapped limbs */
              if (wn > 0)
                {
                  c0 = mpn_sub_n (tp + tn, tp, rp, wn);
                  mpn_decr_u (tp + wn, c0);
                }
            }
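
          /* Added explanation (not from upstream): mpn_mulmod_bnm1 returns
             D*q mod (B^tn - 1), so the top wn limbs of the true product have
             been folded into the low limbs.  Since the low limbs of D*q
             agree with R by construction, they can be subtracted out to
             reconstruct the wrapped high limbs at tp[tn] and above.  */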

          qp += in;
          qn -= in;

          if (dn != in)
            {
              /* Subtract tp[dn-1...in] from partial remainder.  */
              cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
              if (cy == 2)
                {
                  mpn_incr_u (tp + dn, 1);
                  cy = 1;
                }
            }
          /* Subtract tp[dn+in-1...dn] from dividend.  */
          cy = mpn_sub_nc (rp + dn - in, np, tp + dn, in, cy);
          np += in;
        }

      /* Generate last qn limbs.  */
      mpn_mullo_n (qp, rp, ip, qn);

      if (BELOW_THRESHOLD (qn, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
        mpn_mul (tp, dp, dn, qp, qn);           /* mulhi, need tp[qn+in-1...in] */
      else
        {
          tn = mpn_mulmod_bnm1_next_size (dn);
          mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, qn, scratch_out);
          wn = dn + qn - tn;                    /* number of wrapped limbs */
          if (wn > 0)
            {
              c0 = mpn_sub_n (tp + tn, tp, rp, wn);
              mpn_decr_u (tp + wn, c0);
            }
        }

      if (dn != qn)
        {
          cy += mpn_sub_n (rp, rp + qn, tp + qn, dn - qn);
          if (cy == 2)
            {
              mpn_incr_u (tp + dn, 1);
              cy = 1;
            }
        }
      return mpn_sub_nc (rp + dn - qn, np, tp + dn, qn, cy);

#undef ip
#undef tp
#undef scratch_out
    }
  else
    {
      /* |_______________________|   dividend
                |________________|   divisor  */

#define ip           scratch            /* in */
#define tp           (scratch + in)     /* dn+in or next_size(dn) or rest >= binvert_itch(in) */
#define scratch_out  (scratch + in + tn) /* mulmod_bnm1_itch(next_size(dn)) */

      /* Compute half-sized inverse.  */
      in = qn - (qn >> 1);
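
      /* For example (illustrative note, not from the upstream source): with
         qn = 7 this picks in = 4, so the low 4 quotient limbs are produced
         with the 4-limb inverse and the remaining 3 limbs in the second,
         smaller step below.  */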

      mpn_binvert (ip, dp, in, tp);

      mpn_mullo_n (qp, np, ip, in);             /* low `in' quotient limbs */

      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
        mpn_mul (tp, dp, dn, qp, in);           /* mulhigh */
      else
        {
          tn = mpn_mulmod_bnm1_next_size (dn);
          mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
          wn = dn + in - tn;                    /* number of wrapped limbs */
          if (wn > 0)
            {
              c0 = mpn_sub_n (tp + tn, tp, np, wn);
              mpn_decr_u (tp + wn, c0);
            }
        }

      qp += in;
      qn -= in;

      cy = mpn_sub_n (rp, np + in, tp + in, dn);
      mpn_mullo_n (qp, rp, ip, qn);             /* high qn quotient limbs */

      if (BELOW_THRESHOLD (qn, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
        mpn_mul (tp, dp, dn, qp, qn);           /* mulhigh */
      else
        {
          tn = mpn_mulmod_bnm1_next_size (dn);
          mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, qn, scratch_out);
          wn = dn + qn - tn;                    /* number of wrapped limbs */
          if (wn > 0)
            {
              c0 = mpn_sub_n (tp + tn, tp, rp, wn);
              mpn_decr_u (tp + wn, c0);
            }
        }

      cy += mpn_sub_n (rp, rp + qn, tp + qn, dn - qn);
      if (cy == 2)
        {
          mpn_incr_u (tp + dn, 1);
          cy = 1;
        }
      return mpn_sub_nc (rp + dn - qn, np + dn + in, tp + dn, qn, cy);

#undef ip
#undef tp
#undef scratch_out
    }
}

mp_limb_t
mpn_mu_bdiv_qr (mp_ptr qp,
                mp_ptr rp,
                mp_srcptr np, mp_size_t nn,
                mp_srcptr dp, mp_size_t dn,
                mp_ptr scratch)
{
  mp_limb_t cy = mpn_mu_bdiv_qr_old (qp, rp, np, nn, dp, dn, scratch);

  /* R' B^{qn} = U - Q' D
   *
   * Q = B^{qn} - Q' (assuming Q' != 0)
   *
   * R B^{qn} = U + Q D = U + B^{qn} D - Q' D
   *          = B^{qn} D + R'
   *
   * so R = R' + D: negate the quotient and add D back into the remainder.
   */

  if (UNLIKELY (!mpn_neg (qp, qp, nn - dn)))
    {
      /* Zero quotient.  */
      ASSERT (cy == 0);
      return 0;
    }
  else
    {
      mp_limb_t cy2 = mpn_add_n (rp, rp, dp, dn);
      ASSERT (cy2 >= cy);

      return cy2 - cy;
    }
}


mp_size_t
mpn_mu_bdiv_qr_itch (mp_size_t nn, mp_size_t dn)
{
  mp_size_t qn, in, tn, itch_binvert, itch_out, itches;
  mp_size_t b;

  ASSERT_ALWAYS (DC_BDIV_Q_THRESHOLD < MU_BDIV_Q_THRESHOLD);

  qn = nn - dn;

  if (qn > dn)
    {
      b = (qn - 1) / dn + 1;    /* ceil(qn/dn), number of blocks */
      in = (qn - 1) / b + 1;    /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
    }
  else
    {
      in = qn - (qn >> 1);
    }

  if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
    {
      tn = dn + in;
      itch_out = 0;
    }
  else
    {
      tn = mpn_mulmod_bnm1_next_size (dn);
      itch_out = mpn_mulmod_bnm1_itch (tn, dn, in);
    }

  itch_binvert = mpn_binvert_itch (in);
  itches = tn + itch_out;
  return in + MAX (itches, itch_binvert);
}
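
/* Caller-side sketch (added for illustration; not part of the upstream GMP
   sources -- the TMP_* temporary-allocation helpers from gmp-impl.h are an
   assumption about how a caller would typically obtain scratch space).  The
   operands must satisfy the requirements documented above (D odd, etc.):

     TMP_DECL;
     TMP_MARK;
     mp_ptr scratch = TMP_ALLOC_LIMBS (mpn_mu_bdiv_qr_itch (nn, dn));
     mp_limb_t cy = mpn_mu_bdiv_qr (qp, rp, np, nn, dp, dn, scratch);
     TMP_FREE;

   On return, {qp, nn-dn} holds N/D mod B^(nn-dn) and {rp, dn} holds the
   remainder limbs.  */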