mirror of
https://review.haiku-os.org/buildtools
synced 2025-02-23 06:07:41 +01:00
Change-Id: Iae65e95cfa0d92091b8b0a424ae36d88efa76aa9 Reviewed-on: https://review.haiku-os.org/c/buildtools/+/3020 Reviewed-by: Adrien Destugues <pulkomandy@gmail.com>
460 lines
11 KiB
C
460 lines
11 KiB
C
/* mpz_bin_ui(RESULT, N, K) -- Set RESULT to N over K.
|
|
|
|
Copyright 1998-2002, 2012, 2013, 2015, 2017-2018 Free Software Foundation, Inc.
|
|
|
|
This file is part of the GNU MP Library.
|
|
|
|
The GNU MP Library is free software; you can redistribute it and/or modify
|
|
it under the terms of either:
|
|
|
|
* the GNU Lesser General Public License as published by the Free
|
|
Software Foundation; either version 3 of the License, or (at your
|
|
option) any later version.
|
|
|
|
or
|
|
|
|
* the GNU General Public License as published by the Free Software
|
|
Foundation; either version 2 of the License, or (at your option) any
|
|
later version.
|
|
|
|
or both in parallel, as here.
|
|
|
|
The GNU MP Library is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
for more details.
|
|
|
|
You should have received copies of the GNU General Public License and the
|
|
GNU Lesser General Public License along with the GNU MP Library. If not,
|
|
see https://www.gnu.org/licenses/. */
|
|
|
|
#include "gmp-impl.h"
|
|
|
|
/* How many special cases? Minimum is 2: 0 and 1;
|
|
* also 3 {0,1,2} and 5 {0,1,2,3,4} are implemented.
|
|
*/
|
|
#define APARTAJ_KALKULOJ 2
|
|
|
|
/* Whether to use (1) or not (0) the function mpz_bin_uiui whenever
|
|
* the operands fit.
|
|
*/
|
|
#define UZU_BIN_UIUI 0
|
|
|
|
/* Whether to use a shortcut to precompute the product of four
|
|
* elements (1), or precompute only the product of a couple (0).
|
|
*
|
|
* In both cases the precomputed product is then updated with some
|
|
* linear operations to obtain the product of the next four (1)
|
|
* [or two (0)] operands.
|
|
*/
|
|
#define KVAROPE 1
|
|
|
|
static void
|
|
posmpz_init (mpz_ptr r)
|
|
{
|
|
mp_ptr rp;
|
|
ASSERT (SIZ (r) > 0);
|
|
rp = SIZ (r) + MPZ_REALLOC (r, SIZ (r) + 2);
|
|
*rp = 0;
|
|
*++rp = 0;
|
|
}
|
|
|
|
/* Equivalent to mpz_add_ui (r, r, in), but faster when
|
|
0 < SIZ (r) < ALLOC (r) and limbs above SIZ (r) contain 0. */
|
|
static void
|
|
posmpz_inc_ui (mpz_ptr r, unsigned long in)
|
|
{
|
|
#if BITS_PER_ULONG > GMP_NUMB_BITS
|
|
mpz_add_ui (r, r, in);
|
|
#else
|
|
ASSERT (SIZ (r) > 0);
|
|
MPN_INCR_U (PTR (r), SIZ (r) + 1, in);
|
|
SIZ (r) += (PTR (r)[SIZ (r)] != 0);
|
|
#endif
|
|
}
|
|
|
|
/* Equivalent to mpz_sub_ui (r, r, in), but faster when
|
|
0 < SIZ (r) and we know in advance that the result is positive. */
|
|
static void
|
|
posmpz_dec_ui (mpz_ptr r, unsigned long in)
|
|
{
|
|
#if BITS_PER_ULONG > GMP_NUMB_BITS
|
|
mpz_sub_ui (r, r, in);
|
|
#else
|
|
ASSERT (mpz_cmp_ui (r, in) >= 0);
|
|
MPN_DECR_U (PTR (r), SIZ (r), in);
|
|
SIZ (r) -= (PTR (r)[SIZ (r)-1] == 0);
|
|
#endif
|
|
}
|
|
|
|
/* Equivalent to mpz_tdiv_q_2exp (r, r, 1), but faster when
|
|
0 < SIZ (r) and we know in advance that the result is positive. */
|
|
static void
|
|
posmpz_rsh1 (mpz_ptr r)
|
|
{
|
|
mp_ptr rp;
|
|
mp_size_t rn;
|
|
|
|
rn = SIZ (r);
|
|
rp = PTR (r);
|
|
ASSERT (rn > 0);
|
|
mpn_rshift (rp, rp, rn, 1);
|
|
SIZ (r) -= rp[rn - 1] == 0;
|
|
}
|
|
|
|
/* Computes r = n(n+(2*k-1))/2
|
|
It uses a sqare instead of a product, computing
|
|
r = ((n+k-1)^2 + n - (k-1)^2)/2
|
|
As a side effect, sets t = n+k-1
|
|
*/
|
|
static void
|
|
mpz_hmul_nbnpk (mpz_ptr r, mpz_srcptr n, unsigned long int k, mpz_ptr t)
|
|
{
|
|
ASSERT (k > 0 && SIZ(n) > 0);
|
|
--k;
|
|
mpz_add_ui (t, n, k);
|
|
mpz_mul (r, t, t);
|
|
mpz_add (r, r, n);
|
|
posmpz_rsh1 (r);
|
|
if (LIKELY (k <= (1UL << (BITS_PER_ULONG / 2))))
|
|
posmpz_dec_ui (r, (k + (k & 1))*(k >> 1));
|
|
else
|
|
{
|
|
mpz_t tmp;
|
|
mpz_init_set_ui (tmp, (k + (k & 1)));
|
|
mpz_mul_ui (tmp, tmp, k >> 1);
|
|
mpz_sub (r, r, tmp);
|
|
mpz_clear (tmp);
|
|
}
|
|
}
|
|
|
|
#if KVAROPE
|
|
static void
|
|
rek_raising_fac4 (mpz_ptr r, mpz_ptr p, mpz_ptr P, unsigned long int k, unsigned long int lk, mpz_ptr t)
|
|
{
|
|
if (k - lk < 5)
|
|
{
|
|
do {
|
|
posmpz_inc_ui (p, 4*k+2);
|
|
mpz_addmul_ui (P, p, 4*k);
|
|
posmpz_dec_ui (P, k);
|
|
mpz_mul (r, r, P);
|
|
} while (--k > lk);
|
|
}
|
|
else
|
|
{
|
|
mpz_t lt;
|
|
unsigned long int m;
|
|
|
|
m = ((k + lk) >> 1) + 1;
|
|
rek_raising_fac4 (r, p, P, k, m, t);
|
|
|
|
posmpz_inc_ui (p, 4*m+2);
|
|
mpz_addmul_ui (P, p, 4*m);
|
|
posmpz_dec_ui (P, m);
|
|
if (t == NULL)
|
|
{
|
|
mpz_init_set (lt, P);
|
|
t = lt;
|
|
}
|
|
else
|
|
{
|
|
ALLOC (lt) = 0;
|
|
mpz_set (t, P);
|
|
}
|
|
rek_raising_fac4 (t, p, P, m - 1, lk, NULL);
|
|
|
|
mpz_mul (r, r, t);
|
|
mpz_clear (lt);
|
|
}
|
|
}
|
|
|
|
/* Computes (n+1)(n+2)...(n+k)/2^(k/2 +k/4) using the helper function
|
|
rek_raising_fac4, and exploiting an idea inspired by a piece of
|
|
code that Fredrik Johansson wrote and by a comment by Niels Möller.
|
|
|
|
Assume k = 4i then compute:
|
|
p = (n+1)(n+4i)/2 - i
|
|
(n+1+1)(n+4i)/2 = p + i + (n+4i)/2
|
|
(n+1+1)(n+4i-1)/2 = p + i + ((n+4i)-(n+1+1))/2 = p + i + (n-n+4i-2)/2 = p + 3i-1
|
|
P = (p + i)*(p+3i-1)/2 = (n+1)(n+2)(n+4i-1)(n+4i)/8
|
|
n' = n + 2
|
|
i' = i - 1
|
|
(n'-1)(n')(n'+4i'+1)(n'+4i'+2)/8 = P
|
|
(n'-1)(n'+4i'+2)/2 - i' - 1 = p
|
|
(n'-1+2)(n'+4i'+2)/2 - i' - 1 = p + (n'+4i'+2)
|
|
(n'-1+2)(n'+4i'+2-2)/2 - i' - 1 = p + (n'+4i'+2) - (n'-1+2) = p + 4i' + 1
|
|
(n'-1+2)(n'+4i'+2-2)/2 - i' = p + 4i' + 2
|
|
p' = p + 4i' + 2 = (n'+1)(n'+4i')/2 - i'
|
|
p' - 4i' - 2 = p
|
|
(p' - 4i' - 2 + i)*(p' - 4i' - 2+3i-1)/2 = P
|
|
(p' - 4i' - 2 + i' + 1)*(p' - 4i' - 2 + 3i' + 3 - 1)/2 = P
|
|
(p' - 3i' - 1)*(p' - i')/2 = P
|
|
(p' - 3i' - 1 + 4i' + 1)*(p' - i' + 4i' - 1)/2 = P + (4i' + 1)*(p' - i')/2 + (p' - 3i' - 1 + 4i' + 1)*(4i' - 1)/2
|
|
(p' + i')*(p' + 3i' - 1)/2 = P + (4i')*(p' + p')/2 + (p' - i' - (p' + i'))/2
|
|
(p' + i')*(p' + 3i' - 1)/2 = P + 4i'p' + (p' - i' - p' - i')/2
|
|
(p' + i')*(p' + 3i' - 1)/2 = P + 4i'p' - i'
|
|
P' = P + 4i'p' - i'
|
|
|
|
And compute the product P * P' * P" ...
|
|
*/
|
|
|
|
static void
|
|
mpz_raising_fac4 (mpz_ptr r, mpz_ptr n, unsigned long int k, mpz_ptr t, mpz_ptr p)
|
|
{
|
|
ASSERT ((k >= APARTAJ_KALKULOJ) && (APARTAJ_KALKULOJ > 0));
|
|
posmpz_init (n);
|
|
posmpz_inc_ui (n, 1);
|
|
SIZ (r) = 0;
|
|
if (k & 1)
|
|
{
|
|
mpz_set (r, n);
|
|
posmpz_inc_ui (n, 1);
|
|
}
|
|
k >>= 1;
|
|
if (APARTAJ_KALKULOJ < 2 && k == 0)
|
|
return;
|
|
|
|
mpz_hmul_nbnpk (p, n, k, t);
|
|
posmpz_init (p);
|
|
|
|
if (k & 1)
|
|
{
|
|
if (SIZ (r))
|
|
mpz_mul (r, r, p);
|
|
else
|
|
mpz_set (r, p);
|
|
posmpz_inc_ui (p, k - 1);
|
|
}
|
|
k >>= 1;
|
|
if (APARTAJ_KALKULOJ < 4 && k == 0)
|
|
return;
|
|
|
|
mpz_hmul_nbnpk (t, p, k, n);
|
|
if (SIZ (r))
|
|
mpz_mul (r, r, t);
|
|
else
|
|
mpz_set (r, t);
|
|
|
|
if (APARTAJ_KALKULOJ > 8 || k > 1)
|
|
{
|
|
posmpz_dec_ui (p, k);
|
|
rek_raising_fac4 (r, p, t, k - 1, 0, n);
|
|
}
|
|
}
|
|
|
|
#else /* KVAROPE */
|
|
|
|
static void
|
|
rek_raising_fac (mpz_ptr r, mpz_ptr n, unsigned long int k, unsigned long int lk, mpz_ptr t1, mpz_ptr t2)
|
|
{
|
|
/* Should the threshold depend on SIZ (n) ? */
|
|
if (k - lk < 10)
|
|
{
|
|
do {
|
|
posmpz_inc_ui (n, k);
|
|
mpz_mul (r, r, n);
|
|
--k;
|
|
} while (k > lk);
|
|
}
|
|
else
|
|
{
|
|
mpz_t t3;
|
|
unsigned long int m;
|
|
|
|
m = ((k + lk) >> 1) + 1;
|
|
rek_raising_fac (r, n, k, m, t1, t2);
|
|
|
|
posmpz_inc_ui (n, m);
|
|
if (t1 == NULL)
|
|
{
|
|
mpz_init_set (t3, n);
|
|
t1 = t3;
|
|
}
|
|
else
|
|
{
|
|
ALLOC (t3) = 0;
|
|
mpz_set (t1, n);
|
|
}
|
|
rek_raising_fac (t1, n, m - 1, lk, t2, NULL);
|
|
|
|
mpz_mul (r, r, t1);
|
|
mpz_clear (t3);
|
|
}
|
|
}
|
|
|
|
/* Computes (n+1)(n+2)...(n+k)/2^(k/2) using the helper function
|
|
rek_raising_fac, and exploiting an idea inspired by a piece of
|
|
code that Fredrik Johansson wrote.
|
|
|
|
Force an even k = 2i then compute:
|
|
p = (n+1)(n+2i)/2
|
|
i' = i - 1
|
|
p == (n+1)(n+2i'+2)/2
|
|
p' = p + i' == (n+2)(n+2i'+1)/2
|
|
n' = n + 1
|
|
p'== (n'+1)(n'+2i')/2 == (n+1 +1)(n+2i -1)/2
|
|
|
|
And compute the product p * p' * p" ...
|
|
*/
|
|
|
|
static void
|
|
mpz_raising_fac (mpz_ptr r, mpz_ptr n, unsigned long int k, mpz_ptr t, mpz_ptr p)
|
|
{
|
|
unsigned long int hk;
|
|
ASSERT ((k >= APARTAJ_KALKULOJ) && (APARTAJ_KALKULOJ > 1));
|
|
mpz_add_ui (n, n, 1);
|
|
hk = k >> 1;
|
|
mpz_hmul_nbnpk (p, n, hk, t);
|
|
|
|
if ((k & 1) != 0)
|
|
{
|
|
mpz_add_ui (t, t, hk + 1);
|
|
mpz_mul (r, t, p);
|
|
}
|
|
else
|
|
{
|
|
mpz_set (r, p);
|
|
}
|
|
|
|
if ((APARTAJ_KALKULOJ > 3) || (hk > 1))
|
|
{
|
|
posmpz_init (p);
|
|
rek_raising_fac (r, p, hk - 1, 0, t, n);
|
|
}
|
|
}
|
|
#endif /* KVAROPE */
|
|
|
|
/* This is a poor implementation. Look at bin_uiui.c for improvement ideas.
|
|
In fact consider calling mpz_bin_uiui() when the arguments fit, leaving
|
|
the code here only for big n.
|
|
|
|
The identity bin(n,k) = (-1)^k * bin(-n+k-1,k) can be found in Knuth vol
|
|
1 section 1.2.6 part G. */
|
|
|
|
void
|
|
mpz_bin_ui (mpz_ptr r, mpz_srcptr n, unsigned long int k)
|
|
{
|
|
mpz_t ni;
|
|
mp_size_t negate;
|
|
|
|
if (SIZ (n) < 0)
|
|
{
|
|
/* bin(n,k) = (-1)^k * bin(-n+k-1,k), and set ni = -n+k-1 - k = -n-1 */
|
|
mpz_init (ni);
|
|
mpz_add_ui (ni, n, 1L);
|
|
mpz_neg (ni, ni);
|
|
negate = (k & 1); /* (-1)^k */
|
|
}
|
|
else
|
|
{
|
|
/* bin(n,k) == 0 if k>n
|
|
(no test for this under the n<0 case, since -n+k-1 >= k there) */
|
|
if (mpz_cmp_ui (n, k) < 0)
|
|
{
|
|
SIZ (r) = 0;
|
|
return;
|
|
}
|
|
|
|
/* set ni = n-k */
|
|
mpz_init (ni);
|
|
mpz_sub_ui (ni, n, k);
|
|
negate = 0;
|
|
}
|
|
|
|
/* Now wanting bin(ni+k,k), with ni positive, and "negate" is the sign (0
|
|
for positive, 1 for negative). */
|
|
|
|
/* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. In this case it's
|
|
whether ni+k-k < k meaning ni<k, and if so change to denominator ni+k-k
|
|
= ni, and new ni of ni+k-ni = k. */
|
|
if (mpz_cmp_ui (ni, k) < 0)
|
|
{
|
|
unsigned long tmp;
|
|
tmp = k;
|
|
k = mpz_get_ui (ni);
|
|
mpz_set_ui (ni, tmp);
|
|
}
|
|
|
|
if (k < APARTAJ_KALKULOJ)
|
|
{
|
|
if (k == 0)
|
|
{
|
|
SIZ (r) = 1;
|
|
MPZ_NEWALLOC (r, 1)[0] = 1;
|
|
}
|
|
#if APARTAJ_KALKULOJ > 2
|
|
else if (k == 2)
|
|
{
|
|
mpz_add_ui (ni, ni, 1);
|
|
mpz_mul (r, ni, ni);
|
|
mpz_add (r, r, ni);
|
|
posmpz_rsh1 (r);
|
|
}
|
|
#endif
|
|
#if APARTAJ_KALKULOJ > 3
|
|
else if (k > 2)
|
|
{ /* k = 3, 4 */
|
|
mpz_add_ui (ni, ni, 2); /* n+1 */
|
|
mpz_mul (r, ni, ni); /* (n+1)^2 */
|
|
mpz_sub_ui (r, r, 1); /* (n+1)^2-1 */
|
|
if (k == 3)
|
|
{
|
|
mpz_mul (r, r, ni); /* ((n+1)^2-1)(n+1) = n(n+1)(n+2) */
|
|
/* mpz_divexact_ui (r, r, 6); /\* 6=3<<1; div_by3 ? *\/ */
|
|
mpn_pi1_bdiv_q_1 (PTR(r), PTR(r), SIZ(r), 3, GMP_NUMB_MASK/3*2+1, 1);
|
|
MPN_NORMALIZE_NOT_ZERO (PTR(r), SIZ(r));
|
|
}
|
|
else /* k = 4 */
|
|
{
|
|
mpz_add (ni, ni, r); /* (n+1)^2+n */
|
|
mpz_mul (r, ni, ni); /* ((n+1)^2+n)^2 */
|
|
mpz_sub_ui (r, r, 1); /* ((n+1)^2+n)^2-1 = n(n+1)(n+2)(n+3) */
|
|
/* mpz_divexact_ui (r, r, 24); /\* 24=3<<3; div_by3 ? *\/ */
|
|
mpn_pi1_bdiv_q_1 (PTR(r), PTR(r), SIZ(r), 3, GMP_NUMB_MASK/3*2+1, 3);
|
|
MPN_NORMALIZE_NOT_ZERO (PTR(r), SIZ(r));
|
|
}
|
|
}
|
|
#endif
|
|
else
|
|
{ /* k = 1 */
|
|
mpz_add_ui (r, ni, 1);
|
|
}
|
|
}
|
|
#if UZU_BIN_UIUI
|
|
else if (mpz_cmp_ui (ni, ULONG_MAX - k) <= 0)
|
|
{
|
|
mpz_bin_uiui (r, mpz_get_ui (ni) + k, k);
|
|
}
|
|
#endif
|
|
else
|
|
{
|
|
mp_limb_t count;
|
|
mpz_t num, den;
|
|
|
|
mpz_init (num);
|
|
mpz_init (den);
|
|
|
|
#if KVAROPE
|
|
mpz_raising_fac4 (num, ni, k, den, r);
|
|
popc_limb (count, k);
|
|
ASSERT (k - (k >> 1) - (k >> 2) - count >= 0);
|
|
mpz_tdiv_q_2exp (num, num, k - (k >> 1) - (k >> 2) - count);
|
|
#else
|
|
mpz_raising_fac (num, ni, k, den, r);
|
|
popc_limb (count, k);
|
|
ASSERT (k - (k >> 1) - count >= 0);
|
|
mpz_tdiv_q_2exp (num, num, k - (k >> 1) - count);
|
|
#endif
|
|
|
|
mpz_oddfac_1(den, k, 0);
|
|
|
|
mpz_divexact(r, num, den);
|
|
mpz_clear (num);
|
|
mpz_clear (den);
|
|
}
|
|
mpz_clear (ni);
|
|
|
|
SIZ(r) = (SIZ(r) ^ -negate) + negate;
|
|
}
|