mirror of
https://review.haiku-os.org/buildtools
synced 2025-02-23 22:27:43 +01:00
* merged mpfr 3.0.0 and gmp 5.0.1 in buildtools trunk git-svn-id: file:///srv/svn/repos/haiku/buildtools/trunk@37378 a95241bf-73f2-0310-859d-f6bbb57e9c96
287 lines
6.7 KiB
C
287 lines
6.7 KiB
C
/* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
|
|
|
|
Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003,
|
|
2004, 2005, 2008 Free Software Foundation, Inc.
|
|
|
|
This file is part of the GNU MP Library.
|
|
|
|
The GNU MP Library is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU Lesser General Public License as published by
|
|
the Free Software Foundation; either version 3 of the License, or (at your
|
|
option) any later version.
|
|
|
|
The GNU MP Library is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
|
License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public License
|
|
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
|
|
|
|
#include "gmp.h"
|
|
#include "gmp-impl.h"
|
|
#include "longlong.h"
|
|
|
|
/* Uses the HGCD operation described in
|
|
|
|
N. Möller, On Schönhage's algorithm and subquadratic integer gcd
|
|
computation, Math. Comp. 77 (2008), 589-607.
|
|
|
|
to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
|
|
then uses Lehmer's algorithm.
|
|
*/
|
|
|
|
/* Some reasonable choices are n / 2 (same as in hgcd), and p = (n +
|
|
* 2)/3, which gives a balanced multiplication in
|
|
* mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better
|
|
* performance. The matrix-vector multiplication is then
|
|
* 4:1-unbalanced, with matrix elements of size n/6, and vector
|
|
* elements of size p = 2n/3. */
|
|
|
|
/* From analysis of the theoretical running time, it appears that when
|
|
* multiplication takes time O(n^alpha), p should be chosen so that
|
|
* the ratio of the time for the mpn_hgcd call, and the time for the
|
|
* multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha -
|
|
* 1). */
|
|
#ifdef TUNE_GCD_P
|
|
#define P_TABLE_SIZE 10000
|
|
mp_size_t p_table[P_TABLE_SIZE];
|
|
#define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3)
|
|
#else
|
|
#define CHOOSE_P(n) (2*(n) / 3)
|
|
#endif
|
|
|
|
mp_size_t
|
|
mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
|
|
{
|
|
mp_size_t talloc;
|
|
mp_size_t scratch;
|
|
mp_size_t matrix_scratch;
|
|
|
|
mp_size_t gn;
|
|
mp_ptr tp;
|
|
TMP_DECL;
|
|
|
|
/* FIXME: Check for small sizes first, before setting up temporary
|
|
storage etc. */
|
|
talloc = MPN_GCD_LEHMER_N_ITCH(n);
|
|
|
|
/* For initial division */
|
|
scratch = usize - n + 1;
|
|
if (scratch > talloc)
|
|
talloc = scratch;
|
|
|
|
#if TUNE_GCD_P
|
|
if (CHOOSE_P (n) > 0)
|
|
#else
|
|
if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
|
|
#endif
|
|
{
|
|
mp_size_t hgcd_scratch;
|
|
mp_size_t update_scratch;
|
|
mp_size_t p = CHOOSE_P (n);
|
|
mp_size_t scratch;
|
|
#if TUNE_GCD_P
|
|
/* Worst case, since we don't guarantee that n - CHOOSE_P(n)
|
|
is increasing */
|
|
matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);
|
|
hgcd_scratch = mpn_hgcd_itch (n);
|
|
update_scratch = 2*(n - 1);
|
|
#else
|
|
matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
|
|
hgcd_scratch = mpn_hgcd_itch (n - p);
|
|
update_scratch = p + n - 1;
|
|
#endif
|
|
scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
|
|
if (scratch > talloc)
|
|
talloc = scratch;
|
|
}
|
|
|
|
TMP_MARK;
|
|
tp = TMP_ALLOC_LIMBS(talloc);
|
|
|
|
if (usize > n)
|
|
{
|
|
mpn_tdiv_qr (tp, up, 0, up, usize, vp, n);
|
|
|
|
if (mpn_zero_p (up, n))
|
|
{
|
|
MPN_COPY (gp, vp, n);
|
|
TMP_FREE;
|
|
return n;
|
|
}
|
|
}
|
|
|
|
#if TUNE_GCD_P
|
|
while (CHOOSE_P (n) > 0)
|
|
#else
|
|
while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
|
|
#endif
|
|
{
|
|
struct hgcd_matrix M;
|
|
mp_size_t p = CHOOSE_P (n);
|
|
mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
|
|
mp_size_t nn;
|
|
mpn_hgcd_matrix_init (&M, n - p, tp);
|
|
nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch);
|
|
if (nn > 0)
|
|
{
|
|
ASSERT (M.n <= (n - p - 1)/2);
|
|
ASSERT (M.n + p <= (p + n - 1) / 2);
|
|
/* Temporary storage 2 (p + M->n) <= p + n - 1. */
|
|
n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch);
|
|
}
|
|
else
|
|
{
|
|
/* Temporary storage n */
|
|
n = mpn_gcd_subdiv_step (gp, &gn, up, vp, n, tp);
|
|
if (n == 0)
|
|
{
|
|
TMP_FREE;
|
|
return gn;
|
|
}
|
|
}
|
|
}
|
|
|
|
gn = mpn_gcd_lehmer_n (gp, up, vp, n, tp);
|
|
TMP_FREE;
|
|
return gn;
|
|
}
|
|
|
|
#ifdef TUNE_GCD_P
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <time.h>
|
|
#include "speed.h"
|
|
|
|
static int
|
|
compare_double(const void *ap, const void *bp)
|
|
{
|
|
double a = * (const double *) ap;
|
|
double b = * (const double *) bp;
|
|
|
|
if (a < b)
|
|
return -1;
|
|
else if (a > b)
|
|
return 1;
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
static double
|
|
median (double *v, size_t n)
|
|
{
|
|
qsort(v, n, sizeof(*v), compare_double);
|
|
|
|
return v[n/2];
|
|
}
|
|
|
|
#define TIME(res, code) do { \
|
|
double time_measurement[5]; \
|
|
unsigned time_i; \
|
|
\
|
|
for (time_i = 0; time_i < 5; time_i++) \
|
|
{ \
|
|
speed_starttime(); \
|
|
code; \
|
|
time_measurement[time_i] = speed_endtime(); \
|
|
} \
|
|
res = median(time_measurement, 5); \
|
|
} while (0)
|
|
|
|
int
|
|
main(int argc, char *argv)
|
|
{
|
|
gmp_randstate_t rands;
|
|
mp_size_t n;
|
|
mp_ptr ap;
|
|
mp_ptr bp;
|
|
mp_ptr up;
|
|
mp_ptr vp;
|
|
mp_ptr gp;
|
|
mp_ptr tp;
|
|
TMP_DECL;
|
|
|
|
/* Unbuffered so if output is redirected to a file it isn't lost if the
|
|
program is killed part way through. */
|
|
setbuf (stdout, NULL);
|
|
setbuf (stderr, NULL);
|
|
|
|
gmp_randinit_default (rands);
|
|
|
|
TMP_MARK;
|
|
|
|
ap = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
|
|
bp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
|
|
up = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
|
|
vp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
|
|
gp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
|
|
tp = TMP_ALLOC_LIMBS (MPN_GCD_LEHMER_N_ITCH (P_TABLE_SIZE));
|
|
|
|
mpn_random (ap, P_TABLE_SIZE);
|
|
mpn_random (bp, P_TABLE_SIZE);
|
|
|
|
memset (p_table, 0, sizeof(p_table));
|
|
|
|
for (n = 100; n++; n < P_TABLE_SIZE)
|
|
{
|
|
mp_size_t p;
|
|
mp_size_t best_p;
|
|
double best_time;
|
|
double lehmer_time;
|
|
|
|
if (ap[n-1] == 0)
|
|
ap[n-1] = 1;
|
|
|
|
if (bp[n-1] == 0)
|
|
bp[n-1] = 1;
|
|
|
|
p_table[n] = 0;
|
|
TIME(lehmer_time, {
|
|
MPN_COPY (up, ap, n);
|
|
MPN_COPY (vp, bp, n);
|
|
mpn_gcd_lehmer_n (gp, up, vp, n, tp);
|
|
});
|
|
|
|
best_time = lehmer_time;
|
|
best_p = 0;
|
|
|
|
for (p = n * 0.48; p < n * 0.77; p++)
|
|
{
|
|
double t;
|
|
|
|
p_table[n] = p;
|
|
|
|
TIME(t, {
|
|
MPN_COPY (up, ap, n);
|
|
MPN_COPY (vp, bp, n);
|
|
mpn_gcd (gp, up, n, vp, n);
|
|
});
|
|
|
|
if (t < best_time)
|
|
{
|
|
best_time = t;
|
|
best_p = p;
|
|
}
|
|
}
|
|
printf("%6d %6d %5.3g", n, best_p, (double) best_p / n);
|
|
if (best_p > 0)
|
|
{
|
|
double speedup = 100 * (lehmer_time - best_time) / lehmer_time;
|
|
printf(" %5.3g%%", speedup);
|
|
if (speedup < 1.0)
|
|
{
|
|
printf(" (ignored)");
|
|
best_p = 0;
|
|
}
|
|
}
|
|
printf("\n");
|
|
|
|
p_table[n] = best_p;
|
|
}
|
|
TMP_FREE;
|
|
gmp_randclear(rands);
|
|
return 0;
|
|
}
|
|
#endif /* TUNE_GCD_P */
|