mirror of
https://review.haiku-os.org/buildtools
synced 2025-02-07 14:34:51 +01:00
Old version was 3.1.2 and is quite old: 2013-03-13 A lot has happened since then 4.0.1 is from 2018-02-07
469 lines
14 KiB
C
469 lines
14 KiB
C
/* [Description]
|
|
|
|
Copyright 2010-2018 Free Software Foundation, Inc.
|
|
Contributed by the AriC and Caramba projects, INRIA.
|
|
|
|
This file is part of the GNU MPFR Library.
|
|
|
|
The GNU MPFR Library is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU Lesser General Public License as published by
|
|
the Free Software Foundation; either version 3 of the License, or (at your
|
|
option) any later version.
|
|
|
|
The GNU MPFR Library is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
|
License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public License
|
|
along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
|
|
http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
|
|
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
|
|
|
|
#include <stdlib.h>
|
|
#include <time.h>
|
|
|
|
#define MPFR_NEED_LONGLONG_H
|
|
#include "mpfr-impl.h"
|
|
|
|
#undef _PROTO
|
|
#define _PROTO __GMP_PROTO
|
|
#include "speed.h"
|
|
|
|
/* Let f be a function for which we have several implementations f1, f2... */
|
|
/* We wish to have a quick overview of which implementation is the best */
|
|
/* in function of the point x where f(x) is computed and of the precision */
|
|
/* prec requested by the user. */
|
|
/* This is performed by drawing a 2D graphic with color indicating which */
|
|
/* method is the best. */
|
|
/* For building this graphic, the following structur is used (see the core */
|
|
/* of generate_2D_sample for an explanation of each field. */
|
|
struct speed_params2D
|
|
{
|
|
/* x-window: [min_x, max_x] or [2^min_x, 2^max_x] */
|
|
/* or [-2^(max_x), -2^(min_x)] U [2^min_x, 2^max_x] */
|
|
/* depending on the value of logarithmic_scale_x */
|
|
double min_x;
|
|
double max_x;
|
|
|
|
/* prec-window: [min_prec, max_prec] */
|
|
mpfr_prec_t min_prec;
|
|
mpfr_prec_t max_prec;
|
|
|
|
/* number of sample points for the x-axis and the prec-axis */
|
|
int nb_points_x;
|
|
int nb_points_prec;
|
|
|
|
/* should the sample points be logarithmically scaled or not */
|
|
int logarithmic_scale_x;
|
|
int logarithmic_scale_prec;
|
|
|
|
/* list of functions g1, g2... measuring the execution time of f1, f2... */
|
|
/* when given a point x and a precision prec stored in s. */
|
|
/* We use s->xp to store the significant of x, s->r to store its exponent */
|
|
/* s->align_xp to store its sign, and s->size to store prec. */
|
|
double (**speed_funcs) (struct speed_params *s);
|
|
};
|
|
|
|
/* Given an array t of nb_functions double indicating the timings of several */
|
|
/* implementations, return i, such that t[i] is the best timing. */
|
|
int
|
|
find_best (double *t, int nb_functions)
|
|
{
|
|
int i, ibest;
|
|
double best;
|
|
|
|
if (nb_functions<=0)
|
|
{
|
|
fprintf (stderr, "There is no function\n");
|
|
abort ();
|
|
}
|
|
|
|
ibest = 0;
|
|
best = t[0];
|
|
for (i=1; i<nb_functions; i++)
|
|
{
|
|
if (t[i]<best)
|
|
{
|
|
best = t[i];
|
|
ibest = i;
|
|
}
|
|
}
|
|
|
|
return ibest;
|
|
}
|
|
|
|
void generate_2D_sample (FILE *output, struct speed_params2D param)
|
|
{
|
|
mpfr_t temp;
|
|
double incr_prec;
|
|
mpfr_t incr_x;
|
|
mpfr_t x, x2;
|
|
double prec;
|
|
struct speed_params s;
|
|
int i;
|
|
int test;
|
|
int nb_functions;
|
|
double *t; /* store the timing of each implementation */
|
|
|
|
/* We first determine how many implementations we have */
|
|
nb_functions = 0;
|
|
while (param.speed_funcs[nb_functions] != NULL)
|
|
nb_functions++;
|
|
|
|
t = malloc (nb_functions * sizeof (double));
|
|
if (t == NULL)
|
|
{
|
|
fprintf (stderr, "Can't allocate memory.\n");
|
|
abort ();
|
|
}
|
|
|
|
|
|
mpfr_init2 (temp, MPFR_SMALL_PRECISION);
|
|
|
|
/* The precision is sampled from min_prec to max_prec with */
|
|
/* approximately nb_points_prec points. If logarithmic_scale_prec */
|
|
/* is true, the precision is multiplied by incr_prec at each */
|
|
/* step. Otherwise, incr_prec is added at each step. */
|
|
if (param.logarithmic_scale_prec)
|
|
{
|
|
mpfr_set_ui (temp, (unsigned long int)param.max_prec, MPFR_RNDU);
|
|
mpfr_div_ui (temp, temp, (unsigned long int)param.min_prec, MPFR_RNDU);
|
|
mpfr_root (temp, temp,
|
|
(unsigned long int)param.nb_points_prec, MPFR_RNDU);
|
|
incr_prec = mpfr_get_d (temp, MPFR_RNDU);
|
|
}
|
|
else
|
|
{
|
|
incr_prec = (double)param.max_prec - (double)param.min_prec;
|
|
incr_prec = incr_prec/((double)param.nb_points_prec);
|
|
}
|
|
|
|
/* The points x are sampled according to the following rule: */
|
|
/* If logarithmic_scale_x = 0: */
|
|
/* nb_points_x points are equally distributed between min_x and max_x */
|
|
/* If logarithmic_scale_x = 1: */
|
|
/* nb_points_x points are sampled from 2^(min_x) to 2^(max_x). At */
|
|
/* each step, the current point is multiplied by incr_x. */
|
|
/* If logarithmic_scale_x = -1: */
|
|
/* nb_points_x/2 points are sampled from -2^(max_x) to -2^(min_x) */
|
|
/* (at each step, the current point is divided by incr_x); and */
|
|
/* nb_points_x/2 points are sampled from 2^(min_x) to 2^(max_x) */
|
|
/* (at each step, the current point is multiplied by incr_x). */
|
|
mpfr_init2 (incr_x, param.max_prec);
|
|
if (param.logarithmic_scale_x == 0)
|
|
{
|
|
mpfr_set_d (incr_x,
|
|
(param.max_x - param.min_x)/(double)param.nb_points_x,
|
|
MPFR_RNDU);
|
|
}
|
|
else if (param.logarithmic_scale_x == -1)
|
|
{
|
|
mpfr_set_d (incr_x,
|
|
2.*(param.max_x - param.min_x)/(double)param.nb_points_x,
|
|
MPFR_RNDU);
|
|
mpfr_exp2 (incr_x, incr_x, MPFR_RNDU);
|
|
}
|
|
else
|
|
{ /* other values of param.logarithmic_scale_x are considered as 1 */
|
|
mpfr_set_d (incr_x,
|
|
(param.max_x - param.min_x)/(double)param.nb_points_x,
|
|
MPFR_RNDU);
|
|
mpfr_exp2 (incr_x, incr_x, MPFR_RNDU);
|
|
}
|
|
|
|
/* Main loop */
|
|
mpfr_init2 (x, param.max_prec);
|
|
mpfr_init2 (x2, param.max_prec);
|
|
prec = (double)param.min_prec;
|
|
while (prec <= param.max_prec)
|
|
{
|
|
printf ("prec = %d\n", (int)prec);
|
|
if (param.logarithmic_scale_x == 0)
|
|
mpfr_set_d (temp, param.min_x, MPFR_RNDU);
|
|
else if (param.logarithmic_scale_x == -1)
|
|
{
|
|
mpfr_set_d (temp, param.max_x, MPFR_RNDD);
|
|
mpfr_exp2 (temp, temp, MPFR_RNDD);
|
|
mpfr_neg (temp, temp, MPFR_RNDU);
|
|
}
|
|
else
|
|
{
|
|
mpfr_set_d (temp, param.min_x, MPFR_RNDD);
|
|
mpfr_exp2 (temp, temp, MPFR_RNDD);
|
|
}
|
|
|
|
/* We perturb x a little bit, in order to avoid trailing zeros that */
|
|
/* might change the behavior of algorithms. */
|
|
mpfr_const_pi (x, MPFR_RNDN);
|
|
mpfr_div_2ui (x, x, 7, MPFR_RNDN);
|
|
mpfr_add_ui (x, x, 1, MPFR_RNDN);
|
|
mpfr_mul (x, x, temp, MPFR_RNDN);
|
|
|
|
test = 1;
|
|
while (test)
|
|
{
|
|
mpfr_fprintf (output, "%e\t", mpfr_get_d (x, MPFR_RNDN));
|
|
mpfr_fprintf (output, "%Pu\t", (mpfr_prec_t)prec);
|
|
|
|
s.r = (mp_limb_t)mpfr_get_exp (x);
|
|
s.size = (mpfr_prec_t)prec;
|
|
s.align_xp = (mpfr_sgn (x) > 0)?1:2;
|
|
mpfr_set_prec (x2, (mpfr_prec_t)prec);
|
|
mpfr_set (x2, x, MPFR_RNDU);
|
|
s.xp = x2->_mpfr_d;
|
|
|
|
for (i=0; i<nb_functions; i++)
|
|
{
|
|
t[i] = speed_measure (param.speed_funcs[i], &s);
|
|
mpfr_fprintf (output, "%e\t", t[i]);
|
|
}
|
|
fprintf (output, "%d\n", 1 + find_best (t, nb_functions));
|
|
|
|
if (param.logarithmic_scale_x == 0)
|
|
{
|
|
mpfr_add (x, x, incr_x, MPFR_RNDU);
|
|
if (mpfr_cmp_d (x, param.max_x) > 0)
|
|
test=0;
|
|
}
|
|
else
|
|
{
|
|
if (mpfr_sgn (x) < 0 )
|
|
{ /* if x<0, it means that logarithmic_scale_x=-1 */
|
|
mpfr_div (x, x, incr_x, MPFR_RNDU);
|
|
mpfr_abs (temp, x, MPFR_RNDD);
|
|
mpfr_log2 (temp, temp, MPFR_RNDD);
|
|
if (mpfr_cmp_d (temp, param.min_x) < 0)
|
|
mpfr_neg (x, x, MPFR_RNDN);
|
|
}
|
|
else
|
|
{
|
|
mpfr_mul (x, x, incr_x, MPFR_RNDU);
|
|
mpfr_set (temp, x, MPFR_RNDD);
|
|
mpfr_log2 (temp, temp, MPFR_RNDD);
|
|
if (mpfr_cmp_d (temp, param.max_x) > 0)
|
|
test=0;
|
|
}
|
|
}
|
|
}
|
|
|
|
prec = ( (param.logarithmic_scale_prec) ? (prec * incr_prec)
|
|
: (prec + incr_prec) );
|
|
fprintf (output, "\n");
|
|
}
|
|
|
|
free (t);
|
|
mpfr_clear (incr_x);
|
|
mpfr_clear (x);
|
|
mpfr_clear (x2);
|
|
mpfr_clear (temp);
|
|
|
|
return;
|
|
}
|
|
|
|
#define SPEED_MPFR_FUNC_2D(mean_func) \
|
|
do \
|
|
{ \
|
|
double t; \
|
|
unsigned i; \
|
|
mpfr_t w, x; \
|
|
mp_size_t size; \
|
|
\
|
|
SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \
|
|
SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \
|
|
\
|
|
size = (s->size-1)/GMP_NUMB_BITS+1; \
|
|
s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \
|
|
MPFR_TMP_INIT1 (s->xp, x, s->size); \
|
|
MPFR_SET_EXP (x, (mpfr_exp_t) s->r); \
|
|
if (s->align_xp == 2) MPFR_SET_NEG (x); \
|
|
\
|
|
mpfr_init2 (w, s->size); \
|
|
speed_starttime (); \
|
|
i = s->reps; \
|
|
\
|
|
do \
|
|
mean_func (w, x, MPFR_RNDN); \
|
|
while (--i != 0); \
|
|
t = speed_endtime (); \
|
|
\
|
|
mpfr_clear (w); \
|
|
return t; \
|
|
} \
|
|
while (0)
|
|
|
|
mpfr_prec_t mpfr_exp_2_threshold;
|
|
mpfr_prec_t old_threshold = MPFR_EXP_2_THRESHOLD;
|
|
#undef MPFR_EXP_2_THRESHOLD
|
|
#define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold
|
|
#include "exp_2.c"
|
|
|
|
double
|
|
timing_exp1 (struct speed_params *s)
|
|
{
|
|
mpfr_exp_2_threshold = s->size+1;
|
|
SPEED_MPFR_FUNC_2D (mpfr_exp_2);
|
|
}
|
|
|
|
double
|
|
timing_exp2 (struct speed_params *s)
|
|
{
|
|
mpfr_exp_2_threshold = s->size-1;
|
|
SPEED_MPFR_FUNC_2D (mpfr_exp_2);
|
|
}
|
|
|
|
double
|
|
timing_exp3 (struct speed_params *s)
|
|
{
|
|
SPEED_MPFR_FUNC_2D (mpfr_exp_3);
|
|
}
|
|
|
|
|
|
#include "ai.c"
|
|
double
|
|
timing_ai1 (struct speed_params *s)
|
|
{
|
|
SPEED_MPFR_FUNC_2D (mpfr_ai1);
|
|
}
|
|
|
|
double
|
|
timing_ai2 (struct speed_params *s)
|
|
{
|
|
SPEED_MPFR_FUNC_2D (mpfr_ai2);
|
|
}
|
|
|
|
/* These functions are for testing purpose only */
|
|
/* They are used to draw which method is actually used */
|
|
double
|
|
virtual_timing_ai1 (struct speed_params *s)
|
|
{
|
|
double t;
|
|
unsigned i;
|
|
mpfr_t w, x;
|
|
mp_size_t size;
|
|
mpfr_t temp1, temp2;
|
|
|
|
SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);
|
|
SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);
|
|
|
|
size = (s->size-1)/GMP_NUMB_BITS+1;
|
|
s->xp[size-1] |= MPFR_LIMB_HIGHBIT;
|
|
MPFR_TMP_INIT1 (s->xp, x, s->size);
|
|
MPFR_SET_EXP (x, (mpfr_exp_t) s->r);
|
|
if (s->align_xp == 2) MPFR_SET_NEG (x);
|
|
|
|
mpfr_init2 (w, s->size);
|
|
speed_starttime ();
|
|
i = s->reps;
|
|
|
|
mpfr_init2 (temp1, MPFR_SMALL_PRECISION);
|
|
mpfr_init2 (temp2, MPFR_SMALL_PRECISION);
|
|
|
|
mpfr_set (temp1, x, MPFR_SMALL_PRECISION);
|
|
mpfr_set_si (temp2, MPFR_AI_THRESHOLD2, MPFR_RNDN);
|
|
mpfr_mul_ui (temp2, temp2, (unsigned int)MPFR_PREC (w), MPFR_RNDN);
|
|
|
|
if (MPFR_IS_NEG (x))
|
|
mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD1, MPFR_RNDN);
|
|
else
|
|
mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD3, MPFR_RNDN);
|
|
|
|
mpfr_add (temp1, temp1, temp2, MPFR_RNDN);
|
|
|
|
if (mpfr_cmp_si (temp1, MPFR_AI_SCALE) > 0)
|
|
t = 1000.;
|
|
else
|
|
t = 1.;
|
|
|
|
mpfr_clear (temp1);
|
|
mpfr_clear (temp2);
|
|
|
|
return t;
|
|
}
|
|
|
|
double
|
|
virtual_timing_ai2 (struct speed_params *s)
|
|
{
|
|
double t;
|
|
unsigned i;
|
|
mpfr_t w, x;
|
|
mp_size_t size;
|
|
mpfr_t temp1, temp2;
|
|
|
|
SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);
|
|
SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);
|
|
|
|
size = (s->size-1)/GMP_NUMB_BITS+1;
|
|
s->xp[size-1] |= MPFR_LIMB_HIGHBIT;
|
|
MPFR_TMP_INIT1 (s->xp, x, s->size);
|
|
MPFR_SET_EXP (x, (mpfr_exp_t) s->r);
|
|
if (s->align_xp == 2) MPFR_SET_NEG (x);
|
|
|
|
mpfr_init2 (w, s->size);
|
|
speed_starttime ();
|
|
i = s->reps;
|
|
|
|
mpfr_init2 (temp1, MPFR_SMALL_PRECISION);
|
|
mpfr_init2 (temp2, MPFR_SMALL_PRECISION);
|
|
|
|
mpfr_set (temp1, x, MPFR_SMALL_PRECISION);
|
|
mpfr_set_si (temp2, MPFR_AI_THRESHOLD2, MPFR_RNDN);
|
|
mpfr_mul_ui (temp2, temp2, (unsigned int)MPFR_PREC (w), MPFR_RNDN);
|
|
|
|
if (MPFR_IS_NEG (x))
|
|
mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD1, MPFR_RNDN);
|
|
else
|
|
mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD3, MPFR_RNDN);
|
|
|
|
mpfr_add (temp1, temp1, temp2, MPFR_RNDN);
|
|
|
|
if (mpfr_cmp_si (temp1, MPFR_AI_SCALE) > 0)
|
|
t = 1.;
|
|
else
|
|
t = 1000.;
|
|
|
|
mpfr_clear (temp1);
|
|
mpfr_clear (temp2);
|
|
|
|
return t;
|
|
}
|
|
|
|
int
|
|
main (void)
|
|
{
|
|
FILE *output;
|
|
struct speed_params2D param;
|
|
double (*speed_funcs[3]) (struct speed_params *s);
|
|
|
|
/* char filename[256] = "virtual_timing_ai.dat"; */
|
|
/* speed_funcs[0] = virtual_timing_ai1; */
|
|
/* speed_funcs[1] = virtual_timing_ai2; */
|
|
|
|
char filename[256] = "airy.dat";
|
|
speed_funcs[0] = timing_ai1;
|
|
speed_funcs[1] = timing_ai2;
|
|
|
|
speed_funcs[2] = NULL;
|
|
output = fopen (filename, "w");
|
|
if (output == NULL)
|
|
{
|
|
fprintf (stderr, "Can't open file '%s' for writing.\n", filename);
|
|
abort ();
|
|
}
|
|
param.min_x = -80;
|
|
param.max_x = 60;
|
|
param.min_prec = 50;
|
|
param.max_prec = 1500;
|
|
param.nb_points_x = 200;
|
|
param.nb_points_prec = 200;
|
|
param.logarithmic_scale_x = 0;
|
|
param.logarithmic_scale_prec = 0;
|
|
param.speed_funcs = speed_funcs;
|
|
|
|
generate_2D_sample (output, param);
|
|
|
|
fclose (output);
|
|
mpfr_free_cache ();
|
|
return 0;
|
|
}
|