/* Vendored file provenance (repository-browser metadata, kept as a comment):
   mirror of https://review.haiku-os.org/buildtools
   synced 2025-01-31 10:34:41 +01:00, commit 4c74403188
   Change-Id: Iae65e95cfa0d92091b8b0a424ae36d88efa76aa9
   Reviewed-on: https://review.haiku-os.org/c/buildtools/+/3020
   Reviewed-by: Adrien Destugues <pulkomandy@gmail.com>
   (236 lines, 6.0 KiB, C)  */
/* mpn_div_qr_1u_pi2.

   THIS FILE CONTAINS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS
   ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2013, 2017 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

/* ISSUES:

   * Can we really use the high pi2 inverse limb for udiv_qrnnd_preinv?

   * Are there any problems with generating n quotient limbs in the q area?  It
     surely simplifies things.

   * Not yet adequately tested.
*/
#include "gmp-impl.h"
#include "longlong.h"
/* Define some longlong.h-style macros, but for wider operations.
|
|
* add_sssaaaa is like longlong.h's add_ssaaaa but propagating carry-out into
|
|
an additional sum operand.
|
|
*/
|
|
#if defined (__GNUC__) && ! defined (__INTEL_COMPILER) && ! defined (NO_ASM)
|
|
|
|
#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
|
|
#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
|
|
__asm__ ("add\t%7, %k2\n\tadc\t%5, %k1\n\tadc\t$0, %k0" \
|
|
: "=r" (s2), "=&r" (s1), "=&r" (s0) \
|
|
: "0" ((USItype)(s2)), \
|
|
"1" ((USItype)(a1)), "g" ((USItype)(b1)), \
|
|
"%2" ((USItype)(a0)), "g" ((USItype)(b0)))
|
|
#endif
|
|
|
|
#if defined (__amd64__) && W_TYPE_SIZE == 64
|
|
#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
|
|
__asm__ ("add\t%7, %q2\n\tadc\t%5, %q1\n\tadc\t$0, %q0" \
|
|
: "=r" (s2), "=&r" (s1), "=&r" (s0) \
|
|
: "0" ((UDItype)(s2)), \
|
|
"1" ((UDItype)(a1)), "rme" ((UDItype)(b1)), \
|
|
"%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
|
|
#endif
|
|
|
|
#if defined (__aarch64__) && W_TYPE_SIZE == 64
|
|
#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
|
|
__asm__ ("adds\t%2, %x6, %7\n\tadcs\t%1, %x4, %x5\n\tadc\t%0, %3, xzr"\
|
|
: "=r" (s2), "=&r" (s1), "=&r" (s0) \
|
|
: "rZ" (s2), "%rZ" (a1), "rZ" (b1), "%rZ" (a0), "rI" (b0) __CLOBBER_CC)
|
|
#endif
|
|
|
|
#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
|
|
/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
|
|
processor running in 32-bit mode, since the carry flag then gets the 32-bit
|
|
carry. */
|
|
#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
|
|
__asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%3" \
|
|
: "=r" (s2), "=&r" (s1), "=&r" (s0) \
|
|
: "r" (s2), "r" (a1), "r" (b1), "%r" (a0), "rI" (b0) \
|
|
__CLOBBER_CC)
|
|
#endif
|
|
|
|
#endif /* __GNUC__ */
|
|
|
|
#ifndef add_sssaaaa
|
|
#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
|
|
do { \
|
|
UWtype __s0, __s1, __c0, __c1; \
|
|
__s0 = (a0) + (b0); \
|
|
__s1 = (a1) + (b1); \
|
|
__c0 = __s0 < (a0); \
|
|
__c1 = __s1 < (a1); \
|
|
(s0) = __s0; \
|
|
__s1 = __s1 + __c0; \
|
|
(s1) = __s1; \
|
|
(s2) += __c1 + (__s1 < __c0); \
|
|
} while (0)
|
|
#endif
|
|
|
|
struct precomp_div_1_pi2
|
|
{
|
|
mp_limb_t dip[2];
|
|
mp_limb_t d;
|
|
int norm_cnt;
|
|
};
|
|
|
|
mp_limb_t
|
|
mpn_div_qr_1u_pi2 (mp_ptr qp,
|
|
mp_srcptr up, mp_size_t un,
|
|
struct precomp_div_1_pi2 *pd)
|
|
{
|
|
mp_size_t i;
|
|
mp_limb_t r, u2, u1, u0;
|
|
mp_limb_t d0, di1, di0;
|
|
mp_limb_t q3a, q2a, q2b, q1b, q2c, q1c, q1d, q0d;
|
|
mp_limb_t cnd;
|
|
int cnt;
|
|
|
|
ASSERT (un >= 2);
|
|
ASSERT ((pd->d & GMP_NUMB_HIGHBIT) == 0);
|
|
ASSERT (! MPN_OVERLAP_P (qp, un-2, up, un) || qp+2 >= up);
|
|
ASSERT_MPN (up, un);
|
|
|
|
#define q3 q3a
|
|
#define q2 q2b
|
|
#define q1 q1b
|
|
|
|
up += un - 3;
|
|
cnt = pd->norm_cnt;
|
|
r = up[2] >> (GMP_NUMB_BITS - cnt);
|
|
d0 = pd->d << cnt;
|
|
|
|
qp += un - 2;
|
|
|
|
di1 = pd->dip[1];
|
|
di0 = pd->dip[0];
|
|
|
|
for (i = un - 3; i >= 0; i -= 2)
|
|
{
|
|
u2 = r;
|
|
u1 = (up[2] << cnt) | (up[1] >> (GMP_NUMB_BITS - cnt));
|
|
u0 = (up[1] << cnt) | (up[0] >> (GMP_NUMB_BITS - cnt));
|
|
|
|
/* Dividend in {r,u1,u0} */
|
|
|
|
umul_ppmm (q1d,q0d, u1, di0);
|
|
umul_ppmm (q2b,q1b, u1, di1);
|
|
q2b++; /* cannot spill */
|
|
add_sssaaaa (r,q2b,q1b, q2b,q1b, u1,u0);
|
|
|
|
umul_ppmm (q2c,q1c, u2, di0);
|
|
add_sssaaaa (r,q2b,q1b, q2b,q1b, q2c,q1c);
|
|
umul_ppmm (q3a,q2a, u2, di1);
|
|
|
|
add_sssaaaa (r,q2b,q1b, q2b,q1b, q2a,q1d);
|
|
|
|
q3 += r;
|
|
|
|
r = u0 - q2 * d0;
|
|
|
|
cnd = (r >= q1);
|
|
r += d0 & -cnd;
|
|
sub_ddmmss (q3,q2, q3,q2, 0,cnd);
|
|
|
|
if (UNLIKELY (r >= d0))
|
|
{
|
|
r -= d0;
|
|
add_ssaaaa (q3,q2, q3,q2, 0,1);
|
|
}
|
|
|
|
qp[0] = q2;
|
|
qp[1] = q3;
|
|
|
|
up -= 2;
|
|
qp -= 2;
|
|
}
|
|
|
|
if ((un & 1) != 0)
|
|
{
|
|
u2 = r;
|
|
u1 = (up[2] << cnt);
|
|
|
|
udiv_qrnnd_preinv (q3, r, u2, u1, d0, di1);
|
|
qp[1] = q3;
|
|
}
|
|
else
|
|
{
|
|
u2 = r;
|
|
u1 = (up[2] << cnt) | (up[1] >> (GMP_NUMB_BITS - cnt));
|
|
u0 = (up[1] << cnt);
|
|
|
|
/* Dividend in {r,u1,u0} */
|
|
|
|
umul_ppmm (q1d,q0d, u1, di0);
|
|
umul_ppmm (q2b,q1b, u1, di1);
|
|
q2b++; /* cannot spill */
|
|
add_sssaaaa (r,q2b,q1b, q2b,q1b, u1,u0);
|
|
|
|
umul_ppmm (q2c,q1c, u2, di0);
|
|
add_sssaaaa (r,q2b,q1b, q2b,q1b, q2c,q1c);
|
|
umul_ppmm (q3a,q2a, u2, di1);
|
|
|
|
add_sssaaaa (r,q2b,q1b, q2b,q1b, q2a,q1d);
|
|
|
|
q3 += r;
|
|
|
|
r = u0 - q2 * d0;
|
|
|
|
cnd = (r >= q1);
|
|
r += d0 & -cnd;
|
|
sub_ddmmss (q3,q2, q3,q2, 0,cnd);
|
|
|
|
if (UNLIKELY (r >= d0))
|
|
{
|
|
r -= d0;
|
|
add_ssaaaa (q3,q2, q3,q2, 0,1);
|
|
}
|
|
|
|
qp[0] = q2;
|
|
qp[1] = q3;
|
|
}
|
|
|
|
return r >> cnt;
|
|
|
|
#undef q3
|
|
#undef q2
|
|
#undef q1
|
|
}
|