mirror of
https://review.haiku-os.org/buildtools
synced 2025-02-12 08:47:41 +01:00
Old version was from 2012-05-06; 6.1.2 is from 2016-12-16. A lot of support for newer processors and speedups since then. See gmp/NEWS for details.
315 lines
6.3 KiB
NASM
315 lines
6.3 KiB
NASM
dnl x86-64 mpn_divrem_1 -- mpn by limb division.
|
|
|
|
dnl Copyright 2004, 2005, 2007-2012, 2014 Free Software Foundation, Inc.
|
|
|
|
dnl This file is part of the GNU MP Library.
|
|
dnl
|
|
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
|
dnl it under the terms of either:
|
|
dnl
|
|
dnl * the GNU Lesser General Public License as published by the Free
|
|
dnl Software Foundation; either version 3 of the License, or (at your
|
|
dnl option) any later version.
|
|
dnl
|
|
dnl or
|
|
dnl
|
|
dnl * the GNU General Public License as published by the Free Software
|
|
dnl Foundation; either version 2 of the License, or (at your option) any
|
|
dnl later version.
|
|
dnl
|
|
dnl or both in parallel, as here.
|
|
dnl
|
|
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
|
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
dnl for more details.
|
|
dnl
|
|
dnl You should have received copies of the GNU General Public License and the
|
|
dnl GNU Lesser General Public License along with the GNU MP Library. If not,
|
|
dnl see https://www.gnu.org/licenses/.
|
|
|
|
include(`../config.m4')
|
|
|
|
|
|
C norm unorm frac
|
|
C AMD K8,K9 13 13 12
|
|
C AMD K10 13 13 12
|
|
C Intel P4 43 44 43
|
|
C Intel core2 24.5 24.5 19.5
|
|
C Intel corei 20.5 19.5 18
|
|
C Intel atom 43 46 36
|
|
C VIA nano 25.5 25.5 24
|
|
|
|
C mp_limb_t
|
|
C mpn_divrem_1 (mp_ptr qp, mp_size_t fn,
|
|
C mp_srcptr np, mp_size_t nn, mp_limb_t d)
|
|
|
|
C mp_limb_t
|
|
C mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t fn,
|
|
C mp_srcptr np, mp_size_t nn, mp_limb_t d,
|
|
C mp_limb_t dinv, int cnt)
|
|
|
|
C INPUT PARAMETERS
|
|
C Incoming argument registers.  The names ending in _param mark registers
C that are given a different role further down: rsi is reused as up and rcx
C is reused as cnt, so both prologues copy fn_param and un_param elsewhere
C before overwriting them.
define(`qp', `%rdi')
|
|
define(`fn_param', `%rsi')
|
|
define(`up_param', `%rdx')
|
|
define(`un_param', `%rcx')
|
|
define(`d', `%r8')
|
|
define(`dinv', `%r9') C only for mpn_preinv_divrem_1
|
|
C shift passed on stack C only for mpn_preinv_divrem_1
|
|
|
|
C Working registers used once the prologue has run.  cnt is only ever
C accessed through its low byte as a shift count; fn and un live in rbx and
C r12, which both prologues save and L(ret) restores.
define(`cnt', `%rcx')
|
|
define(`up', `%rsi')
|
|
define(`fn', `%r12')
|
|
define(`un', `%rbx')
|
|
|
|
|
|
C rax rbx rcx rdx rsi rdi rbp r8 r9 r10 r11 r12 r13 r14 r15
|
|
C rcx=cnt rdi=qp r8=d r9=dinv
|
|
|
|
C Both ABI variants are handled in this file; the IFDOS and IFSTD wrappers
C below select the lines that differ between them.
ABI_SUPPORT(DOS64)
|
|
ABI_SUPPORT(STD64)
|
|
|
|
C CNTOFF(base) is the displacement from base at which mpn_preinv_divrem_1
C finds its stacked cnt argument after its prologue has run.
C Under STD64 the value 40 equals the return address plus the four 8-byte
C pushes made by the prologue.
C NOTE(review): the DOS64 value 104 presumably also covers whatever
C FUNC_ENTRY pushes plus the register home area and the two earlier stack
C arguments - confirm against the FUNC_ENTRY definition.
IFSTD(`define(`CNTOFF', `40($1)')')
|
|
IFDOS(`define(`CNTOFF', `104($1)')')
|
|
|
|
ASM_START()
|
|
TEXT
|
|
ALIGN(16)
|
|
C mpn_preinv_divrem_1 -- entry point for callers that supply dinv and cnt
C themselves (see the prototype comment near the top of the file).
C This stub only performs the register setup and then jumps into the loops
C that are part of mpn_divrem_1 below, at L(nent) or L(uent).  It contains no
C return of its own; the shared code leaves through L(ret).
PROLOGUE(mpn_preinv_divrem_1)
|
|
FUNC_ENTRY(4)
|
|
C DOS64 only: d and dinv are fetched from the stack into r8 and r9.
IFDOS(` mov 56(%rsp), %r8 ')
|
|
IFDOS(` mov 64(%rsp), %r9 ')
|
|
C rax = 0.  rax is what the shared loops use as the running remainder and
C what is finally returned.
xor R32(%rax), R32(%rax)
|
|
C Save the registers that L(ret) pops again, in the reverse of the pop order.
push %r13
|
|
push %r12
|
|
push %rbp
|
|
push %rbx
|
|
|
|
C Order matters here: fn and un are copied out of rsi and rcx first, because
C up is rsi and cnt is rcx.  After the add, rcx holds un + fn.
mov fn_param, fn
|
|
mov un_param, un
|
|
add fn_param, un_param
|
|
mov up_param, up
|
|
|
|
C qp = qp + 8*(un+fn) - 8, the highest quotient limb.  The loops store there
C and step qp downwards by 8 after each store.
lea -8(qp,un_param,8), qp
|
|
|
|
C Sign bit of d set: d needs no shifting, so enter the normalized loop.
C On this path cl is not loaded here; the normalized loop does not use it and
C rcx is zeroed after that loop finishes.
test d, d
|
|
js L(nent)
|
|
|
|
C Otherwise load the low byte of the stacked cnt argument into cl, shift d
C left by it, and enter the unnormalized code.
C NOTE(review): L(uent) decrements un and loads up[un] straight away, and
C unlike mpn_divrem_1 nothing on this path checks for un == 0 first.
C Presumably callers guarantee nn >= 1 - confirm.
mov CNTOFF(%rsp), R8(cnt)
|
|
shl R8(cnt), d
|
|
jmp L(uent)
|
|
EPILOGUE()
|
|
|
|
ALIGN(16)
|
|
C mpn_divrem_1 -- divide the un limbs at up by the single limb d.
C Quotient limbs are stored downwards starting at qp[un+fn-1].  Once the
C source limbs are used up, L(ftop) produces fn further quotient limbs without
C reading any more source data.  The remainder is returned in rax.
C
C Register roles once the prologue has run:
C   rdi qp    rsi up    rbx un    r12 fn    r8 d    r9 dinv    cl shift count
C   rax running remainder         r13 quotient limb being formed
C
C Divisors with the top bit set are handled from L(normalized); all others go
C through L(unnormalized), which shifts d left before dividing and shifts the
C final remainder back right before returning.
PROLOGUE(mpn_divrem_1)
|
|
FUNC_ENTRY(4)
|
|
C DOS64 only: d is fetched from the stack into r8.
IFDOS(` mov 56(%rsp), %r8 ')
|
|
C rax = 0, the value returned if the early exit below is taken.
xor R32(%rax), R32(%rax)
|
|
C Saved here, popped in reverse order at L(ret).
push %r13
|
|
push %r12
|
|
push %rbp
|
|
push %rbx
|
|
|
|
C fn and un are copied out of rsi and rcx before those registers are reused
C as up and cnt.  After the add, rcx holds un + fn.
mov fn_param, fn
|
|
mov un_param, un
|
|
add fn_param, un_param
|
|
mov up_param, up
|
|
C The mov above does not change flags, so this tests the result of the add:
C un + fn == 0 means there is nothing to produce, return rax = 0.
je L(ret)
|
|
|
|
C qp = qp + 8*(un+fn) - 8, the highest quotient limb.
lea -8(qp,un_param,8), qp
|
|
C rbp = 0: initial remainder for the unnormalized path, and the zero that
C path may store as its top quotient limb.
xor R32(%rbp), R32(%rbp)
|
|
|
|
test d, d
|
|
jns L(unnormalized)
|
|
|
|
C ---- d has its top bit set ----
C If there is at least one source limb, handle the top one by comparison:
C rbp becomes the limb reduced by d when it was >= d, otherwise the limb
C itself, and the quotient limb stored is 1 or 0 accordingly.
L(normalized):
|
|
test un, un
|
|
je L(8) C un == 0
|
|
mov -8(up,un,8), %rbp
|
|
dec un
|
|
mov %rbp, %rax
|
|
sub d, %rbp
|
|
C Borrow from the sub means limb < d: put the original limb back.  cmovc
C leaves the flags alone, so the sbb and inc that follow turn the same carry
C into the quotient limb, 1 - CF.
cmovc %rax, %rbp
|
|
sbb R32(%rax), R32(%rax)
|
|
inc R32(%rax)
|
|
mov %rax, (qp)
|
|
lea -8(qp), qp
|
|
C Obtain dinv by calling mpn_invert_limb with d as its only argument, passed
C in rdi under STD64 and in rcx under DOS64.  r8 is saved around the call in
C both ABIs; rdi and rsi are saved only under STD64.  The 32 bytes reserved
C under DOS64 match the area a Microsoft x64 caller must provide to its callee.
C The remainder so far sits in rbp across the call.
C NOTE(review): mpn_invert_limb is defined elsewhere; presumably it returns
C the reciprocal of d used by the loops below - confirm.
L(8):
|
|
IFSTD(` push %rdi ')
|
|
IFSTD(` push %rsi ')
|
|
push %r8
|
|
IFSTD(` mov d, %rdi ')
|
|
IFDOS(` sub $32, %rsp ')
|
|
IFDOS(` mov d, %rcx ')
|
|
ASSERT(nz, `test $15, %rsp')
|
|
CALL( mpn_invert_limb)
|
|
IFDOS(` add $32, %rsp ')
|
|
pop %r8
|
|
IFSTD(` pop %rsi ')
|
|
IFSTD(` pop %rdi ')
|
|
|
|
C dinv = value returned by the call; rax = remainder so far.
mov %rax, dinv
|
|
mov %rbp, %rax
|
|
jmp L(nent)
|
|
|
|
C Normalized main loop, one source limb per iteration.  At L(ntop):
C rax = remainder so far, rbp = rax + 1, un = index of the limb to consume.
C Steps, in instruction order:
C   r10     = up[un]
C   rdx:rax = rax * dinv, then the 128-bit value rbp:r10 is added to it
C   rbp     = low half of that sum, r13 = high half (candidate quotient)
C   r10     = up[un] - r13 * d, computed modulo 2^64
C   rax     = r10 if r10 < rbp, otherwise r10 + d
C   r13     = r13 if r10 < rbp, otherwise r13 - 1   (the adc of -1 plus CF)
C   if rax >= d, L(nfx) subtracts d once more and increments r13
C The scheme appears to be division by an invariant integer using a
C precomputed reciprocal.
ALIGN(16)
|
|
L(ntop):mov (up,un,8), %r10 C K8-K10 P6-CNR P6-NHM P4
|
|
mul dinv C 0,13 0,20 0,18 0,45
|
|
add %r10, %rax C 4 8 3 12
|
|
adc %rbp, %rdx C 5 9 10 13
|
|
mov %rax, %rbp C 5 9 4 13
|
|
mov %rdx, %r13 C 6 11 12 23
|
|
imul d, %rdx C 6 11 11 23
|
|
sub %rdx, %r10 C 10 16 14 33
|
|
mov d, %rax C
|
|
add %r10, %rax C 11 17 15 34
|
|
cmp %rbp, %r10 C 11 17 15 34
|
|
cmovc %r10, %rax C 12 18 16 35
|
|
adc $-1, %r13 C
|
|
cmp d, %rax C
|
|
jae L(nfx) C
|
|
C Store the finished quotient limb and step qp down.
L(nok): mov %r13, (qp) C
|
|
sub $8, qp C
|
|
C Loop entry.  lea leaves the flags alone, so jns tests the dec: iterate
C while un is still >= 0 after the decrement.
L(nent):lea 1(%rax), %rbp C
|
|
dec un C
|
|
jns L(ntop) C
|
|
|
|
C Shift count 0, so the final shr before L(ret) leaves the remainder as is.
xor R32(%rcx), R32(%rcx)
|
|
jmp L(frac)
|
|
|
|
C Out-of-line correction for the normalized loop: remainder was >= d.
L(nfx): sub d, %rax
|
|
inc %r13
|
|
jmp L(nok)
|
|
|
|
C ---- d has its top bit clear ----
C If there is a source limb and the top one is below d, the top quotient limb
C is zero: store rbp (still 0), take that limb as the initial remainder in
C rbp, step qp down and drop the limb from un.  Otherwise go straight to L(44)
C with rbp = 0.
L(unnormalized):
|
|
test un, un
|
|
je L(44)
|
|
mov -8(up,un,8), %rax
|
|
cmp d, %rax
|
|
jae L(44)
|
|
mov %rbp, (qp)
|
|
mov %rax, %rbp
|
|
lea -8(qp), qp
|
|
C NOTE(review): the flags tested by the je below are still those of the cmp
C above, since mov and lea do not modify flags.  This point is reached only
C when the jae was not taken, that is when rax < d, so ZF is clear and the
C branch looks unreachable.  Falling through continues on the general path
C at L(44).  Presumably an early exit for the case where this was the only
C limb was intended - confirm before changing anything.
je L(ret)
|
|
dec un
|
|
C cl = shift count.  bsr gives the index of the highest set bit of d and the
C not turns it into a value whose low 6 bits are 63 - index; 64-bit shifts
C use only those low 6 bits.  Assumes d != 0, as bsr of zero is undefined.
C d and the initial remainder in rbp are both shifted left by that count.
L(44):
|
|
bsr d, %rcx
|
|
not R32(%rcx)
|
|
shl R8(%rcx), d
|
|
shl R8(%rcx), %rbp
|
|
|
|
C Same call sequence as at L(8), now with the shifted d.  rcx is saved as
C well because the shift count is needed after the call.  With that one extra
C push, a further 8 bytes are taken off rsp in both ABIs, which keeps the
C stack pointer at the same 16-byte alignment as at the L(8) call site.
push %rcx
|
|
IFSTD(` push %rdi ')
|
|
IFSTD(` push %rsi ')
|
|
push %r8
|
|
IFSTD(` sub $8, %rsp ')
|
|
IFSTD(` mov d, %rdi ')
|
|
IFDOS(` sub $40, %rsp ')
|
|
IFDOS(` mov d, %rcx ')
|
|
ASSERT(nz, `test $15, %rsp')
|
|
CALL( mpn_invert_limb)
|
|
IFSTD(` add $8, %rsp ')
|
|
IFDOS(` add $40, %rsp ')
|
|
pop %r8
|
|
IFSTD(` pop %rsi ')
|
|
IFSTD(` pop %rdi ')
|
|
pop %rcx
|
|
|
|
C dinv = value returned by the call; rax = shifted initial remainder.
C With no source limbs left, only the fraction loop remains.
mov %rax, dinv
|
|
mov %rbp, %rax
|
|
test un, un
|
|
je L(frac)
|
|
|
|
C Entry to the unnormalized loop, also the jump target of
C mpn_preinv_divrem_1.  The top bits of the highest remaining limb are merged
C into the remainder: rax |= up[un] >> (64 - cnt).  The right shift count is
C formed by negating cl, which the hardware reduces modulo 64, and cl is
C negated back afterwards.
L(uent):dec un
|
|
mov (up,un,8), %rbp
|
|
neg R32(%rcx)
|
|
shr R8(%rcx), %rbp
|
|
neg R32(%rcx)
|
|
or %rbp, %rax
|
|
jmp L(ent)
|
|
|
|
C Unnormalized main loop.  At L(utop): rax = remainder, r11 = rax + 1,
C rbp = the source limb loaded at L(ent), un = index of the next lower limb.
C The first six instructions build the shifted dividend limb
C   rbp = (rbp << cnt) | (up[un] >> (64 - cnt))
C The rest is the same division step as in L(ntop), with rbp in the role of
C the source limb and r11 holding remainder + 1 and then the low product half.
ALIGN(16)
|
|
L(utop):mov (up,un,8), %r10
|
|
shl R8(%rcx), %rbp
|
|
neg R32(%rcx)
|
|
shr R8(%rcx), %r10
|
|
neg R32(%rcx)
|
|
or %r10, %rbp
|
|
mul dinv
|
|
add %rbp, %rax
|
|
adc %r11, %rdx
|
|
mov %rax, %r11
|
|
mov %rdx, %r13
|
|
imul d, %rdx
|
|
sub %rdx, %rbp
|
|
mov d, %rax
|
|
add %rbp, %rax
|
|
cmp %r11, %rbp
|
|
cmovc %rbp, %rax
|
|
adc $-1, %r13
|
|
cmp d, %rax
|
|
jae L(ufx)
|
|
L(uok): mov %r13, (qp)
|
|
sub $8, qp
|
|
C Loop entry: load the limb at index un before decrementing.  lea leaves the
C flags alone, so jns tests the dec and the loop runs while un >= 0.
L(ent): mov (up,un,8), %rbp
|
|
dec un
|
|
lea 1(%rax), %r11
|
|
jns L(utop)
|
|
|
|
C Last source limb: there is no lower limb to merge in, so rbp is only
C shifted left.  The division step is the same as in L(utop).
L(uend):shl R8(%rcx), %rbp
|
|
mul dinv
|
|
add %rbp, %rax
|
|
adc %r11, %rdx
|
|
mov %rax, %r11
|
|
mov %rdx, %r13
|
|
imul d, %rdx
|
|
sub %rdx, %rbp
|
|
mov d, %rax
|
|
add %rbp, %rax
|
|
cmp %r11, %rbp
|
|
cmovc %rbp, %rax
|
|
adc $-1, %r13
|
|
cmp d, %rax
|
|
jae L(efx)
|
|
L(eok): mov %r13, (qp)
|
|
sub $8, qp
|
|
jmp L(frac)
|
|
|
|
C Out-of-line corrections for L(utop) and L(uend): remainder was >= d.
L(ufx): sub d, %rax
|
|
inc %r13
|
|
jmp L(uok)
|
|
L(efx): sub d, %rax
|
|
inc %r13
|
|
jmp L(eok)
|
|
|
|
C Fraction part, shared by all paths.  rbp = -d, so the imul in the loop
C yields minus the candidate quotient times d directly.
L(frac):mov d, %rbp
|
|
neg %rbp
|
|
jmp L(fent)
|
|
|
|
C Fraction loop, run fn times; no source limb is read.  At L(ftop):
C rax = remainder, r11 = rax + 1.  Steps, in instruction order:
C   rdx:rax = rax * dinv, then r11 is added to the high half
C   r11     = low half, r13 = high half (candidate quotient)
C   rdx     = r13 * -d, computed modulo 2^64
C   rax     = rdx if rdx < r11, otherwise rdx + d
C   r13     = r13 if rdx < r11, otherwise r13 - 1
C Unlike the loops above there is no second comparison against d here.
ALIGN(16) C K8-K10 P6-CNR P6-NHM P4
|
|
L(ftop):mul dinv C 0,12 0,17 0,17
|
|
add %r11, %rdx C 5 8 10
|
|
mov %rax, %r11 C 4 8 3
|
|
mov %rdx, %r13 C 6 9 11
|
|
imul %rbp, %rdx C 6 9 11
|
|
mov d, %rax C
|
|
add %rdx, %rax C 10 14 14
|
|
cmp %r11, %rdx C 10 14 14
|
|
cmovc %rdx, %rax C 11 15 15
|
|
adc $-1, %r13 C
|
|
mov %r13, (qp) C
|
|
sub $8, qp C
|
|
C Loop entry: iterate while fn is still >= 0 after the decrement.
L(fent):lea 1(%rax), %r11 C
|
|
dec fn C
|
|
jns L(ftop) C
|
|
|
|
C Shift the remainder right by the count in cl.  That count is 0 after the
C normalized loop and the normalization shift otherwise.
shr R8(%rcx), %rax
|
|
C Common exit: restore the registers pushed in the prologue and return rax.
L(ret): pop %rbx
|
|
pop %rbp
|
|
pop %r12
|
|
pop %r13
|
|
FUNC_EXIT()
|
|
ret
|
|
EPILOGUE()
|