mirror of
https://review.haiku-os.org/buildtools
synced 2025-02-07 22:44:46 +01:00
b58ddff026
* merged mpfr 3.0.0 and gmp 5.0.1 in buildtools trunk git-svn-id: file:///srv/svn/repos/haiku/buildtools/trunk@37378 a95241bf-73f2-0310-859d-f6bbb57e9c96
189 lines
3.7 KiB
NASM
189 lines
3.7 KiB
NASM
dnl x86 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
|
|
|
|
dnl Copyright 2007, 2008 Free Software Foundation, Inc.
|
|
|
|
dnl This file is part of the GNU MP Library.
|
|
|
|
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
|
dnl it under the terms of the GNU Lesser General Public License as published
|
|
dnl by the Free Software Foundation; either version 3 of the License, or (at
|
|
dnl your option) any later version.
|
|
|
|
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
|
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
|
dnl License for more details.
|
|
|
|
dnl You should have received a copy of the GNU Lesser General Public License
|
|
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
|
|
|
|
include(`../config.m4')
|
|
|
|
|
|
C               norm    frac
C 486
C P5
C P6-13         29.2
C P6-15         *26
C K6
C K7            22
C K8            *19
C P4-f1
C P4-f2         *65
C P4-f3
C P4-f4         *72
|
|
|
|
C A star means numbers not updated for the latest version of the code.
|
|
|
|
|
|
C TODO
|
|
C * Perhaps keep ecx or esi in stack slot, freeing up a reg for q0.
|
|
C * The loop has not been carefully tuned. We should at the very least do
|
|
C some local insn swapping.
|
|
C * The code outside the main loop is what gcc generated. Clean up!
|
|
C * Clean up stack slot usage.
|
|
|
|
C INPUT PARAMETERS
|
|
C qp
|
|
C fn
|
|
C up_param
|
|
C un_param
|
|
C dp
|
|
|
|
|
|
C eax ebx ecx edx esi edi ebp
|
|
C cnt qp
|
|
|
|
ASM_START()
	TEXT
	ALIGN(16)

C mpn_divrem_2 (qp, fn, up, un, dp)
C
C Divides the un-limb number at up, extended with fn zero limbs below it, by
C the two-limb divisor {d1,d0} = {dp[1],dp[0]}.  The divisor is expected to
C be normalized (most significant bit of d1 set); the reciprocal set-up
C below relies on that.
C
C Results:
C   qp[0 .. un+fn-3]   quotient limbs, stored from the top index downwards
C   up[0], up[1]       the two remainder limbs (low, high)
C   eax                0 or 1; 1 when the divisor had to be subtracted once
C                      from the two top dividend limbs before the loop
C
C up[un-1] and up[un-2] are read unconditionally, so un is presumably
C required to be at least 2 by the callers.
C
C Arguments are taken from the stack.  ebx, esi, edi and ebp are saved and
C restored; eax, ecx and edx are clobbered.
C
C Stack frame after the prologue (4 pushes + 36 bytes of locals), limbs are
C 4 bytes:
C    0(%esp)  q0    scratch, low word of the current quotient estimate
C    4(%esp)  -d1
C   12(%esp)  up + 4*(un-3), first dividend limb the loop brings in
C   16(%esp)  di    reciprocal used to estimate each quotient limb
C   20(%esp)  d0    dp[0]
C   24(%esp)  d1    dp[1]
C   32(%esp)  msl   return value, 0 or 1
C   52(%esp)  return address
C   56(%esp)  qp
C   60(%esp)  fn
C   64(%esp)  up
C   68(%esp)  un
C   72(%esp)  dp

PROLOGUE(mpn_divrem_2)
	push	%ebp
	push	%edi
	push	%esi
	push	%ebx
	sub	$36, %esp

	mov	68(%esp), %ecx		C un
	mov	72(%esp), %esi		C dp
	movl	$0, 32(%esp)		C msl = 0
	lea	0(,%ecx,4), %edi
	add	64(%esp), %edi		C up + 4*un
	mov	(%esi), %ebx
	mov	4(%esi), %eax
	mov	%ebx, 20(%esp)		C d0
	sub	$12, %edi		C up + 4*(un-3)
	mov	%eax, 24(%esp)		C d1
	mov	%edi, 12(%esp)
	mov	8(%edi), %ebx		C n2 = up[un-1]
	mov	4(%edi), %ebp		C n1 = up[un-2]

C If {n2,n1} >= {d1,d0}, go and subtract the divisor once (return value 1).
	cmp	%eax, %ebx
	jb	L(8)			C n2 < d1, nothing to subtract
	seta	%dl			C n2 > d1, flags still from the cmp above
	cmp	20(%esp), %ebp
	setae	%al			C n1 >= d0
	orb	%dl, %al		C byte form spelled out for picky assemblers
	jne	L(35)

L(8):
	mov	60(%esp), %esi		C fn
	lea	-3(%esi,%ecx), %edi	C un + fn - 3, index of top quotient limb
	test	%edi, %edi
	js	L(9)			C no quotient limbs to produce

C Compute di.  Start with v = floor (((not d1) * 2^32 + 2^32 - 1) / d1).
C With d1 normalized, (not d1) < d1, so the quotient fits in 32 bits and
C divl cannot fault.  v is then stepped down while taking d0 into account.
	mov	24(%esp), %edx
	mov	$-1, %esi
	mov	%esi, %eax
	mov	%esi, %ecx		C ecx = -1, absorbs the two carries below
	not	%edx
	divl	24(%esp)
	mov	%eax, %esi		C v
	imul	24(%esp), %eax		C low word of v * d1
	mov	%eax, (%esp)
	mov	%esi, %eax
	mull	20(%esp)		C edx:eax = v * d0
	mov	(%esp), %eax
	add	20(%esp), %eax		C low (v * d1) + d0
	adc	$0, %ecx
	add	%eax, %edx		C ... + high (v * d0)
	adc	$0, %ecx
	mov	%ecx, %eax		C mov leaves the flags of the adc intact
	js	L(32)			C ecx still negative, v needs no adjusting
L(36):	dec	%esi			C v--
	sub	24(%esp), %edx
	sbb	$0, %eax
	jns	L(36)			C repeat while eax stays non-negative
L(32):
	mov	%esi, 16(%esp)		C di
	mov	%edi, %ecx		C quotient index, counts down to 0
	mov	12(%esp), %esi		C pointer to next dividend limb
	mov	24(%esp), %eax
	neg	%eax
	mov	%eax, 4(%esp)		C -d1
C NOTE(review): the nop places the loop top one byte past the 16-byte
C boundary; presumably a tuning choice, confirm before removing it.
	ALIGN(16)
	nop

C Loop state
C   registers:  ebx = n2 (high remainder limb)   ebp = n1 (low remainder limb)
C               ecx = quotient index             esi = dividend pointer
C               edi = q (current quotient limb)  eax, edx = scratch
C   stack:      0 = q0    4 = -d1    16 = di    20 = d0    24 = d1
C               32 = msl  56 = qp    60 = fn

L(loop):
	mov	16(%esp), %eax		C di
	mul	%ebx			C edx:eax = di * n2
	add	%ebp, %eax
	mov	%eax, (%esp)		C q0 = low (di * n2) + n1
	adc	%ebx, %edx		C high (di * n2) + n2 + carry
	mov	%edx, %edi		C q
	imul	4(%esp), %edx		C -d1 * q
	mov	20(%esp), %eax		C d0
	lea	(%edx, %ebp), %ebx	C n1 -= q * d1, becomes the new high limb
	mul	%edi			C edx:eax = d0 * q
	xor	%ebp, %ebp		C new low limb is 0 for fraction limbs
	cmp	60(%esp), %ecx
	jl	L(19)			C index < fn (signed), no dividend limb left
	mov	(%esi), %ebp		C otherwise bring in the next limb of up
	sub	$4, %esi
L(19):	sub	20(%esp), %ebp
	sbb	24(%esp), %ebx		C {ebx,ebp} -= {d1,d0}
	sub	%eax, %ebp
	sbb	%edx, %ebx		C {ebx,ebp} -= q * d0
	mov	20(%esp), %eax		C d0
	inc	%edi			C q++
	xor	%edx, %edx
	cmp	(%esp), %ebx
	adc	$-1, %edx		C mask = -1 if ebx >= q0, else 0
	add	%edx, %edi		C q-- when mask is set
	and	%edx, %eax		C d0 or 0
	and	24(%esp), %edx		C d1 or 0
	add	%eax, %ebp
	adc	%edx, %ebx		C add the divisor back when mask is set
	cmp	24(%esp), %ebx
	jae	L(fix)			C high limb >= d1, may need one more step
L(bck):	mov	56(%esp), %edx		C qp
	mov	%edi, (%edx, %ecx, 4)	C qp[index] = q
	dec	%ecx
	jns	L(loop)

C Store the remainder, return msl.
L(9):	mov	64(%esp), %esi		C up
	mov	%ebp, (%esi)		C up[0] = low remainder limb
	mov	%ebx, 4(%esi)		C up[1] = high remainder limb
	mov	32(%esp), %eax		C return value
	add	$36, %esp
	pop	%ebx
	pop	%esi
	pop	%edi
	pop	%ebp
	ret

C Out-of-line fix-up: the high remainder limb is >= d1.  If the whole
C remainder is >= the divisor, bump q and subtract the divisor once more.
L(fix):	seta	%dl			C ebx > d1, flags from the cmp before jae
	cmp	20(%esp), %ebp
	setae	%al			C ebp >= d0
	orb	%dl, %al		C byte form spelled out for picky assemblers
	je	L(bck)			C remainder < divisor after all
	inc	%edi
	sub	20(%esp), %ebp
	sbb	24(%esp), %ebx
	jmp	L(bck)

C Initial adjustment: {n2,n1} >= {d1,d0}, so subtract the divisor once and
C record 1 as the return value.  ecx still holds un here.
L(35):	sub	20(%esp), %ebp
	sbb	24(%esp), %ebx
	movl	$1, 32(%esp)
	jmp	L(8)
EPILOGUE()
|