mirror of
https://review.haiku-os.org/buildtools
synced 2025-02-12 08:47:41 +01:00
Old version was from 2012-05-06, 6.1.2 is from 2016-12-16 A lot of support for newer processors and speedups since then See gmp/NEWS for details
168 lines
4.0 KiB
NASM
168 lines
4.0 KiB
NASM
dnl ARM mpn_addlsh1_n and mpn_sublsh1_n
|
|
|
|
dnl Contributed to the GNU project by Torbjörn Granlund.
|
|
|
|
dnl Copyright 2012 Free Software Foundation, Inc.
|
|
|
|
dnl This file is part of the GNU MP Library.
|
|
dnl
|
|
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
|
dnl it under the terms of either:
|
|
dnl
|
|
dnl * the GNU Lesser General Public License as published by the Free
|
|
dnl Software Foundation; either version 3 of the License, or (at your
|
|
dnl option) any later version.
|
|
dnl
|
|
dnl or
|
|
dnl
|
|
dnl * the GNU General Public License as published by the Free Software
|
|
dnl Foundation; either version 2 of the License, or (at your option) any
|
|
dnl later version.
|
|
dnl
|
|
dnl or both in parallel, as here.
|
|
dnl
|
|
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
|
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
dnl for more details.
|
|
dnl
|
|
dnl You should have received copies of the GNU General Public License and the
|
|
dnl GNU Lesser General Public License along with the GNU MP Library. If not,
|
|
dnl see https://www.gnu.org/licenses/.
|
|
|
|
include(`../config.m4')
|
|
|
|
C              addlsh1_n       sublsh1_n
C             cycles/limb     cycles/limb
C StrongARM        ?               ?
C XScale           ?               ?
C Cortex-A7        ?               ?
C Cortex-A8        ?               ?
C Cortex-A9        3.12            3.7
C Cortex-A15       ?               ?
|
|
|
|
C TODO
C  * The addlsh1_n code runs well, but is only barely faster than mpn_addmul_1.
C    The sublsh1_n code could surely be tweaked; its REVCY slows things down
C    very much.  If two insns are really needed, it might help to separate them
C    for better micro-parallelism.
|
|
|
|
C Argument registers.  In the function body rp is the base register of every
C store (stmia/stm/str), up and vp are the base registers of the loads, and
C n is the counter that is decremented by 3 for each block of limbs.
define(`rp', `r0')
define(`up', `r1')
define(`vp', `r2')
define(`n',  `r3')
|
|
|
|
C Settings for mpn_addlsh1_n.
C
C   ADDSUBC         adcs, combines a limb of up with a doubled limb of vp.
C   SAVECY(reg, x)  sbc reg, x, #0.  The function passes r11 (all ones) as x,
C                   so reg becomes -1 if the carry flag is set, -2 if clear.
C   RESTCY(reg)     cmn reg, #1.  Sets the carry flag exactly when reg is -1;
C                   the other values used here (-2 and 0) leave it clear.
C   REVCY(reg)      empty, the carry out of adcs is used as it is.
C   INICYR(reg)     reg = 0, so a following RESTCY yields carry clear.
C   RETVAL(reg)     r0 = reg + 2 + carry flag.
C   r10r11          r11, so the push/pop register range is r4-r11.
C
C ADDSUB, SETCY and func_nc are defined here but not referenced in the
C visible part of this file.
ifdef(`OPERATION_addlsh1_n', `
  define(`ADDSUB',	adds)
  define(`ADDSUBC',	adcs)
  define(`SETCY',	`cmp	$1, #1')
  define(`RETVAL',	`adc	r0, $1, #2')
  define(`SAVECY',	`sbc	$1, $2, #0')
  define(`RESTCY',	`cmn	$1, #1')
  define(`REVCY',	`')
  define(`INICYR',	`mov	$1, #0')
  define(`r10r11',	`r11')
  define(`func',	mpn_addlsh1_n)
  define(`func_nc',	mpn_addlsh1_nc)')
|
|
C Settings for mpn_sublsh1_n.
C
C   ADDSUBC         sbcs, subtracts a doubled limb of vp from a limb of up.
C   SAVECY(reg, x)  sbc reg, reg, reg.  The second argument is ignored; reg
C                   becomes 0 if the carry flag is set, -1 if it is clear.
C   RESTCY(reg)     cmn reg, #1.  Sets the carry flag exactly when reg is -1,
C                   so a SAVECY/RESTCY pair hands the flag back inverted.
C   REVCY(reg)      that very pair: inverts the carry flag and leaves 0 or -1
C                   in reg.  It expands to two instructions on two lines.
C   INICYR(reg)     reg = -1, so a following RESTCY yields carry set.
C   RETVAL(reg)     r0 = reg + 1 + carry flag.
C   r10r11          r10, so the push/pop register range is r4-r10.
C
C ADDSUB, SETCY and func_nc are defined here but not referenced in the
C visible part of this file.
ifdef(`OPERATION_sublsh1_n', `
  define(`ADDSUB',	subs)
  define(`ADDSUBC',	sbcs)
  define(`SETCY',	`rsbs	$1, $1, #0')
  define(`RETVAL',	`adc	r0, $1, #1')
  define(`SAVECY',	`sbc	$1, $1, $1')
  define(`RESTCY',	`cmn	$1, #1')
  define(`REVCY',	`sbc	$1, $1, $1
			cmn	$1, #1')
  define(`INICYR',	`mvn	$1, #0')
  define(`r10r11',	`r10')
  define(`func',	mpn_sublsh1_n)
  define(`func_nc',	mpn_sublsh1_nc)')
|
|
|
|
C func is mpn_addlsh1_n or mpn_sublsh1_n, whichever OPERATION_ symbol is
C defined when this file is processed.
C
C In:   rp (r0)  destination limbs
C       up (r1)  first source, combined unshifted
C       vp (r2)  second source, each limb doubled before use
C       n  (r3)  limb count; presumably >= 1 as is usual for mpn routines,
C                TODO confirm against the callers
C Out:  r0       built by RETVAL from the saved doubling carry and the final
C                carry flag
C
C Every vp limb is doubled with adcs rX, rX, rX and then combined with the
C matching up limb by ADDSUBC, so the limbs stored through rp are
C up + 2*vp (adcs) or up - 2*vp (sbcs).
C
C Register roles:
C   r4-r6     limbs loaded from up, overwritten with the result limbs
C   r8-r10    limbs loaded from vp, doubled in place
C   r12, r14  alternating carry-save registers
C   r11       all ones, addlsh1_n only; passed as second SAVECY argument
C   r7        scratch for the r12/r14 swap after the loop
C
C Carry handling: limbs are processed in blocks of three.  The carry out of
C the ADDSUBC chain of one block is left in the flag (after REVCY) and enters
C the doubling chain of the next block as its low bit.  The carry out of each
C doubling chain is parked by SAVECY in r12 or r14 and brought back by RESTCY
C as carry-in of the ADDSUBC chain one block later.  Both carries have the
C same weight, so exchanging their paths does not change the result.

MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)

ASM_START()
PROLOGUE(func)
	push	{r4-r10r11, r14}

C r11 is only needed, and only saved above, in the addlsh1_n variant.
ifdef(`OPERATION_addlsh1_n', `
	mvn	r11, #0
')
	INICYR(	r14)
	subs	n, n, #3
	blt	L(le2)			C carry clear on branch path

	cmn	r0, #0			C clear carry
	ldmia	vp!, {r8, r9, r10}
	b	L(mid)

C Main loop, two blocks of three limbs per iteration.  The halves differ only
C in which of r12/r14 is restored from and which is saved to.
L(top):	RESTCY(	r14)
	ADDSUBC	r4, r4, r8
	ADDSUBC	r5, r5, r9
	ADDSUBC	r6, r6, r10
	ldmia	vp!, {r8, r9, r10}
	stmia	rp!, {r4, r5, r6}
	REVCY(r14)
	adcs	r8, r8, r8
	adcs	r9, r9, r9
	adcs	r10, r10, r10
	ldmia	up!, {r4, r5, r6}
	SAVECY(	r14, r11)		C must precede subs, which rewrites the flags
	subs	n, n, #3
	blt	L(exi)
	RESTCY(	r12)
	ADDSUBC	r4, r4, r8
	ADDSUBC	r5, r5, r9
	ADDSUBC	r6, r6, r10
	ldmia	vp!, {r8, r9, r10}
	stmia	rp!, {r4, r5, r6}
	REVCY(r12)
L(mid):	adcs	r8, r8, r8
	adcs	r9, r9, r9
	adcs	r10, r10, r10
	ldmia	up!, {r4, r5, r6}
	SAVECY(	r12, r11)
	subs	n, n, #3
	bge	L(top)

C Leaving from the second half: exchange r12 and r14 so that L(exi) sees the
C same register assignment as when it is reached from the first half.
	mov	r7, r12			C swap alternating...
	mov	r12, r14		C ...carry-save...
	mov	r14, r7			C ...registers

C Finish the block whose vp limbs are already doubled and whose up limbs are
C already loaded.
L(exi):	RESTCY(	r12)
	ADDSUBC	r4, r4, r8
	ADDSUBC	r5, r5, r9
	ADDSUBC	r6, r6, r10
	stmia	rp!, {r4, r5, r6}

	REVCY(r12)
C Tail.  Bit 0 of n is clear only for -2 (one limb left); bit 1 then tells
C -1 (two limbs left) from -3 (nothing left).
L(le2):	tst	n, #1			C n = {-1,-2,-3} map to [2], [1], [0]
	beq	L(e1)

L(e02):	tst	n, #2
	beq	L(rt0)
	ldm	vp, {r8, r9}
	adcs	r8, r8, r8
	adcs	r9, r9, r9
	ldm	up, {r4, r5}
	SAVECY(	r12, r11)
	RESTCY(	r14)
	ADDSUBC	r4, r4, r8
	ADDSUBC	r5, r5, r9
	stm	rp, {r4, r5}
	b	L(rt1)

L(e1):	ldr	r8, [vp]
	adcs	r8, r8, r8
	ldr	r4, [up]
	SAVECY(	r12, r11)
	RESTCY(	r14)
	ADDSUBC	r4, r4, r8
	str	r4, [rp]

C r14 receives the saved carry of the last doubling chain; the flag still
C holds the carry of the last ADDSUBC chain (inverted by REVCY for sublsh1_n).
L(rt1):	mov	r14, r12
	REVCY(r12)
L(rt0):	RETVAL(	r14)
	pop	{r4-r10r11, r14}
	ret	r14
EPILOGUE()