mirror of
https://review.haiku-os.org/buildtools
synced 2025-02-15 02:07:42 +01:00
635 lines
12 KiB
NASM
635 lines
12 KiB
NASM
|
dnl IA-64 mpn_submul_1 -- Multiply a limb vector with a limb and subtract the
|
||
|
dnl result from a second limb vector.
|
||
|
|
||
|
dnl Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
|
||
|
|
||
|
dnl This file is part of the GNU MP Library.
|
||
|
|
||
|
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
||
|
dnl it under the terms of the GNU Lesser General Public License as published
|
||
|
dnl by the Free Software Foundation; either version 3 of the License, or (at
|
||
|
dnl your option) any later version.
|
||
|
|
||
|
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
||
|
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||
|
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||
|
dnl License for more details.
|
||
|
|
||
|
dnl You should have received a copy of the GNU Lesser General Public License
|
||
|
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
|
||
|
|
||
|
include(`../config.m4')
|
||
|
|
||
|
C cycles/limb
|
||
|
C Itanium: 4.0
|
||
|
C Itanium 2: 2.25 (alignment dependent, sometimes it seems to need 3 c/l)
|
||
|
|
||
|
C TODO
|
||
|
C * Optimize feed-in and wind-down code, both for speed and code size.
|
||
|
C * Handle low limb input and results specially, using a common stf8 in the
|
||
|
C epilogue.
|
||
|
C * Delay r8, r10 initialization, put cmp-p6 in 1st bundle and br .Ldone in
|
||
|
C 2nd bundle. This will allow the bbb bundle to be one cycle earlier and
|
||
|
C save a cycle.
|
||
|
|
||
|
C INPUT PARAMETERS
|
||
|
define(`rp', `r32')
|
||
|
define(`up', `r33')
|
||
|
define(`n', `r34')
|
||
|
define(`vl', `r35')
|
||
|
|
||
|
ASM_START()
|
||
|
C mpn_submul_1 -- entry code: argument setup and dispatch on n mod 4.
C
C Arguments, per the defines earlier in this file: rp = r32, up = r33,
C n = r34, vl = r35.
C
C Method, as far as the visible code shows: vl is negated up front, so each
C xma.l/xma.hu pair forms up[i] * (2^64 - vl) + rp[i].  That value equals
C 2^64 * (up[i] - b) + lo, where lo is the low limb of rp[i] - up[i]*vl and
C b is the limb borrowed by that subtraction.  The integer code later gets b
C back as up[i] - hi, which is why the up limbs are read a second time with
C ld8 through r9.  The identity fails for vl == 0, hence the early exit to
C .Ldone below.
C NOTE(review): n == 0 is not handled here (r19 = n - 1 would wrap) --
C presumably callers guarantee n >= 1; confirm against the mpn interface.
PROLOGUE(mpn_submul_1)
|
||
|
.prologue
|
||
|
.save ar.lc, r2
|
||
|
.body
|
||
|
|
||
|
C When HAVE_ABI_32 is defined: widen both pointers with addp4 and
C zero-extend n with zxt4 before any use.
ifdef(`HAVE_ABI_32',
|
||
|
` addp4 rp = 0, rp C M I
|
||
|
addp4 up = 0, up C M I
|
||
|
zxt4 n = n C I
|
||
|
;;
|
||
|
')
|
||
|
C r10 = pointer used for the result stores, r9 = pointer used for the
C integer re-loads of up; vl is replaced by 0 - vl.
{.mmi
|
||
|
mov r10 = rp C M I
|
||
|
mov r9 = up C M I
|
||
|
sub vl = r0, vl C M I negate vl
|
||
|
}
|
||
|
C Start the FP loads of rp[0] and up[0]; rp and up post-increment by 8.
{.mmi
|
||
|
ldf8 f8 = [rp], 8 C M
|
||
|
ldf8 f7 = [up], 8 C M
|
||
|
add r19 = -1, n C M I n - 1
|
||
|
;;
|
||
|
}
|
||
|
C p6 = (negated vl == 0), r8 = running carry limb, r2 = saved ar.lc
C (matching the .save directive above).
{.mmi
|
||
|
cmp.eq p6, p0 = 0, vl C M I
|
||
|
mov r8 = 0 C M I zero cylimb
|
||
|
mov r2 = ar.lc C I0
|
||
|
}
|
||
|
C f6 = negated multiplier for the xma instructions, r14 = n mod 4,
C r19 = (n - 1) / 4.
{.mmi
|
||
|
setf.sig f6 = vl C M2 M3
|
||
|
and r14 = 3, n C M I
|
||
|
shr.u r19 = r19, 2 C I0
|
||
|
;;
|
||
|
}
|
||
|
C p10 = (n mod 4 == 0); leave early when vl is zero.
{.mmb
|
||
|
nop 0
|
||
|
cmp.eq p10, p0 = 0, r14 C M I
|
||
|
(p6) br.spnt .Ldone C B vl == 0
|
||
|
}
|
||
|
C p11 = (n mod 4 == 2), p12 = (n mod 4 == 3); ar.lc = (n - 1) / 4 is the
C count consumed by the br.cloop branches that follow.
{.mmi
|
||
|
cmp.eq p11, p0 = 2, r14 C M I
|
||
|
cmp.eq p12, p0 = 3, r14 C M I
|
||
|
mov ar.lc = r19 C I0
|
||
|
}
|
||
|
C Three-way dispatch; n mod 4 == 1 falls through to .Lb01.
{.bbb
|
||
|
(p10) br.dptk .Lb00 C B
|
||
|
(p11) br.dptk .Lb10 C B
|
||
|
(p12) br.dptk .Lb11 C B
|
||
|
;;
|
||
|
}
|
||
|
|
||
|
C Feed-in for n mod 4 == 1.  f7/f8 already hold up[0]/rp[0] from the entry
C code.  br.cloop branches (and decrements ar.lc) while ar.lc is nonzero,
C so the fall-through right below is the n == 1 case.
.Lb01: br.cloop.dptk .grt1
|
||
|
|
||
|
C n == 1: one product; lo goes to r27, hi to r31, up[0] is re-read into r20.
xma.l f39 = f7, f6, f8
|
||
|
xma.hu f43 = f7, f6, f8
|
||
|
;;
|
||
|
getf.sig r27 = f39 C lo
|
||
|
getf.sig r31 = f43 C hi
|
||
|
ld8 r20 = [r9], 8
|
||
|
br .Lcj1
|
||
|
|
||
|
C n >= 5: load four more limb pairs while the first products are started.
.grt1: ldf8 f44 = [rp], 8
|
||
|
ldf8 f32 = [up], 8
|
||
|
;;
|
||
|
ldf8 f45 = [rp], 8
|
||
|
ldf8 f33 = [up], 8
|
||
|
;;
|
||
|
ldf8 f46 = [rp], 8
|
||
|
xma.l f39 = f7, f6, f8
|
||
|
ldf8 f34 = [up], 8
|
||
|
xma.hu f43 = f7, f6, f8
|
||
|
;;
|
||
|
ldf8 f47 = [rp], 8
|
||
|
xma.l f36 = f32, f6, f44
|
||
|
ldf8 f35 = [up], 8
|
||
|
xma.hu f40 = f32, f6, f44
|
||
|
br.cloop.dptk .grt5
|
||
|
;;
|
||
|
|
||
|
C n == 5: no further loads from rp; finish the products, move them to the
C integer side and join the wind-down at .Lcj5.
getf.sig r27 = f39 C lo
|
||
|
xma.l f37 = f33, f6, f45
|
||
|
ld8 r20 = [r9], 8
|
||
|
xma.hu f41 = f33, f6, f45
|
||
|
;;
|
||
|
getf.sig r31 = f43 C hi
|
||
|
getf.sig r24 = f36 C lo
|
||
|
xma.l f38 = f34, f6, f46
|
||
|
ld8 r21 = [r9], 8
|
||
|
xma.hu f42 = f34, f6, f46
|
||
|
;;
|
||
|
getf.sig r28 = f40 C hi
|
||
|
getf.sig r25 = f37 C lo
|
||
|
xma.l f39 = f35, f6, f47
|
||
|
ld8 r22 = [r9], 8
|
||
|
xma.hu f43 = f35, f6, f47
|
||
|
;;
|
||
|
getf.sig r29 = f41 C hi
|
||
|
getf.sig r26 = f38 C lo
|
||
|
ld8 r23 = [r9], 8
|
||
|
br .Lcj5
|
||
|
|
||
|
C n >= 9: keep the loads going for the next group of four.  The final
C br.cloop enters .Loop while ar.lc is nonzero; otherwise control goes to
C .Lend.
.grt5: ldf8 f44 = [rp], 8
|
||
|
ldf8 f32 = [up], 8
|
||
|
;;
|
||
|
getf.sig r27 = f39 C lo
|
||
|
xma.l f37 = f33, f6, f45
|
||
|
ld8 r20 = [r9], 8
|
||
|
xma.hu f41 = f33, f6, f45
|
||
|
;;
|
||
|
ldf8 f45 = [rp], 8
|
||
|
getf.sig r31 = f43 C hi
|
||
|
ldf8 f33 = [up], 8
|
||
|
;;
|
||
|
getf.sig r24 = f36 C lo
|
||
|
xma.l f38 = f34, f6, f46
|
||
|
ld8 r21 = [r9], 8
|
||
|
xma.hu f42 = f34, f6, f46
|
||
|
;;
|
||
|
ldf8 f46 = [rp], 8
|
||
|
getf.sig r28 = f40 C hi
|
||
|
ldf8 f34 = [up], 8
|
||
|
;;
|
||
|
getf.sig r25 = f37 C lo
|
||
|
xma.l f39 = f35, f6, f47
|
||
|
ld8 r22 = [r9], 8
|
||
|
xma.hu f43 = f35, f6, f47
|
||
|
;;
|
||
|
ldf8 f47 = [rp], 8
|
||
|
getf.sig r29 = f41 C hi
|
||
|
ldf8 f35 = [up], 8
|
||
|
;;
|
||
|
getf.sig r26 = f38 C lo
|
||
|
xma.l f36 = f32, f6, f44
|
||
|
ld8 r23 = [r9], 8
|
||
|
xma.hu f40 = f32, f6, f44
|
||
|
br.cloop.dptk .Loop
|
||
|
br .Lend
|
||
|
|
||
|
|
||
|
C Feed-in for n mod 4 == 2.  f7/f8 hold up[0]/rp[0]; the second limb pair
C is loaded here.  The fall-through after br.cloop is the n == 2 case
C (ar.lc == 0).
.Lb10: ldf8 f47 = [rp], 8
|
||
|
ldf8 f35 = [up], 8
|
||
|
br.cloop.dptk .grt2
|
||
|
|
||
|
C n == 2: two products, then join the wind-down at .Lcj2.
xma.l f38 = f7, f6, f8
|
||
|
xma.hu f42 = f7, f6, f8
|
||
|
;;
|
||
|
xma.l f39 = f35, f6, f47
|
||
|
xma.hu f43 = f35, f6, f47
|
||
|
;;
|
||
|
getf.sig r26 = f38 C lo
|
||
|
getf.sig r30 = f42 C hi
|
||
|
ld8 r23 = [r9], 8
|
||
|
;;
|
||
|
getf.sig r27 = f39 C lo
|
||
|
getf.sig r31 = f43 C hi
|
||
|
ld8 r20 = [r9], 8
|
||
|
br .Lcj2
|
||
|
|
||
|
C n >= 6: load four more limb pairs while the first two products start.
.grt2: ldf8 f44 = [rp], 8
|
||
|
ldf8 f32 = [up], 8
|
||
|
;;
|
||
|
ldf8 f45 = [rp], 8
|
||
|
ldf8 f33 = [up], 8
|
||
|
xma.l f38 = f7, f6, f8
|
||
|
xma.hu f42 = f7, f6, f8
|
||
|
;;
|
||
|
ldf8 f46 = [rp], 8
|
||
|
ldf8 f34 = [up], 8
|
||
|
xma.l f39 = f35, f6, f47
|
||
|
xma.hu f43 = f35, f6, f47
|
||
|
;;
|
||
|
ldf8 f47 = [rp], 8
|
||
|
ldf8 f35 = [up], 8
|
||
|
;;
|
||
|
getf.sig r26 = f38 C lo
|
||
|
xma.l f36 = f32, f6, f44
|
||
|
ld8 r23 = [r9], 8
|
||
|
xma.hu f40 = f32, f6, f44
|
||
|
br.cloop.dptk .grt6
|
||
|
|
||
|
C n == 6: finish the remaining products and join the wind-down at .Lcj6.
getf.sig r30 = f42 C hi
|
||
|
;;
|
||
|
getf.sig r27 = f39 C lo
|
||
|
xma.l f37 = f33, f6, f45
|
||
|
ld8 r20 = [r9], 8
|
||
|
xma.hu f41 = f33, f6, f45
|
||
|
;;
|
||
|
getf.sig r31 = f43 C hi
|
||
|
getf.sig r24 = f36 C lo
|
||
|
xma.l f38 = f34, f6, f46
|
||
|
ld8 r21 = [r9], 8
|
||
|
xma.hu f42 = f34, f6, f46
|
||
|
;;
|
||
|
getf.sig r28 = f40 C hi
|
||
|
getf.sig r25 = f37 C lo
|
||
|
xma.l f39 = f35, f6, f47
|
||
|
ld8 r22 = [r9], 8
|
||
|
xma.hu f43 = f35, f6, f47
|
||
|
br .Lcj6
|
||
|
|
||
|
C n >= 10: continue loading and enter the main loop at its .LL10 entry.
.grt6: ldf8 f44 = [rp], 8
|
||
|
getf.sig r30 = f42 C hi
|
||
|
ldf8 f32 = [up], 8
|
||
|
;;
|
||
|
getf.sig r27 = f39 C lo
|
||
|
xma.l f37 = f33, f6, f45
|
||
|
ld8 r20 = [r9], 8
|
||
|
xma.hu f41 = f33, f6, f45
|
||
|
;;
|
||
|
ldf8 f45 = [rp], 8
|
||
|
getf.sig r31 = f43 C hi
|
||
|
ldf8 f33 = [up], 8
|
||
|
;;
|
||
|
getf.sig r24 = f36 C lo
|
||
|
xma.l f38 = f34, f6, f46
|
||
|
ld8 r21 = [r9], 8
|
||
|
xma.hu f42 = f34, f6, f46
|
||
|
;;
|
||
|
ldf8 f46 = [rp], 8
|
||
|
getf.sig r28 = f40 C hi
|
||
|
ldf8 f34 = [up], 8
|
||
|
;;
|
||
|
getf.sig r25 = f37 C lo
|
||
|
xma.l f39 = f35, f6, f47
|
||
|
ld8 r22 = [r9], 8
|
||
|
xma.hu f43 = f35, f6, f47
|
||
|
br .LL10
|
||
|
|
||
|
|
||
|
C Feed-in for n mod 4 == 3.  f7/f8 hold up[0]/rp[0]; two more limb pairs
C are loaded here.  The fall-through after br.cloop is the n == 3 case
C (ar.lc == 0).
.Lb11: ldf8 f46 = [rp], 8
|
||
|
ldf8 f34 = [up], 8
|
||
|
;;
|
||
|
ldf8 f47 = [rp], 8
|
||
|
ldf8 f35 = [up], 8
|
||
|
br.cloop.dptk .grt3
|
||
|
|
||
|
C n == 3: three products, then join the wind-down at .Lcj3.
xma.l f37 = f7, f6, f8
|
||
|
xma.hu f41 = f7, f6, f8
|
||
|
;;
|
||
|
xma.l f38 = f34, f6, f46
|
||
|
xma.hu f42 = f34, f6, f46
|
||
|
;;
|
||
|
getf.sig r25 = f37 C lo
|
||
|
xma.l f39 = f35, f6, f47
|
||
|
xma.hu f43 = f35, f6, f47
|
||
|
;;
|
||
|
getf.sig r29 = f41 C hi
|
||
|
ld8 r22 = [r9], 8
|
||
|
;;
|
||
|
getf.sig r26 = f38 C lo
|
||
|
getf.sig r30 = f42 C hi
|
||
|
ld8 r23 = [r9], 8
|
||
|
;;
|
||
|
getf.sig r27 = f39 C lo
|
||
|
getf.sig r31 = f43 C hi
|
||
|
ld8 r20 = [r9], 8
|
||
|
br .Lcj3
|
||
|
|
||
|
C n >= 7: load four more limb pairs while the first three products start.
.grt3: ldf8 f44 = [rp], 8
|
||
|
xma.l f37 = f7, f6, f8
|
||
|
ldf8 f32 = [up], 8
|
||
|
xma.hu f41 = f7, f6, f8
|
||
|
;;
|
||
|
ldf8 f45 = [rp], 8
|
||
|
xma.l f38 = f34, f6, f46
|
||
|
ldf8 f33 = [up], 8
|
||
|
xma.hu f42 = f34, f6, f46
|
||
|
;;
|
||
|
ldf8 f46 = [rp], 8
|
||
|
ldf8 f34 = [up], 8
|
||
|
;;
|
||
|
getf.sig r25 = f37 C lo
|
||
|
xma.l f39 = f35, f6, f47
|
||
|
ld8 r22 = [r9], 8
|
||
|
xma.hu f43 = f35, f6, f47
|
||
|
;;
|
||
|
ldf8 f47 = [rp], 8
|
||
|
getf.sig r29 = f41 C hi
|
||
|
ldf8 f35 = [up], 8
|
||
|
;;
|
||
|
getf.sig r26 = f38 C lo
|
||
|
xma.l f36 = f32, f6, f44
|
||
|
ld8 r23 = [r9], 8
|
||
|
xma.hu f40 = f32, f6, f44
|
||
|
br.cloop.dptk .grt7
|
||
|
;;
|
||
|
|
||
|
C n == 7: finish the remaining products and join the wind-down at .Lcj7.
getf.sig r30 = f42 C hi
|
||
|
getf.sig r27 = f39 C lo
|
||
|
xma.l f37 = f33, f6, f45
|
||
|
ld8 r20 = [r9], 8
|
||
|
xma.hu f41 = f33, f6, f45
|
||
|
;;
|
||
|
getf.sig r31 = f43 C hi
|
||
|
getf.sig r24 = f36 C lo
|
||
|
xma.l f38 = f34, f6, f46
|
||
|
ld8 r21 = [r9], 8
|
||
|
xma.hu f42 = f34, f6, f46
|
||
|
br .Lcj7
|
||
|
|
||
|
C n >= 11: continue loading and enter the main loop at its .LL11 entry.
.grt7: ldf8 f44 = [rp], 8
|
||
|
getf.sig r30 = f42 C hi
|
||
|
ldf8 f32 = [up], 8
|
||
|
;;
|
||
|
getf.sig r27 = f39 C lo
|
||
|
xma.l f37 = f33, f6, f45
|
||
|
ld8 r20 = [r9], 8
|
||
|
xma.hu f41 = f33, f6, f45
|
||
|
;;
|
||
|
ldf8 f45 = [rp], 8
|
||
|
getf.sig r31 = f43 C hi
|
||
|
ldf8 f33 = [up], 8
|
||
|
;;
|
||
|
getf.sig r24 = f36 C lo
|
||
|
xma.l f38 = f34, f6, f46
|
||
|
ld8 r21 = [r9], 8
|
||
|
xma.hu f42 = f34, f6, f46
|
||
|
br .LL11
|
||
|
|
||
|
|
||
|
C Feed-in for n mod 4 == 0.  f7/f8 hold up[0]/rp[0]; three more limb pairs
C are loaded here.  The fall-through after br.cloop is the n == 4 case
C (ar.lc == 0).
.Lb00: ldf8 f45 = [rp], 8
|
||
|
ldf8 f33 = [up], 8
|
||
|
;;
|
||
|
ldf8 f46 = [rp], 8
|
||
|
ldf8 f34 = [up], 8
|
||
|
;;
|
||
|
ldf8 f47 = [rp], 8
|
||
|
xma.l f36 = f7, f6, f8
|
||
|
ldf8 f35 = [up], 8
|
||
|
xma.hu f40 = f7, f6, f8
|
||
|
br.cloop.dptk .grt4
|
||
|
|
||
|
C n == 4: four products, then join the wind-down at .Lcj4.  Here the first
C up limb is re-read into r21, matching the r21 - r28 subtract at .Lcj4.
xma.l f37 = f33, f6, f45
|
||
|
xma.hu f41 = f33, f6, f45
|
||
|
;;
|
||
|
getf.sig r24 = f36 C lo
|
||
|
xma.l f38 = f34, f6, f46
|
||
|
ld8 r21 = [r9], 8
|
||
|
xma.hu f42 = f34, f6, f46
|
||
|
;;
|
||
|
getf.sig r28 = f40 C hi
|
||
|
xma.l f39 = f35, f6, f47
|
||
|
getf.sig r25 = f37 C lo
|
||
|
ld8 r22 = [r9], 8
|
||
|
xma.hu f43 = f35, f6, f47
|
||
|
;;
|
||
|
getf.sig r29 = f41 C hi
|
||
|
getf.sig r26 = f38 C lo
|
||
|
ld8 r23 = [r9], 8
|
||
|
;;
|
||
|
getf.sig r30 = f42 C hi
|
||
|
getf.sig r27 = f39 C lo
|
||
|
ld8 r20 = [r9], 8
|
||
|
br .Lcj4
|
||
|
|
||
|
C n >= 8: load four more limb pairs while the first four products run.
.grt4: ldf8 f44 = [rp], 8
|
||
|
xma.l f37 = f33, f6, f45
|
||
|
ldf8 f32 = [up], 8
|
||
|
xma.hu f41 = f33, f6, f45
|
||
|
;;
|
||
|
ldf8 f45 = [rp], 8
|
||
|
ldf8 f33 = [up], 8
|
||
|
xma.l f38 = f34, f6, f46
|
||
|
getf.sig r24 = f36 C lo
|
||
|
ld8 r21 = [r9], 8
|
||
|
xma.hu f42 = f34, f6, f46
|
||
|
;;
|
||
|
ldf8 f46 = [rp], 8
|
||
|
getf.sig r28 = f40 C hi
|
||
|
ldf8 f34 = [up], 8
|
||
|
xma.l f39 = f35, f6, f47
|
||
|
getf.sig r25 = f37 C lo
|
||
|
ld8 r22 = [r9], 8
|
||
|
xma.hu f43 = f35, f6, f47
|
||
|
;;
|
||
|
ldf8 f47 = [rp], 8
|
||
|
getf.sig r29 = f41 C hi
|
||
|
ldf8 f35 = [up], 8
|
||
|
;;
|
||
|
getf.sig r26 = f38 C lo
|
||
|
xma.l f36 = f32, f6, f44
|
||
|
ld8 r23 = [r9], 8
|
||
|
xma.hu f40 = f32, f6, f44
|
||
|
br.cloop.dptk .grt8
|
||
|
;;
|
||
|
|
||
|
C n == 8: start the next product and join the wind-down at .Lcj8.
getf.sig r30 = f42 C hi
|
||
|
getf.sig r27 = f39 C lo
|
||
|
xma.l f37 = f33, f6, f45
|
||
|
ld8 r20 = [r9], 8
|
||
|
xma.hu f41 = f33, f6, f45
|
||
|
br .Lcj8
|
||
|
|
||
|
C n >= 12: continue loading and enter the main loop at its .LL00 entry.
.grt8: ldf8 f44 = [rp], 8
|
||
|
getf.sig r30 = f42 C hi
|
||
|
ldf8 f32 = [up], 8
|
||
|
;;
|
||
|
getf.sig r27 = f39 C lo
|
||
|
xma.l f37 = f33, f6, f45
|
||
|
ld8 r20 = [r9], 8
|
||
|
xma.hu f41 = f33, f6, f45
|
||
|
br .LL00
|
||
|
|
||
|
C Main loop, four limbs per iteration.  For one limb each quarter does:
C   p6  = lo < r8        will the low-limb subtract borrow
C   r14 = lo - r8        result limb, stored through r10
C   r8  = up limb - hi   next carry limb before the borrow is added
C   r8 += 1 if p6
C These are interleaved with the ldf8 loads, xma products and getf.sig
C transfers for limbs further ahead.  The lo values rotate through
C r27, r24, r25, r26, the hi values through r31, r28, r29, r30 and the
C re-read up limbs through r20, r21, r22, r23.
C .LL00, .LL11 and .LL10 are mid-loop entry points reached from the
C feed-in code.
ALIGN(32)
|
||
|
.Loop:
|
||
|
C Quarter 1: consumes lo r27, hi r31, up limb r20.
{.mmi
|
||
|
ldf8 f44 = [rp], 8
|
||
|
cmp.ltu p6, p0 = r27, r8 C lo cmp
|
||
|
sub r14 = r27, r8 C lo sub
|
||
|
}
|
||
|
{.mmi
|
||
|
getf.sig r30 = f42 C hi
|
||
|
ldf8 f32 = [up], 8
|
||
|
sub r8 = r20, r31 C hi sub
|
||
|
;; C 01
|
||
|
}
|
||
|
{.mmf
|
||
|
getf.sig r27 = f39 C lo
|
||
|
st8 [r10] = r14, 8
|
||
|
xma.l f37 = f33, f6, f45
|
||
|
}
|
||
|
{.mfi
|
||
|
ld8 r20 = [r9], 8
|
||
|
xma.hu f41 = f33, f6, f45
|
||
|
(p6) add r8 = 1, r8
|
||
|
;; C 02
|
||
|
}
|
||
|
C Quarter 2: consumes lo r24, hi r28, up limb r21.
{.mmi
|
||
|
.LL00: ldf8 f45 = [rp], 8
|
||
|
cmp.ltu p6, p0 = r24, r8
|
||
|
sub r14 = r24, r8
|
||
|
}
|
||
|
{.mmi
|
||
|
getf.sig r31 = f43 C hi
|
||
|
ldf8 f33 = [up], 8
|
||
|
sub r8 = r21, r28
|
||
|
;; C 03
|
||
|
}
|
||
|
{.mmf
|
||
|
getf.sig r24 = f36 C lo
|
||
|
st8 [r10] = r14, 8
|
||
|
xma.l f38 = f34, f6, f46
|
||
|
}
|
||
|
{.mfi
|
||
|
ld8 r21 = [r9], 8
|
||
|
xma.hu f42 = f34, f6, f46
|
||
|
(p6) add r8 = 1, r8
|
||
|
;; C 04
|
||
|
}
|
||
|
C Quarter 3: consumes lo r25, hi r29, up limb r22.
{.mmi
|
||
|
.LL11: ldf8 f46 = [rp], 8
|
||
|
cmp.ltu p6, p0 = r25, r8
|
||
|
sub r14 = r25, r8
|
||
|
}
|
||
|
{.mmi
|
||
|
getf.sig r28 = f40 C hi
|
||
|
ldf8 f34 = [up], 8
|
||
|
sub r8 = r22, r29
|
||
|
;; C 05
|
||
|
}
|
||
|
{.mmf
|
||
|
getf.sig r25 = f37 C lo
|
||
|
st8 [r10] = r14, 8
|
||
|
xma.l f39 = f35, f6, f47
|
||
|
}
|
||
|
{.mfi
|
||
|
ld8 r22 = [r9], 8
|
||
|
xma.hu f43 = f35, f6, f47
|
||
|
(p6) add r8 = 1, r8
|
||
|
;; C 06
|
||
|
}
|
||
|
C Quarter 4: consumes lo r26, hi r30, up limb r23.
{.mmi
|
||
|
.LL10: ldf8 f47 = [rp], 8
|
||
|
cmp.ltu p6, p0 = r26, r8
|
||
|
sub r14 = r26, r8
|
||
|
}
|
||
|
{.mmi
|
||
|
getf.sig r29 = f41 C hi
|
||
|
ldf8 f35 = [up], 8
|
||
|
sub r8 = r23, r30
|
||
|
;; C 07
|
||
|
}
|
||
|
{.mmf
|
||
|
getf.sig r26 = f38 C lo
|
||
|
st8 [r10] = r14, 8
|
||
|
xma.l f36 = f32, f6, f44
|
||
|
}
|
||
|
{.mfi
|
||
|
ld8 r23 = [r9], 8
|
||
|
xma.hu f40 = f32, f6, f44
|
||
|
(p6) add r8 = 1, r8
|
||
|
}
|
||
|
C Repeat while ar.lc is nonzero; otherwise fall through to .Lend.
br.cloop.dptk .Loop
|
||
|
;;
|
||
|
|
||
|
C Wind-down.  Same per-limb steps as the main loop (compare, subtract,
C store through r10, form the next carry limb in r8) but without further
C ldf8 loads.  .LcjN is entered with N limbs still to be stored; .Lend is
C the entry with nine.  The xma and ld8 instructions disappear from the
C later stages as the pipeline empties.
.Lend:
|
||
|
cmp.ltu p6, p0 = r27, r8
|
||
|
sub r14 = r27, r8
|
||
|
getf.sig r30 = f42
|
||
|
sub r8 = r20, r31
|
||
|
;;
|
||
|
getf.sig r27 = f39
|
||
|
st8 [r10] = r14, 8
|
||
|
xma.l f37 = f33, f6, f45
|
||
|
ld8 r20 = [r9], 8
|
||
|
xma.hu f41 = f33, f6, f45
|
||
|
(p6) add r8 = 1, r8
|
||
|
;;
|
||
|
.Lcj8:
|
||
|
cmp.ltu p6, p0 = r24, r8
|
||
|
sub r14 = r24, r8
|
||
|
getf.sig r31 = f43
|
||
|
sub r8 = r21, r28
|
||
|
;;
|
||
|
getf.sig r24 = f36
|
||
|
st8 [r10] = r14, 8
|
||
|
xma.l f38 = f34, f6, f46
|
||
|
ld8 r21 = [r9], 8
|
||
|
xma.hu f42 = f34, f6, f46
|
||
|
(p6) add r8 = 1, r8
|
||
|
;;
|
||
|
.Lcj7:
|
||
|
cmp.ltu p6, p0 = r25, r8
|
||
|
sub r14 = r25, r8
|
||
|
getf.sig r28 = f40
|
||
|
sub r8 = r22, r29
|
||
|
;;
|
||
|
getf.sig r25 = f37
|
||
|
st8 [r10] = r14, 8
|
||
|
xma.l f39 = f35, f6, f47
|
||
|
ld8 r22 = [r9], 8
|
||
|
xma.hu f43 = f35, f6, f47
|
||
|
(p6) add r8 = 1, r8
|
||
|
;;
|
||
|
C From here on no new products are started.
.Lcj6:
|
||
|
cmp.ltu p6, p0 = r26, r8
|
||
|
sub r14 = r26, r8
|
||
|
getf.sig r29 = f41
|
||
|
sub r8 = r23, r30
|
||
|
;;
|
||
|
getf.sig r26 = f38
|
||
|
st8 [r10] = r14, 8
|
||
|
ld8 r23 = [r9], 8
|
||
|
(p6) add r8 = 1, r8
|
||
|
;;
|
||
|
.Lcj5:
|
||
|
cmp.ltu p6, p0 = r27, r8
|
||
|
sub r14 = r27, r8
|
||
|
getf.sig r30 = f42
|
||
|
sub r8 = r20, r31
|
||
|
;;
|
||
|
getf.sig r27 = f39
|
||
|
st8 [r10] = r14, 8
|
||
|
ld8 r20 = [r9], 8
|
||
|
(p6) add r8 = 1, r8
|
||
|
;;
|
||
|
.Lcj4:
|
||
|
cmp.ltu p6, p0 = r24, r8
|
||
|
sub r14 = r24, r8
|
||
|
getf.sig r31 = f43
|
||
|
sub r8 = r21, r28
|
||
|
;;
|
||
|
st8 [r10] = r14, 8
|
||
|
(p6) add r8 = 1, r8
|
||
|
;;
|
||
|
.Lcj3:
|
||
|
cmp.ltu p6, p0 = r25, r8
|
||
|
sub r14 = r25, r8
|
||
|
sub r8 = r22, r29
|
||
|
;;
|
||
|
st8 [r10] = r14, 8
|
||
|
(p6) add r8 = 1, r8
|
||
|
;;
|
||
|
.Lcj2:
|
||
|
cmp.ltu p6, p0 = r26, r8
|
||
|
sub r14 = r26, r8
|
||
|
sub r8 = r23, r30
|
||
|
;;
|
||
|
st8 [r10] = r14, 8
|
||
|
(p6) add r8 = 1, r8
|
||
|
;;
|
||
|
C Last limb: store it, restore ar.lc from r2 and return.  r8 holds the
C final carry limb -- presumably the function result, r8 being the usual
C IA-64 integer return register; confirm against the ABI in use.
.Lcj1:
|
||
|
cmp.ltu p6, p0 = r27, r8
|
||
|
sub r14 = r27, r8
|
||
|
sub r8 = r20, r31
|
||
|
;;
|
||
|
st8 [r10] = r14, 8
|
||
|
mov ar.lc = r2
|
||
|
(p6) add r8 = 1, r8
|
||
|
br.ret.sptk.many b0
|
||
|
C vl == 0 exit: no limb has been stored and r8 is still the 0 set in the
C entry code; only ar.lc needs restoring.
.Ldone: mov ar.lc = r2
|
||
|
br.ret.sptk.many b0
|
||
|
EPILOGUE()
|
||
|
ASM_END()
|