mirror of
https://review.haiku-os.org/buildtools
synced 2025-02-14 17:57:39 +01:00
b58ddff026
* merged mpfr 3.0.0 and gmp 5.0.1 in buildtools trunk git-svn-id: file:///srv/svn/repos/haiku/buildtools/trunk@37378 a95241bf-73f2-0310-859d-f6bbb57e9c96
635 lines
12 KiB
NASM
635 lines
12 KiB
NASM
dnl IA-64 mpn_submul_1 -- Multiply a limb vector with a limb and subtract the
dnl result from a second limb vector.

dnl Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.

dnl This file is part of the GNU MP Library.

dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 3 of the License, or (at
dnl your option) any later version.

dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.

dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C               cycles/limb
C Itanium:      4.0
C Itanium 2:    2.25 (alignment dependent, sometimes it seems to need 3 c/l)

C TODO
C  * Optimize feed-in and wind-down code, both for speed and code size.
C  * Handle low limb input and results specially, using a common stf8 in the
C    epilogue.
C  * Delay r8, r10 initialization, put cmp-p6 in 1st bundle and br .Ldone in
C    2nd bundle.  This will allow the bbb bundle to be one cycle earlier and
C    save a cycle.

C INPUT PARAMETERS
C   rp  (r32)  limb vector that is loaded, reduced and stored back in place
C   up  (r33)  limb vector that is only loaded
C   n   (r34)  limb count; must be at least 1, since the feed-in code loads
C              the first limb of both vectors unconditionally
C   vl  (r35)  multiplier limb; the routine negates it on entry
C The borrow limb is accumulated in r8 and is left there on return.
define(`rp', `r32')
define(`up', `r33')
define(`n', `r34')
define(`vl', `r35')

ASM_START()
C mpn_submul_1: subtract up[0..n-1] * vl from rp[0..n-1] in place and leave
C the borrow-out limb in r8.
C
C Method: vl is negated and moved to f6, so every xma.l/xma.hu pair forms the
C 128-bit value  u * (2^64 - vl) + r.  The low half is the new limb before the
C incoming borrow is applied; the limb u minus the high half is the borrow
C handed to the next limb.  vl == 0 is handled separately at .Ldone, because
C the negated value is then 0 rather than 2^64.
C
C Registers set up here:
C   r10     store pointer (copy of rp); rp itself feeds the ldf8 loads
C   r9      copy of up for the integer ld8 loads; up feeds the ldf8 loads
C   f6      negated vl
C   f7, f8  first up limb and first rp limb
C   r8      running borrow limb, starts at 0
C   r2      saved ar.lc, restored before every return
C   r19     (n - 1) >> 2, moved into ar.lc for the br.cloop chain
C   r14     n mod 4, selects the feed-in variant
PROLOGUE(mpn_submul_1)
	.prologue
	.save	ar.lc, r2
	.body

C With the 32-bit ABI the two pointers are converted with addp4 and n is
C zero-extended from 32 bits before anything else happens.
ifdef(`HAVE_ABI_32',
`	addp4		rp = 0, rp		C M I
	addp4		up = 0, up		C M I
	zxt4		n = n			C I
	;;
')
C Copy the pointers, negate vl and fetch the first limb of each vector.
C rp and up are read by the first bundle and post-incremented by the second
C within one instruction group.
{.mmi
	mov		r10 = rp		C M I
	mov		r9 = up			C M I
	sub		vl = r0, vl		C M I	negate vl
}
{.mmi
	ldf8		f8 = [rp], 8		C M
	ldf8		f7 = [up], 8		C M
	add		r19 = -1, n		C M I	n - 1
	;;
}
C p6 = (negated vl == 0); clear the borrow and save the caller ar.lc.
{.mmi
	cmp.eq		p6, p0 = 0, vl		C M I
	mov		r8 = 0			C M I	zero cylimb
	mov		r2 = ar.lc		C I0
}
{.mmi
	setf.sig	f6 = vl			C M2 M3
	and		r14 = 3, n		C M I
	shr.u		r19 = r19, 2		C I0
	;;
}
C Leave early for vl == 0 (r8 is already 0), otherwise dispatch on n mod 4:
C 0 -> .Lb00, 2 -> .Lb10, 3 -> .Lb11, and 1 falls through into .Lb01.
{.mmb
	nop		0
	cmp.eq		p10, p0 = 0, r14	C M I
   (p6)	br.spnt		.Ldone			C B	vl == 0
}
{.mmi
	cmp.eq		p11, p0 = 2, r14	C M I
	cmp.eq		p12, p0 = 3, r14	C M I
	mov		ar.lc = r19		C I0
}
{.bbb
  (p10)	br.dptk		.Lb00			C B
  (p11)	br.dptk		.Lb10			C B
  (p12)	br.dptk		.Lb11			C B
	;;
}

C Feed-in for n mod 4 == 1.
C
C Register pairing used by every feed-in path, the main loop and the tail:
C   up limb (FP)  rp limb (FP)  lo product  hi product  up limb (int)
C   f32           f44           f36 -> r24  f40 -> r28  r21
C   f33           f45           f37 -> r25  f41 -> r29  r22
C   f34           f46           f38 -> r26  f42 -> r30  r23
C   f35           f47           f39 -> r27  f43 -> r31  r20
C The very first limb pair sits in f7/f8 instead of an f32-f35/f44-f47 slot.
C
C Each br.cloop is taken, decrementing ar.lc, while ar.lc is nonzero.  The
C fall-through here is therefore n = 1: one product, then the one-limb tail.
.Lb01:	br.cloop.dptk	.grt1

	xma.l		f39 = f7, f6, f8
	xma.hu		f43 = f7, f6, f8
	;;
	getf.sig	r27 = f39			C lo
	getf.sig	r31 = f43			C hi
	ld8		r20 = [r9], 8
	br		.Lcj1

C n >= 5: load four more limb pairs and start the multiply pipeline.
.grt1:	ldf8		f44 = [rp], 8
	ldf8		f32 = [up], 8
	;;
	ldf8		f45 = [rp], 8
	ldf8		f33 = [up], 8
	;;
	ldf8		f46 = [rp], 8
	xma.l		f39 = f7, f6, f8
	ldf8		f34 = [up], 8
	xma.hu		f43 = f7, f6, f8
	;;
	ldf8		f47 = [rp], 8
	xma.l		f36 = f32, f6, f44
	ldf8		f35 = [up], 8
	xma.hu		f40 = f32, f6, f44
	br.cloop.dptk	.grt5
	;;

C n = 5: no further loads, finish the products and drain through .Lcj5.
	getf.sig	r27 = f39			C lo
	xma.l		f37 = f33, f6, f45
	ld8		r20 = [r9], 8
	xma.hu		f41 = f33, f6, f45
	;;
	getf.sig	r31 = f43			C hi
	getf.sig	r24 = f36			C lo
	xma.l		f38 = f34, f6, f46
	ld8		r21 = [r9], 8
	xma.hu		f42 = f34, f6, f46
	;;
	getf.sig	r28 = f40			C hi
	getf.sig	r25 = f37			C lo
	xma.l		f39 = f35, f6, f47
	ld8		r22 = [r9], 8
	xma.hu		f43 = f35, f6, f47
	;;
	getf.sig	r29 = f41			C hi
	getf.sig	r26 = f38			C lo
	ld8		r23 = [r9], 8
	br		.Lcj5

C n >= 9: interleave the next four loads with the pending products, then
C enter .Loop at its top if ar.lc is still nonzero, else go to .Lend.
.grt5:	ldf8		f44 = [rp], 8
	ldf8		f32 = [up], 8
	;;
	getf.sig	r27 = f39			C lo
	xma.l		f37 = f33, f6, f45
	ld8		r20 = [r9], 8
	xma.hu		f41 = f33, f6, f45
	;;
	ldf8		f45 = [rp], 8
	getf.sig	r31 = f43			C hi
	ldf8		f33 = [up], 8
	;;
	getf.sig	r24 = f36			C lo
	xma.l		f38 = f34, f6, f46
	ld8		r21 = [r9], 8
	xma.hu		f42 = f34, f6, f46
	;;
	ldf8		f46 = [rp], 8
	getf.sig	r28 = f40			C hi
	ldf8		f34 = [up], 8
	;;
	getf.sig	r25 = f37			C lo
	xma.l		f39 = f35, f6, f47
	ld8		r22 = [r9], 8
	xma.hu		f43 = f35, f6, f47
	;;
	ldf8		f47 = [rp], 8
	getf.sig	r29 = f41			C hi
	ldf8		f35 = [up], 8
	;;
	getf.sig	r26 = f38			C lo
	xma.l		f36 = f32, f6, f44
	ld8		r23 = [r9], 8
	xma.hu		f40 = f32, f6, f44
	br.cloop.dptk	.Loop
	br		.Lend


C Feed-in for n mod 4 == 2.  The first limb pair is in f7/f8 and the second
C is loaded into f35/f47.  The br.cloop falls through only for n = 2, where
C the first product goes to r26/r30 and the second to r27/r31, matching what
C .Lcj2 and .Lcj1 consume.
.Lb10:	ldf8		f47 = [rp], 8
	ldf8		f35 = [up], 8
	br.cloop.dptk	.grt2

	xma.l		f38 = f7, f6, f8
	xma.hu		f42 = f7, f6, f8
	;;
	xma.l		f39 = f35, f6, f47
	xma.hu		f43 = f35, f6, f47
	;;
	getf.sig	r26 = f38			C lo
	getf.sig	r30 = f42			C hi
	ld8		r23 = [r9], 8
	;;
	getf.sig	r27 = f39			C lo
	getf.sig	r31 = f43			C hi
	ld8		r20 = [r9], 8
	br		.Lcj2

C n >= 6: load four more limb pairs while the first two products issue.
.grt2:	ldf8		f44 = [rp], 8
	ldf8		f32 = [up], 8
	;;
	ldf8		f45 = [rp], 8
	ldf8		f33 = [up], 8
	xma.l		f38 = f7, f6, f8
	xma.hu		f42 = f7, f6, f8
	;;
	ldf8		f46 = [rp], 8
	ldf8		f34 = [up], 8
	xma.l		f39 = f35, f6, f47
	xma.hu		f43 = f35, f6, f47
	;;
	ldf8		f47 = [rp], 8
	ldf8		f35 = [up], 8
	;;
	getf.sig	r26 = f38			C lo
	xma.l		f36 = f32, f6, f44
	ld8		r23 = [r9], 8
	xma.hu		f40 = f32, f6, f44
	br.cloop.dptk	.grt6

C n = 6: finish the remaining products and drain through .Lcj6.
	getf.sig	r30 = f42			C hi
	;;
	getf.sig	r27 = f39			C lo
	xma.l		f37 = f33, f6, f45
	ld8		r20 = [r9], 8
	xma.hu		f41 = f33, f6, f45
	;;
	getf.sig	r31 = f43			C hi
	getf.sig	r24 = f36			C lo
	xma.l		f38 = f34, f6, f46
	ld8		r21 = [r9], 8
	xma.hu		f42 = f34, f6, f46
	;;
	getf.sig	r28 = f40			C hi
	getf.sig	r25 = f37			C lo
	xma.l		f39 = f35, f6, f47
	ld8		r22 = [r9], 8
	xma.hu		f43 = f35, f6, f47
	br		.Lcj6

C n >= 10: keep loading while the pipeline fills, then join the main loop at
C its .LL10 entry point.
.grt6:	ldf8		f44 = [rp], 8
	getf.sig	r30 = f42			C hi
	ldf8		f32 = [up], 8
	;;
	getf.sig	r27 = f39			C lo
	xma.l		f37 = f33, f6, f45
	ld8		r20 = [r9], 8
	xma.hu		f41 = f33, f6, f45
	;;
	ldf8		f45 = [rp], 8
	getf.sig	r31 = f43			C hi
	ldf8		f33 = [up], 8
	;;
	getf.sig	r24 = f36			C lo
	xma.l		f38 = f34, f6, f46
	ld8		r21 = [r9], 8
	xma.hu		f42 = f34, f6, f46
	;;
	ldf8		f46 = [rp], 8
	getf.sig	r28 = f40			C hi
	ldf8		f34 = [up], 8
	;;
	getf.sig	r25 = f37			C lo
	xma.l		f39 = f35, f6, f47
	ld8		r22 = [r9], 8
	xma.hu		f43 = f35, f6, f47
	br		.LL10


C Feed-in for n mod 4 == 3.  The first limb pair is in f7/f8; the second and
C third are loaded into f34/f46 and f35/f47.  The br.cloop falls through only
C for n = 3, where the three products land in r25/r29, r26/r30 and r27/r31 as
C consumed by .Lcj3, .Lcj2 and .Lcj1.
.Lb11:	ldf8		f46 = [rp], 8
	ldf8		f34 = [up], 8
	;;
	ldf8		f47 = [rp], 8
	ldf8		f35 = [up], 8
	br.cloop.dptk	.grt3

	xma.l		f37 = f7, f6, f8
	xma.hu		f41 = f7, f6, f8
	;;
	xma.l		f38 = f34, f6, f46
	xma.hu		f42 = f34, f6, f46
	;;
	getf.sig	r25 = f37			C lo
	xma.l		f39 = f35, f6, f47
	xma.hu		f43 = f35, f6, f47
	;;
	getf.sig	r29 = f41			C hi
	ld8		r22 = [r9], 8
	;;
	getf.sig	r26 = f38			C lo
	getf.sig	r30 = f42			C hi
	ld8		r23 = [r9], 8
	;;
	getf.sig	r27 = f39			C lo
	getf.sig	r31 = f43			C hi
	ld8		r20 = [r9], 8
	br		.Lcj3

C n >= 7: load four more limb pairs while the first three products issue.
.grt3:	ldf8		f44 = [rp], 8
	xma.l		f37 = f7, f6, f8
	ldf8		f32 = [up], 8
	xma.hu		f41 = f7, f6, f8
	;;
	ldf8		f45 = [rp], 8
	xma.l		f38 = f34, f6, f46
	ldf8		f33 = [up], 8
	xma.hu		f42 = f34, f6, f46
	;;
	ldf8		f46 = [rp], 8
	ldf8		f34 = [up], 8
	;;
	getf.sig	r25 = f37			C lo
	xma.l		f39 = f35, f6, f47
	ld8		r22 = [r9], 8
	xma.hu		f43 = f35, f6, f47
	;;
	ldf8		f47 = [rp], 8
	getf.sig	r29 = f41			C hi
	ldf8		f35 = [up], 8
	;;
	getf.sig	r26 = f38			C lo
	xma.l		f36 = f32, f6, f44
	ld8		r23 = [r9], 8
	xma.hu		f40 = f32, f6, f44
	br.cloop.dptk	.grt7
	;;

C n = 7: finish the remaining products and drain through .Lcj7.
	getf.sig	r30 = f42			C hi
	getf.sig	r27 = f39			C lo
	xma.l		f37 = f33, f6, f45
	ld8		r20 = [r9], 8
	xma.hu		f41 = f33, f6, f45
	;;
	getf.sig	r31 = f43			C hi
	getf.sig	r24 = f36			C lo
	xma.l		f38 = f34, f6, f46
	ld8		r21 = [r9], 8
	xma.hu		f42 = f34, f6, f46
	br		.Lcj7

C n >= 11: keep loading while the pipeline fills, then join the main loop at
C its .LL11 entry point.
.grt7:	ldf8		f44 = [rp], 8
	getf.sig	r30 = f42			C hi
	ldf8		f32 = [up], 8
	;;
	getf.sig	r27 = f39			C lo
	xma.l		f37 = f33, f6, f45
	ld8		r20 = [r9], 8
	xma.hu		f41 = f33, f6, f45
	;;
	ldf8		f45 = [rp], 8
	getf.sig	r31 = f43			C hi
	ldf8		f33 = [up], 8
	;;
	getf.sig	r24 = f36			C lo
	xma.l		f38 = f34, f6, f46
	ld8		r21 = [r9], 8
	xma.hu		f42 = f34, f6, f46
	br		.LL11


C Feed-in for n mod 4 == 0.  The first limb pair is in f7/f8; three more are
C loaded into f33/f45, f34/f46 and f35/f47.  The br.cloop falls through only
C for n = 4, where the products land in r24/r28, r25/r29, r26/r30 and r27 as
C consumed by .Lcj4 onwards (.Lcj4 itself fetches the last hi word into r31).
.Lb00:	ldf8		f45 = [rp], 8
	ldf8		f33 = [up], 8
	;;
	ldf8		f46 = [rp], 8
	ldf8		f34 = [up], 8
	;;
	ldf8		f47 = [rp], 8
	xma.l		f36 = f7, f6, f8
	ldf8		f35 = [up], 8
	xma.hu		f40 = f7, f6, f8
	br.cloop.dptk	.grt4

	xma.l		f37 = f33, f6, f45
	xma.hu		f41 = f33, f6, f45
	;;
	getf.sig	r24 = f36			C lo
	xma.l		f38 = f34, f6, f46
	ld8		r21 = [r9], 8
	xma.hu		f42 = f34, f6, f46
	;;
	getf.sig	r28 = f40			C hi
	xma.l		f39 = f35, f6, f47
	getf.sig	r25 = f37			C lo
	ld8		r22 = [r9], 8
	xma.hu		f43 = f35, f6, f47
	;;
	getf.sig	r29 = f41			C hi
	getf.sig	r26 = f38			C lo
	ld8		r23 = [r9], 8
	;;
	getf.sig	r30 = f42			C hi
	getf.sig	r27 = f39			C lo
	ld8		r20 = [r9], 8
	br		.Lcj4

C n >= 8: load four more limb pairs while the first four products issue.
.grt4:	ldf8		f44 = [rp], 8
	xma.l		f37 = f33, f6, f45
	ldf8		f32 = [up], 8
	xma.hu		f41 = f33, f6, f45
	;;
	ldf8		f45 = [rp], 8
	ldf8		f33 = [up], 8
	xma.l		f38 = f34, f6, f46
	getf.sig	r24 = f36			C lo
	ld8		r21 = [r9], 8
	xma.hu		f42 = f34, f6, f46
	;;
	ldf8		f46 = [rp], 8
	getf.sig	r28 = f40			C hi
	ldf8		f34 = [up], 8
	xma.l		f39 = f35, f6, f47
	getf.sig	r25 = f37			C lo
	ld8		r22 = [r9], 8
	xma.hu		f43 = f35, f6, f47
	;;
	ldf8		f47 = [rp], 8
	getf.sig	r29 = f41			C hi
	ldf8		f35 = [up], 8
	;;
	getf.sig	r26 = f38			C lo
	xma.l		f36 = f32, f6, f44
	ld8		r23 = [r9], 8
	xma.hu		f40 = f32, f6, f44
	br.cloop.dptk	.grt8
	;;

C n = 8: finish the remaining products and drain through .Lcj8.
	getf.sig	r30 = f42			C hi
	getf.sig	r27 = f39			C lo
	xma.l		f37 = f33, f6, f45
	ld8		r20 = [r9], 8
	xma.hu		f41 = f33, f6, f45
	br		.Lcj8

C n >= 12: one more load pair, then join the main loop at its .LL00 entry.
.grt8:	ldf8		f44 = [rp], 8
	getf.sig	r30 = f42			C hi
	ldf8		f32 = [up], 8
	;;
	getf.sig	r27 = f39			C lo
	xma.l		f37 = f33, f6, f45
	ld8		r20 = [r9], 8
	xma.hu		f41 = f33, f6, f45
	br		.LL00

C Main loop: four limbs per pass, repeated by br.cloop while ar.lc is nonzero.
C
C Each quarter of the loop retires one limb in two instruction groups:
C   group 1:  p6  = lo < r8          incoming borrow exceeds the low word
C             r14 = lo - r8          result limb
C             r8  = up limb - hi     borrow produced by the multiply step
C   group 2:  store r14 through r10, and add 1 to r8 when p6 is set
C while the same bundles load the limbs, issue the xma pair and move the
C products for later quarters.  r8 is read and rewritten inside group 1; the
C reads see the old value.
C
C Besides the top, the loop is entered at .LL00, .LL11 and .LL10 by the
C feed-in paths ending in .grt8, .grt7 and .grt6 respectively.
	ALIGN(32)
.Loop:
{.mmi
	ldf8		f44 = [rp], 8
	cmp.ltu		p6, p0 = r27, r8	C lo cmp
	sub		r14 = r27, r8		C lo sub
}
{.mmi
	getf.sig	r30 = f42		C hi
	ldf8		f32 = [up], 8
	sub		r8 = r20, r31		C hi sub
	;;				C 01
}
{.mmf
	getf.sig	r27 = f39		C lo
	st8		[r10] = r14, 8
	xma.l		f37 = f33, f6, f45
}
{.mfi
	ld8		r20 = [r9], 8
	xma.hu		f41 = f33, f6, f45
   (p6)	add		r8 = 1, r8
	;;				C 02
}
{.mmi
.LL00:	ldf8		f45 = [rp], 8
	cmp.ltu		p6, p0 = r24, r8
	sub		r14 = r24, r8
}
{.mmi
	getf.sig	r31 = f43		C hi
	ldf8		f33 = [up], 8
	sub		r8 = r21, r28
	;;				C 03
}
{.mmf
	getf.sig	r24 = f36		C lo
	st8		[r10] = r14, 8
	xma.l		f38 = f34, f6, f46
}
{.mfi
	ld8		r21 = [r9], 8
	xma.hu		f42 = f34, f6, f46
   (p6)	add		r8 = 1, r8
	;;				C 04
}
{.mmi
.LL11:	ldf8		f46 = [rp], 8
	cmp.ltu		p6, p0 = r25, r8
	sub		r14 = r25, r8
}
{.mmi
	getf.sig	r28 = f40		C hi
	ldf8		f34 = [up], 8
	sub		r8 = r22, r29
	;;				C 05
}
{.mmf
	getf.sig	r25 = f37		C lo
	st8		[r10] = r14, 8
	xma.l		f39 = f35, f6, f47
}
{.mfi
	ld8		r22 = [r9], 8
	xma.hu		f43 = f35, f6, f47
   (p6)	add		r8 = 1, r8
	;;				C 06
}
{.mmi
.LL10:	ldf8		f47 = [rp], 8
	cmp.ltu		p6, p0 = r26, r8
	sub		r14 = r26, r8
}
{.mmi
	getf.sig	r29 = f41		C hi
	ldf8		f35 = [up], 8
	sub		r8 = r23, r30
	;;				C 07
}
{.mmf
	getf.sig	r26 = f38		C lo
	st8		[r10] = r14, 8
	xma.l		f36 = f32, f6, f44
}
{.mfi
	ld8		r23 = [r9], 8
	xma.hu		f40 = f32, f6, f44
   (p6)	add		r8 = 1, r8
}
	br.cloop.dptk	.Loop
	;;

C Wind-down.  No further ldf8 loads are issued; the limbs already in flight
C are multiplied, borrow-propagated and stored.  Every stage repeats the
C per-limb step of the main loop (compare lo with r8, subtract, form the next
C borrow as up limb minus hi, store, add 1 when p6 is set).
C
C .Lend is reached when the loop count runs out and writes nine limbs in
C total.  Each .LcjK label is an entry with K limbs still to be written and
C is the target of the feed-in path for n = K.  Later stages issue fewer xma,
C getf.sig and ld8 instructions because less work remains in flight.
.Lend:
	cmp.ltu		p6, p0 = r27, r8
	sub		r14 = r27, r8
	getf.sig	r30 = f42
	sub		r8 = r20, r31
	;;
	getf.sig	r27 = f39
	st8		[r10] = r14, 8
	xma.l		f37 = f33, f6, f45
	ld8		r20 = [r9], 8
	xma.hu		f41 = f33, f6, f45
   (p6)	add		r8 = 1, r8
	;;
.Lcj8:
	cmp.ltu		p6, p0 = r24, r8
	sub		r14 = r24, r8
	getf.sig	r31 = f43
	sub		r8 = r21, r28
	;;
	getf.sig	r24 = f36
	st8		[r10] = r14, 8
	xma.l		f38 = f34, f6, f46
	ld8		r21 = [r9], 8
	xma.hu		f42 = f34, f6, f46
   (p6)	add		r8 = 1, r8
	;;
.Lcj7:
	cmp.ltu		p6, p0 = r25, r8
	sub		r14 = r25, r8
	getf.sig	r28 = f40
	sub		r8 = r22, r29
	;;
	getf.sig	r25 = f37
	st8		[r10] = r14, 8
	xma.l		f39 = f35, f6, f47
	ld8		r22 = [r9], 8
	xma.hu		f43 = f35, f6, f47
   (p6)	add		r8 = 1, r8
	;;
.Lcj6:
	cmp.ltu		p6, p0 = r26, r8
	sub		r14 = r26, r8
	getf.sig	r29 = f41
	sub		r8 = r23, r30
	;;
	getf.sig	r26 = f38
	st8		[r10] = r14, 8
	ld8		r23 = [r9], 8
   (p6)	add		r8 = 1, r8
	;;
.Lcj5:
	cmp.ltu		p6, p0 = r27, r8
	sub		r14 = r27, r8
	getf.sig	r30 = f42
	sub		r8 = r20, r31
	;;
	getf.sig	r27 = f39
	st8		[r10] = r14, 8
	ld8		r20 = [r9], 8
   (p6)	add		r8 = 1, r8
	;;
.Lcj4:
	cmp.ltu		p6, p0 = r24, r8
	sub		r14 = r24, r8
	getf.sig	r31 = f43
	sub		r8 = r21, r28
	;;
	st8		[r10] = r14, 8
   (p6)	add		r8 = 1, r8
	;;
.Lcj3:
	cmp.ltu		p6, p0 = r25, r8
	sub		r14 = r25, r8
	sub		r8 = r22, r29
	;;
	st8		[r10] = r14, 8
   (p6)	add		r8 = 1, r8
	;;
.Lcj2:
	cmp.ltu		p6, p0 = r26, r8
	sub		r14 = r26, r8
	sub		r8 = r23, r30
	;;
	st8		[r10] = r14, 8
   (p6)	add		r8 = 1, r8
	;;
C Last limb: store it, restore the caller ar.lc from r2, apply the final
C borrow adjustment to r8 and return.
.Lcj1:
	cmp.ltu		p6, p0 = r27, r8
	sub		r14 = r27, r8
	sub		r8 = r20, r31
	;;
	st8		[r10] = r14, 8
	mov		ar.lc = r2
   (p6)	add		r8 = 1, r8
	br.ret.sptk.many b0
C Early exit taken when vl == 0: nothing is stored, r8 still holds the 0 set
C during setup, and only the saved ar.lc has to be restored before returning.
.Ldone:	mov		ar.lc = r2
	br.ret.sptk.many b0
EPILOGUE()
ASM_END()