[PATCH] D86114: [SVE] Lower fixed length vXi8/vXi16 SDIV to scalable
Cameron McInally via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 20 11:13:05 PDT 2020
cameron.mcinally added a comment.
In D86114#2228977 <https://reviews.llvm.org/D86114#2228977>, @paulwalker-arm wrote:
>> That said the custom lowering for i8/i16 doesn't have to be SVE specific as there's the alternative approach of using normal ISD nodes to do the widening so that only the final SDIV lowering is SVE specific.
I tried that at first, but the NEON assembly was pretty ugly. I may have made a mistake there, but if not, it was all scalarized. The SVE lowering is uglier, but the generated code seems cleaner.
Extending fixed-width vectors
=============================
// sdiv_v8i8: compiler-generated AArch64 (NEON + SVE) listing for a signed
// divide of 8 x i8 lanes, quoted here as the output of the "extend with
// fixed-width vectors" approach discussed above.
// Inputs are read from v0 and v1 and the result is left in v0.
// Outline: the byte lanes are moved one at a time through general registers
// (umov / fmov / mov-to-lane), sign-extended in two steps with shl+sshr pairs
// (8 -> 16 bits, then 16 -> 32 bits), divided two 32-bit lanes at a time with
// predicated SVE sdiv/sdivr, and narrowed back to bytes with uzp1.
// NOTE(review): instructions from the different stages are interleaved by the
// scheduler and register names are reused, so "v0"/"v1" below do not always
// hold the original inputs.
sdiv_v8i8:
.cfi_startproc
// Pull individual byte lanes of both inputs into w-registers and start
// building 4 x 16-bit vectors in v2 (from v1) and v3 (from v0).
// NOTE(review): the lanes read here are b[1]..b[4]; for the upper half of an
// 8-lane vector one might expect b[4]..b[7] - confirm against the lowering.
umov w11, v1.b[1]
umov w9, v1.b[2]
umov w13, v0.b[1]
fmov s2, w11
umov w10, v1.b[3]
umov w8, v1.b[4]
umov w12, v0.b[2]
umov w14, v0.b[3]
fmov s3, w13
umov w11, v0.b[4]
// zip1 spreads the low four bytes of each input into the low byte of each
// 16-bit lane; the shl #8 / sshr #8 pair that follows sign-extends that byte.
zip1 v1.8b, v1.8b, v0.8b
zip1 v0.8b, v0.8b, v0.8b
mov v2.h[1], w9
shl v1.4h, v1.4h, #8
shl v0.4h, v0.4h, #8
mov v3.h[1], w12
mov v2.h[2], w10
sshr v1.4h, v1.4h, #8
sshr v0.4h, v0.4h, #8
mov v3.h[2], w14
mov v2.h[3], w8
// Second widening step: extract 16-bit lanes through w-registers and rebuild
// them as 2 x 32-bit vectors. From here v0 is rebuilt from v1's lanes and v1
// from v0's lanes.
umov w13, v1.h[0]
umov w8, v0.h[0]
mov v3.h[3], w11
shl v2.4h, v2.4h, #8
umov w9, v1.h[1]
umov w10, v1.h[2]
umov w12, v0.h[1]
umov w11, v0.h[2]
fmov s0, w13
fmov s1, w8
shl v3.4h, v3.4h, #8
sshr v2.4h, v2.4h, #8
mov v0.s[1], w9
fmov s4, w9
mov v1.s[1], w12
umov w8, v2.h[1]
umov w9, v2.h[2]
umov w13, v2.h[0]
fmov s2, w12
sshr v3.4h, v3.4h, #8
mov v4.s[1], w10
mov v2.s[1], w11
umov w12, v3.h[0]
// shl #16 / sshr #16 on .2s lanes sign-extends the low 16 bits of each
// 32-bit lane.
shl v0.2s, v0.2s, #16
shl v1.2s, v1.2s, #16
umov w10, v3.h[1]
umov w11, v3.h[2]
fmov s3, w13
sshr v0.2s, v0.2s, #16
fmov s5, w12
sshr v1.2s, v1.2s, #16
// p0 enables the first two 32-bit lanes (vl2) for the SVE divides below.
ptrue p0.s, vl2
shl v4.2s, v4.2s, #16
shl v2.2s, v2.2s, #16
mov v3.s[1], w8
fmov s6, w8
sshr v4.2s, v4.2s, #16
// sdivr is the reversed form: z0 = z1 / z0 on the active lanes.
sdivr z0.s, p0/m, z0.s, z1.s
sshr v1.2s, v2.2s, #16
mov v5.s[1], w10
fmov s2, w10
// z1 = z1 / z4 on the active lanes.
sdiv z1.s, p0/m, z1.s, z4.s
mov v6.s[1], w9
mov v2.s[1], w11
shl v3.2s, v3.2s, #16
shl v4.2s, v5.2s, #16
shl v5.2s, v6.2s, #16
shl v2.2s, v2.2s, #16
sshr v3.2s, v3.2s, #16
sshr v4.2s, v4.2s, #16
// Same pattern for the remaining two pairs of lanes.
sdivr z3.s, p0/m, z3.s, z4.s
sshr v4.2s, v5.2s, #16
sshr v2.2s, v2.2s, #16
sdiv z2.s, p0/m, z2.s, z4.s
// Narrow the quotients: uzp1 keeps the even-numbered elements, i.e. the low
// half of each wider lane (32 -> 16 bits, then 16 -> 8 bits into v0).
uzp1 v2.4h, v3.4h, v2.4h
uzp1 v0.4h, v0.4h, v1.4h
uzp1 v0.8b, v0.8b, v2.8b
ret
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D86114/new/
https://reviews.llvm.org/D86114
More information about the llvm-commits mailing list