[PATCH] D85364: [SVE][WIP] Implement lowering for fixed width select
Cameron McInally via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 5 15:08:53 PDT 2020
cameron.mcinally added a comment.
For this IR test, compiled with `-aarch64-sve-vector-bits-min=512`:
target triple = "aarch64-unknown-linux-gnu"
define void @select(<8 x double>* %a, <8 x double>* %b, <8 x i1>* %c) #0 {
; CHECK: select:
%mask = load <8 x i1>, <8 x i1>* %c
%op1 = load <8 x double>, <8 x double>* %a
%op2 = load <8 x double>, <8 x double>* %b
%sel = select <8 x i1> %mask, <8 x double> %op1, <8 x double> %op2
store <8 x double> %sel, <8 x double>* %a, align 4
ret void
}
attributes #0 = { "target-features"="+sve" }
this patch will generate:
ldrb w8, [x2]
ptrue p0.d, vl8
mov x9, sp
ptrue p1.d
lsr w10, w8, #7
lsr w11, w8, #6
lsr w12, w8, #5
lsr w13, w8, #4
// Extend the mask: v8i1 -> v8i64
sbfx x10, x10, #0, #1
sbfx x11, x11, #0, #1
stp x11, x10, [sp, #48]
sbfx x11, x12, #0, #1
sbfx x12, x13, #0, #1
lsr w10, w8, #3
stp x12, x11, [sp, #32]
lsr w11, w8, #2
sbfx x10, x10, #0, #1
sbfx x11, x11, #0, #1
stp x11, x10, [sp, #16]
sbfx x10, x8, #0, #1
lsr w8, w8, #1
sbfx x8, x8, #0, #1
stp x10, x8, [sp]
// Load extended mask into ZPR: nxv2i64
ld1d { z0.d }, p0/z, [x9]
ld1d { z1.d }, p0/z, [x0]
ld1d { z2.d }, p0/z, [x1]
// Truncate the mask: nxv2i64 -> nxv2i1
and z0.d, z0.d, #0x1
cmpne p2.d, p1/z, z0.d, #0
// Combine the select mask with the fixed VL mask
// Note: Not sure if this is *really* needed, or if we can trust the select mask,
// but that's a separate issue.
and p1.b, p1/z, p0.b, p2.b
sel z0.d, p1, z1.d, z2.d
st1d { z0.d }, p0, [x0]
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D85364/new/
https://reviews.llvm.org/D85364
More information about the llvm-commits mailing list