[PATCH] D120715: [AArch64][SVE] Optimize mov and sel away for masked loads in sel
Matt Devereau via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 1 04:42:30 PST 2022
MattDevereau created this revision.
MattDevereau added reviewers: peterwaller-arm, paulwalker-arm, DavidTruby, bsmith, david-arm.
Herald added subscribers: psnobl, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: efriedma.
MattDevereau requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.
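Fold (sel p, (masked_load ptr, p, zeroinitializer), zeroinitializer) -> (masked_load ptr, p, zeroinitializer). A masked load with a zero passthru already yields zero in its inactive lanes, so the mov of zero and the sel are redundant.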
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D120715
Files:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-select.ll
Index: llvm/test/CodeGen/AArch64/sve-select.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-select.ll
+++ llvm/test/CodeGen/AArch64/sve-select.ll
@@ -650,3 +650,20 @@
%sel = select <4 x i1> %p, <4 x float> %a, <4 x float> %fmul
ret <4 x float> %sel
}
+
+; The select uses the same predicate %p as the masked load, and both the
+; load's passthru and the select's false operand are zeroinitializer, so the
+; select is redundant. The expected output is just the ptrue and a single
+; zeroing ld1w, with no extra mov or sel.
+define <vscale x 4 x i32> @fold_vselect_masked_load_zero(i32* %ptr) {
+; CHECK-LABEL: fold_vselect_masked_load_zero:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s, vl16
+; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %p = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 9)
+ %vscaleptr = bitcast i32* %ptr to <vscale x 4 x i32>*
+ %load = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %vscaleptr, i32 1, <vscale x 4 x i1> %p, <vscale x 4 x i32> zeroinitializer)
+ %sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %load, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x i32> %sel
+}
+
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 immarg)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sel.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>*, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
\ No newline at end of file
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17340,6 +17340,21 @@
{InverseSetCC, SelectB, SelectA});
}
+// Fold
+//   vselect Pg, (masked_load Ptr, Pg, zero), zero
+//     -> masked_load Ptr, Pg, zero
+//
+// A masked load whose passthru is zero already produces zero in every lane
+// where Pg is false, so selecting between it and zero under the same
+// predicate is a no-op.
+//
+// N is the VSELECT node being combined. Returns the masked load (operand 1 of
+// N) when the fold applies, or an empty SDValue otherwise. DAG is unused; it
+// is kept so the signature matches the other vselect combine helpers.
+static SDValue tryFoldVSelectMaskedLoad(SDNode *N, SelectionDAG &DAG) {
+  SDValue Pred = N->getOperand(0);
+  SDValue TrueVal = N->getOperand(1);
+  SDValue FalseVal = N->getOperand(2);
+
+  auto *MLoad = dyn_cast<MaskedLoadSDNode>(TrueVal);
+  if (!MLoad)
+    return SDValue();
+
+  // The select and the load must be governed by the same predicate. With
+  // different predicates, a lane that the select takes from FalseVal could
+  // still be active in the load, and dropping the select would change it.
+  if (MLoad->getMask() != Pred)
+    return SDValue();
+
+  auto IsZeroSplat = [](SDValue V) {
+    APInt SplatValue;
+    return ISD::isConstantSplatVector(V.getNode(), SplatValue) &&
+           SplatValue.isZero();
+  };
+
+  // Inactive lanes must be zero on both sides: the load supplies its passthru
+  // there and the select supplies FalseVal, so the two only agree when both
+  // are zero splats.
+  if (!IsZeroSplat(MLoad->getPassThru()) || !IsZeroSplat(FalseVal))
+    return SDValue();
+
+  return TrueVal;
+}
+
// vselect (v1i1 setcc) ->
// vselect (v1iXX setcc) (XX is the size of the compared operand type)
// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
@@ -17348,6 +17363,8 @@
static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
if (auto SwapResult = trySwapVSelectOperands(N, DAG))
return SwapResult;
+ if (auto FoldMaskedLoadResult = tryFoldVSelectMaskedLoad(N, DAG))
+ return FoldMaskedLoadResult;
SDValue N0 = N->getOperand(0);
EVT CCVT = N0.getValueType();
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D120715.412052.patch
Type: text/x-patch
Size: 2614 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220301/0b782d5e/attachment.bin>
More information about the llvm-commits
mailing list