[llvm] 34d88cf - [DAG] Allow folding AND of anyext masked_load with >1 user to zext version
Cullen Rhodes via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 18 02:38:23 PST 2022
Author: Benjamin Maxwell
Date: 2022-11-18T10:38:09Z
New Revision: 34d88cf6cfe9f878e6330f157f178c2b104c3949
URL: https://github.com/llvm/llvm-project/commit/34d88cf6cfe9f878e6330f157f178c2b104c3949
DIFF: https://github.com/llvm/llvm-project/commit/34d88cf6cfe9f878e6330f157f178c2b104c3949.diff
LOG: [DAG] Allow folding AND of anyext masked_load with >1 user to zext version
This now allows folding an AND of an anyext masked_load to a
zext_masked_load even if the masked load has multiple users. Doing so
eliminates some redundant ANDs/MOVs for certain AArch64 SVE code.
I'm not sure if there are any cases where doing this could negatively
affect the other users of the masked_load. Looking at other
optimizations of masked loads, most don't apply if the load is used more
than once, so it doesn't look like this would interfere.
Reviewed By: c-rhodes
Differential Revision: https://reviews.llvm.org/D137844
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/sve-load-compare-store.ll
llvm/test/CodeGen/Thumb2/mve-masked-load.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9043042eb16f8..0457e1d081f10 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6241,8 +6241,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
- if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && N0.hasOneUse() &&
- Splat && N1.hasOneUse()) {
+ if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat &&
+ N1.hasOneUse()) {
EVT LoadVT = MLoad->getMemoryVT();
EVT ExtVT = VT;
if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
@@ -6252,11 +6252,16 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
uint64_t ElementSize =
LoadVT.getVectorElementType().getScalarSizeInBits();
if (Splat->getAPIntValue().isMask(ElementSize)) {
- return DAG.getMaskedLoad(
+ auto NewLoad = DAG.getMaskedLoad(
ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
ISD::ZEXTLOAD, MLoad->isExpandingLoad());
+ bool LoadHasOtherUsers = !N0.hasOneUse();
+ CombineTo(N, NewLoad);
+ if (LoadHasOtherUsers)
+ CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
+ return SDValue(N, 0);
}
}
}
diff --git a/llvm/test/CodeGen/AArch64/sve-load-compare-store.ll b/llvm/test/CodeGen/AArch64/sve-load-compare-store.ll
index 27436ede52eb3..3a6e9818e07fa 100644
--- a/llvm/test/CodeGen/AArch64/sve-load-compare-store.ll
+++ b/llvm/test/CodeGen/AArch64/sve-load-compare-store.ll
@@ -6,9 +6,7 @@ define void @sve_load_compare_store(ptr noalias nocapture noundef readonly %a, p
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
-; CHECK-NEXT: mov z1.d, z0.d
-; CHECK-NEXT: and z1.s, z1.s, #0xffff
-; CHECK-NEXT: cmphs p0.s, p0/z, z1.s, #0
+; CHECK-NEXT: cmphs p0.s, p0/z, z0.s, #0
; CHECK-NEXT: st1b { z0.s }, p0, [x1]
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll
index afcea7901ccf7..f250731ea6a7c 100644
--- a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll
@@ -2079,7 +2079,7 @@ define arm_aapcs_vfpcc <4 x i32> @multi_user_zext(<4 x i16> *%dest, <4 x i32> %a
; CHECK-LE-NEXT: vmov r0, r1, d8
; CHECK-LE-NEXT: vmov r2, r3, d9
; CHECK-LE-NEXT: bl foo
-; CHECK-LE-NEXT: vmovlb.u16 q0, q4
+; CHECK-LE-NEXT: vmov q0, q4
; CHECK-LE-NEXT: vpop {d8, d9}
; CHECK-LE-NEXT: pop {r7, pc}
;
@@ -2091,13 +2091,12 @@ define arm_aapcs_vfpcc <4 x i32> @multi_user_zext(<4 x i16> *%dest, <4 x i32> %a
; CHECK-BE-NEXT: vpush {d8, d9}
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: vpt.s32 gt, q1, zr
-; CHECK-BE-NEXT: vldrht.u32 q4, [r0]
-; CHECK-BE-NEXT: vrev64.32 q0, q4
-; CHECK-BE-NEXT: vmov r1, r0, d0
-; CHECK-BE-NEXT: vmov r3, r2, d1
+; CHECK-BE-NEXT: vldrht.u32 q0, [r0]
+; CHECK-BE-NEXT: vrev64.32 q4, q0
+; CHECK-BE-NEXT: vmov r1, r0, d8
+; CHECK-BE-NEXT: vmov r3, r2, d9
; CHECK-BE-NEXT: bl foo
-; CHECK-BE-NEXT: vmovlb.u16 q1, q4
-; CHECK-BE-NEXT: vrev64.32 q0, q1
+; CHECK-BE-NEXT: vmov q0, q4
; CHECK-BE-NEXT: vpop {d8, d9}
; CHECK-BE-NEXT: pop {r7, pc}
entry:
More information about the llvm-commits
mailing list