[llvm] [VPlan] Simplify (X && Y) || (X && !Y) -> X. (PR #89386)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Fri May 17 05:03:01 PDT 2024
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/89386
>From b0bdfaa97b827bf2883afb71721c874d1a6b3375 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 19 Apr 2024 14:50:11 +0100
Subject: [PATCH 1/7] [VPlan] Simplify (X && Y) || (X && !Y) -> X.
Simplify a common pattern generated for masks when folding the tail.
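For example, for a loop mask %x and a block condition %y, tail folding
commonly emits masks of the following shape (illustrative IR using a
fixed-width type; the value names are hypothetical):

  %y.not = xor <4 x i1> %y, <i1 true, i1 true, i1 true, i1 true>
  %lhs = select <4 x i1> %x, <4 x i1> %y, <4 x i1> zeroinitializer      ; x && y
  %rhs = select <4 x i1> %x, <4 x i1> %y.not, <4 x i1> zeroinitializer  ; x && !y
  %m = or <4 x i1> %lhs, %rhs

%lhs and %rhs partition the lanes selected by %x, so %m is equal to %x
and its users can use %x directly.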
---
.../Transforms/Vectorize/VPlanPatternMatch.h | 59 +++++++++++++++++++
.../Transforms/Vectorize/VPlanTransforms.cpp | 12 ++++
.../LoopVectorize/AArch64/masked-call.ll | 18 ++----
.../AArch64/scalable-strict-fadd.ll | 3 +-
.../LoopVectorize/AArch64/sve-tail-folding.ll | 3 +-
.../LoopVectorize/RISCV/uniform-load-store.ll | 20 ++-----
.../Transforms/LoopVectorize/uniform-blend.ll | 5 +-
.../vplan-sink-scalars-and-merge.ll | 19 ++----
8 files changed, 89 insertions(+), 50 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index ae7717eb7cc97..76bd25d939523 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -185,6 +185,47 @@ using AllBinaryRecipe_match =
BinaryRecipe_match<Op0_t, Op1_t, Opcode, VPWidenRecipe, VPReplicateRecipe,
VPWidenCastRecipe, VPInstruction>;
+template <typename Op0_t, typename Op1_t, unsigned Opcode, bool Commutable,
+ typename... RecipeTys>
+struct LogicalRecipe_match {
+ Op0_t LHS;
+ Op1_t RHS;
+
+ LogicalRecipe_match(Op0_t LHS, Op1_t RHS) : LHS(LHS), RHS(RHS) {}
+
+ bool match(const VPValue *V) {
+ auto *DefR = V->getDefiningRecipe();
+ return DefR && match(DefR);
+ }
+
+ bool match(const VPSingleDefRecipe *R) {
+ return match(static_cast<const VPRecipeBase *>(R));
+ }
+
+ bool match(const VPRecipeBase *R) {
+ if (!detail::MatchRecipeAndOpcode<Opcode, RecipeTys...>::match(R)) {
+ if (!detail::MatchRecipeAndOpcode<Instruction::Select,
+ RecipeTys...>::match(R))
+ return false;
+ if (Opcode == Instruction::And) {
+ if (!m_SpecificInt(0).match(R->getOperand(2)))
+ return false;
+ } else if (Opcode == Instruction::Or) {
+ if (!m_SpecificInt(1).match(R->getOperand(1)))
+ return false;
+ } else {
+ llvm_unreachable("unsupported opcode");
+ }
+ } else {
+ assert(R->getNumOperands() == 2 &&
+ "recipe with matched opcode does not have 2 operands");
+ }
+ return (LHS.match(R->getOperand(0)) && RHS.match(R->getOperand(1))) ||
+ (Commutable && LHS.match(R->getOperand(1)) &&
+ RHS.match(R->getOperand(0)));
+ }
+};
+
template <unsigned Opcode, typename Op0_t>
inline UnaryVPInstruction_match<Op0_t, Opcode>
m_VPInstruction(const Op0_t &Op0) {
@@ -266,6 +307,24 @@ inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Or>
m_Or(const Op0_t &Op0, const Op1_t &Op1) {
return m_Binary<Instruction::Or, Op0_t, Op1_t>(Op0, Op1);
}
+
+template <typename Op0_t, typename Op1_t>
+inline LogicalRecipe_match<Op0_t, Op1_t, Instruction::Or, true, VPWidenRecipe,
+ VPReplicateRecipe, VPWidenCastRecipe, VPInstruction>
+m_c_LogicalOr(const Op0_t &Op0, const Op1_t &Op1) {
+ return LogicalRecipe_match<Op0_t, Op1_t, Instruction::Or, true, VPWidenRecipe,
+ VPReplicateRecipe, VPWidenCastRecipe,
+ VPInstruction>(Op0, Op1);
+}
+
+template <typename Op0_t, typename Op1_t>
+inline LogicalRecipe_match<Op0_t, Op1_t, Instruction::And, true, VPWidenRecipe,
+ VPReplicateRecipe, VPWidenCastRecipe, VPInstruction>
+m_c_LogicalAnd(const Op0_t &Op0, const Op1_t &Op1) {
+ return LogicalRecipe_match<Op0_t, Op1_t, Instruction::And, true,
+ VPWidenRecipe, VPReplicateRecipe,
+ VPWidenCastRecipe, VPInstruction>(Op0, Op1);
+}
} // namespace VPlanPatternMatch
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 007dc3f89b3fb..5453c47ecf700 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -939,6 +939,18 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
#endif
}
+ VPValue *B;
+ VPValue *C;
+ VPValue *D;
+ // Simplify (X && Y) || (X && !Y) -> X.
+ if (match(&R,
+ m_c_LogicalOr(m_c_LogicalAnd(m_VPValue(A), m_VPValue(B)),
+ m_c_LogicalAnd(m_VPValue(C), m_Not(m_VPValue(D))))) &&
+ A == C && B == D) {
+ R.getVPSingleValue()->replaceAllUsesWith(A);
+ return;
+ }
+
if (match(&R, m_CombineOr(m_Mul(m_VPValue(A), m_SpecificInt(1)),
m_Mul(m_SpecificInt(1), m_VPValue(A)))))
return R.getVPSingleValue()->replaceAllUsesWith(A);
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
index b91579106261a..200c2adcf0e66 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
@@ -223,10 +223,9 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
; TFCOMMON-NEXT: [[TMP10:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP9]])
; TFCOMMON-NEXT: [[TMP11:%.*]] = xor <vscale x 2 x i1> [[TMP8]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
; TFCOMMON-NEXT: [[TMP12:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP11]], <vscale x 2 x i1> zeroinitializer
-; TFCOMMON-NEXT: [[TMP13:%.*]] = or <vscale x 2 x i1> [[TMP9]], [[TMP12]]
; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP12]], <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> [[TMP10]]
; TFCOMMON-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
-; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP14]], i32 8, <vscale x 2 x i1> [[TMP13]])
+; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP14]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025)
; TFCOMMON-NEXT: [[TMP15:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
@@ -272,16 +271,14 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = xor <vscale x 2 x i1> [[TMP14]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP19]], <vscale x 2 x i1> zeroinitializer
; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i1> [[TMP20]], <vscale x 2 x i1> zeroinitializer
-; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = or <vscale x 2 x i1> [[TMP15]], [[TMP21]]
-; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = or <vscale x 2 x i1> [[TMP16]], [[TMP22]]
; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP21]], <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> [[TMP17]]
; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select <vscale x 2 x i1> [[TMP22]], <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> [[TMP18]]
; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 2
; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[TMP25]], i64 [[TMP27]]
-; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP25]], i32 8, <vscale x 2 x i1> [[TMP23]])
-; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI4]], ptr [[TMP28]], i32 8, <vscale x 2 x i1> [[TMP24]])
+; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP25]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
+; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI4]], ptr [[TMP28]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT:%.*]] = add i64 [[INDEX]], [[TMP6]]
; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP30:%.*]] = mul i64 [[TMP29]], 2
@@ -405,10 +402,9 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
; TFCOMMON-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP10]])
; TFCOMMON-NEXT: [[TMP12:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP8]], <vscale x 2 x i1> zeroinitializer
; TFCOMMON-NEXT: [[TMP13:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP12]])
-; TFCOMMON-NEXT: [[TMP14:%.*]] = or <vscale x 2 x i1> [[TMP10]], [[TMP12]]
; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP10]], <vscale x 2 x i64> [[TMP11]], <vscale x 2 x i64> [[TMP13]]
; TFCOMMON-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
-; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP15]], i32 8, <vscale x 2 x i1> [[TMP14]])
+; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP15]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025)
; TFCOMMON-NEXT: [[TMP16:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
@@ -456,16 +452,14 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i1> [[TMP14]], <vscale x 2 x i1> zeroinitializer
; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP21]])
; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD3]], <vscale x 2 x i1> [[TMP22]])
-; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = or <vscale x 2 x i1> [[TMP17]], [[TMP21]]
-; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = or <vscale x 2 x i1> [[TMP18]], [[TMP22]]
; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP17]], <vscale x 2 x i64> [[TMP19]], <vscale x 2 x i64> [[TMP23]]
; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select <vscale x 2 x i1> [[TMP18]], <vscale x 2 x i64> [[TMP20]], <vscale x 2 x i64> [[TMP24]]
; TFA_INTERLEAVE-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 2
; TFA_INTERLEAVE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[TMP27]], i64 [[TMP29]]
-; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP27]], i32 8, <vscale x 2 x i1> [[TMP25]])
-; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI4]], ptr [[TMP30]], i32 8, <vscale x 2 x i1> [[TMP26]])
+; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP27]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
+; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI4]], ptr [[TMP30]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT:%.*]] = add i64 [[INDEX]], [[TMP6]]
; TFA_INTERLEAVE-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP32:%.*]] = mul i64 [[TMP31]], 2
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
index ad6e8534f3180..e22f70e436f33 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
@@ -1241,9 +1241,8 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no
; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP16]], i32 4, <vscale x 4 x i1> [[TMP15]], <vscale x 4 x float> poison)
; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = xor <vscale x 4 x i1> [[TMP13]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP17]], <vscale x 4 x i1> zeroinitializer
-; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = or <vscale x 4 x i1> [[TMP15]], [[TMP18]]
; CHECK-ORDERED-TF-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP18]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> [[WIDE_MASKED_LOAD1]]
-; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = select <vscale x 4 x i1> [[TMP19]], <vscale x 4 x float> [[PREDPHI]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[PREDPHI]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
; CHECK-ORDERED-TF-NEXT: [[TMP21]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP20]])
; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP23]]
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]])
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
index 2b2742ca7ccbc..63ad98b2d8ab2 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
@@ -480,11 +480,10 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr
; CHECK-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> zeroinitializer
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> poison)
; CHECK-NEXT: [[TMP16:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> zeroinitializer
-; CHECK-NEXT: [[TMP18:%.*]] = or <vscale x 4 x i1> [[TMP15]], [[TMP16]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[WIDE_MASKED_GATHER]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0
-; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[PREDPHI]], ptr [[TMP19]], i32 4, <vscale x 4 x i1> [[TMP18]])
+; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[PREDPHI]], ptr [[TMP19]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP21]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]])
; CHECK-NEXT: [[TMP22:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
index 1ce4cb928e808..ee70f4aa35850 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
@@ -462,13 +462,10 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp ugt <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 10, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
; TF-SCALABLE-NEXT: [[TMP13:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP12]], <vscale x 2 x i1> zeroinitializer
; TF-SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], i32 8, <vscale x 2 x i1> [[TMP13]], <vscale x 2 x i64> poison)
-; TF-SCALABLE-NEXT: [[TMP14:%.*]] = xor <vscale x 2 x i1> [[TMP12]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
-; TF-SCALABLE-NEXT: [[TMP15:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP14]], <vscale x 2 x i1> zeroinitializer
-; TF-SCALABLE-NEXT: [[TMP17:%.*]] = or <vscale x 2 x i1> [[TMP13]], [[TMP15]]
; TF-SCALABLE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP13]], <vscale x 2 x i64> [[WIDE_MASKED_GATHER]], <vscale x 2 x i64> zeroinitializer
; TF-SCALABLE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP11]]
; TF-SCALABLE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP16]], i32 0
-; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP18]], i32 8, <vscale x 2 x i1> [[TMP17]])
+; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP18]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP20]]
; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
; TF-SCALABLE-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -510,13 +507,10 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], <i64 10, i64 10, i64 10, i64 10>
; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
; TF-FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP2]], <4 x i64> poison)
-; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = xor <4 x i1> [[TMP1]], <i1 true, i1 true, i1 true, i1 true>
-; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
-; TF-FIXEDLEN-NEXT: [[TMP6:%.*]] = or <4 x i1> [[TMP2]], [[TMP4]]
; TF-FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer
; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; TF-FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
-; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[PREDPHI]], ptr [[TMP7]], i32 8, <4 x i1> [[TMP6]])
+; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[PREDPHI]], ptr [[TMP7]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
; TF-FIXEDLEN-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
@@ -1296,12 +1290,9 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp ugt <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 10, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
; TF-SCALABLE-NEXT: [[TMP13:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP12]], <vscale x 2 x i1> zeroinitializer
; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x ptr> [[BROADCAST_SPLAT2]], i32 8, <vscale x 2 x i1> [[TMP13]])
-; TF-SCALABLE-NEXT: [[TMP15:%.*]] = xor <vscale x 2 x i1> [[TMP12]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
-; TF-SCALABLE-NEXT: [[TMP16:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP15]], <vscale x 2 x i1> zeroinitializer
-; TF-SCALABLE-NEXT: [[TMP17:%.*]] = or <vscale x 2 x i1> [[TMP13]], [[TMP16]]
; TF-SCALABLE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP11]]
; TF-SCALABLE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i32 0
-; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP18]], i32 8, <vscale x 2 x i1> [[TMP17]])
+; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP18]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP20]]
; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
; TF-SCALABLE-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -1344,12 +1335,9 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], <i64 10, i64 10, i64 10, i64 10>
; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
; TF-FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT2]], i32 8, <4 x i1> [[TMP2]])
-; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP1]], <i1 true, i1 true, i1 true, i1 true>
-; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer
-; TF-FIXEDLEN-NEXT: [[TMP6:%.*]] = or <4 x i1> [[TMP2]], [[TMP5]]
; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; TF-FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
-; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, <4 x i1> [[TMP6]])
+; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
; TF-FIXEDLEN-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
index 71eed3b2985d4..a4745da4e2f09 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
@@ -98,11 +98,8 @@ define void @blend_chain_iv(i1 %c) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ]
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[MASK1]], <4 x i1> [[MASK1]], <4 x i1> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[MASK1]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[MASK1]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i1> [[TMP6]], [[TMP5]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[VEC_IND]], <4 x i64> undef
-; CHECK-NEXT: [[PREDPHI1:%.*]] = select <4 x i1> [[TMP8]], <4 x i64> [[PREDPHI]], <4 x i64> undef
+; CHECK-NEXT: [[PREDPHI1:%.*]] = select <4 x i1> [[MASK1]], <4 x i64> [[PREDPHI]], <4 x i64> undef
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[PREDPHI1]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[PREDPHI1]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
index 108b78a70fa1a..fe5c0254b150a 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
@@ -361,15 +361,12 @@ define void @pred_cfg1(i32 %k, i32 %j) {
; CHECK-NEXT: Successor(s): then.0.0
; CHECK-EMPTY:
; CHECK-NEXT: then.0.0:
-; CHECK-NEXT: EMIT vp<[[NOT:%.+]]> = not ir<%c.1>
-; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = select vp<[[MASK1]]>, vp<[[NOT]]>, ir<false>
-; CHECK-NEXT: EMIT vp<[[OR:%.+]]> = or vp<[[MASK2]]>, vp<[[MASK3]]>
; CHECK-NEXT: BLEND ir<%p> = ir<0> vp<[[PRED]]>/vp<[[MASK2]]>
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
; CHECK-NEXT: <xVFxUF> pred.store: {
; CHECK-NEXT: pred.store.entry:
-; CHECK-NEXT: BRANCH-ON-MASK vp<[[OR]]>
+; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK1]]>
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
@@ -462,16 +459,13 @@ define void @pred_cfg2(i32 %k, i32 %j) {
; CHECK-NEXT: Successor(s): then.0.0
; CHECK-EMPTY:
; CHECK-NEXT: then.0.0:
-; CHECK-NEXT: EMIT vp<[[NOT:%.+]]> = not ir<%c.0>
-; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = select vp<[[MASK1]]>, vp<[[NOT]]>, ir<false>
-; CHECK-NEXT: EMIT vp<[[OR:%.+]]> = or vp<[[MASK2]]>, vp<[[MASK3]]>
; CHECK-NEXT: BLEND ir<%p> = ir<0> vp<[[PRED]]>/vp<[[MASK2]]>
-; CHECK-NEXT: EMIT vp<[[MASK4:%.+]]> = select vp<[[OR]]>, ir<%c.1>, ir<false>
+; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = select vp<[[MASK1]]>, ir<%c.1>, ir<false>
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
; CHECK-NEXT: <xVFxUF> pred.store: {
; CHECK-NEXT: pred.store.entry:
-; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK4]]>
+; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK3]]>
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
@@ -570,16 +564,13 @@ define void @pred_cfg3(i32 %k, i32 %j) {
; CHECK-NEXT: Successor(s): then.0.0
; CHECK-EMPTY:
; CHECK-NEXT: then.0.0:
-; CHECK-NEXT: EMIT vp<[[NOT:%.+]]> = not ir<%c.0>
-; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = select vp<[[MASK1]]>, vp<[[NOT]]>, ir<false>
-; CHECK-NEXT: EMIT vp<[[MASK4:%.+]]> = or vp<[[MASK2]]>, vp<[[MASK3]]>
; CHECK-NEXT: BLEND ir<%p> = ir<0> vp<[[PRED]]>/vp<[[MASK2]]>
-; CHECK-NEXT: EMIT vp<[[MASK5:%.+]]> = select vp<[[MASK4]]>, ir<%c.0>, ir<false>
+; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = select vp<[[MASK1]]>, ir<%c.0>, ir<false>
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
; CHECK-NEXT: <xVFxUF> pred.store: {
; CHECK-NEXT: pred.store.entry:
-; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK5]]>
+; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK3]]>
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
>From 4419476b3fba4db88db17de15ce046c0c4b661a2 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 9 May 2024 16:48:49 +0100
Subject: [PATCH 2/7] !fixup address comments, thanks!
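Also add a TODO on splitting this into simpler, modular combines. As a
boolean sketch of the two steps:

  (x && y) || (x && !y)  ->  x && (y || !y)  ->  x && true  ->  x

i.e. first factor (X && Y) || (X && Z) into X && (Y || Z), then fold the
resulting (Y || !Y) into true; this requires queuing newly created
recipes for re-simplification.
---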
---
.../Transforms/Vectorize/VPlanPatternMatch.h | 4 ----
.../Transforms/Vectorize/VPlanTransforms.cpp | 18 ++++++++++--------
2 files changed, 10 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index f0e47d03a3bfa..c00cdc04b3c4f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -204,10 +204,6 @@ struct LogicalRecipe_match {
return DefR && match(DefR);
}
- bool match(const VPSingleDefRecipe *R) {
- return match(static_cast<const VPRecipeBase *>(R));
- }
-
bool match(const VPRecipeBase *R) {
if (!detail::MatchRecipeAndOpcode<Opcode, RecipeTys...>::match(R)) {
if (!detail::MatchRecipeAndOpcode<Instruction::Select,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 8fda4f9373fe6..246c31b79ed9a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -935,15 +935,17 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
#endif
}
- VPValue *B;
- VPValue *C;
- VPValue *D;
- // Simplify (X && Y) || (X && !Y) -> X.
+ VPValue *X;
+ VPValue *Y;
+ VPValue *X1;
+ VPValue *Y1;
+ // Simplify (X && Y) || (X1 && !Y1) -> X.
+ // TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X && (Y || Z) and (X || !X) into true. This requires queuing newly created recipes to be visited during simplification.
if (match(&R,
- m_c_LogicalOr(m_c_LogicalAnd(m_VPValue(A), m_VPValue(B)),
- m_c_LogicalAnd(m_VPValue(C), m_Not(m_VPValue(D))))) &&
- A == C && B == D) {
- R.getVPSingleValue()->replaceAllUsesWith(A);
+ m_c_LogicalOr(m_c_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
+ m_c_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
+ X == X1 && Y == Y1) {
+ R.getVPSingleValue()->replaceAllUsesWith(X);
return;
}
>From 0abc56303c4a44db6cad2be76e3296673959fc36 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 9 May 2024 16:53:33 +0100
Subject: [PATCH 3/7] !fixup fix formatting
---
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 246c31b79ed9a..91d2ae640b93a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -940,10 +940,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
VPValue *X1;
VPValue *Y1;
// Simplify (X && Y) || (X1 && !Y1) -> X.
- // TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X && (Y || Z) and (X || !X) into true. This requires queuing newly created recipes to be visited during simplification.
- if (match(&R,
- m_c_LogicalOr(m_c_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
- m_c_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
+ // TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
+ // && (Y || Z) and (X || !X) into true. This requires queuing newly created
+ // recipes to be visited during simplification.
+ if (match(&R, m_c_LogicalOr(
+ m_c_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
+ m_c_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
X == X1 && Y == Y1) {
R.getVPSingleValue()->replaceAllUsesWith(X);
return;
>From fd62d99b2cc774bc0635ee9badbadc26abf021d4 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 14 May 2024 11:34:48 +0100
Subject: [PATCH 4/7] !fixup update based on LogicalAnd
---
.../Transforms/Vectorize/VPlanPatternMatch.h | 113 ++++++++----------
.../Transforms/Vectorize/VPlanTransforms.cpp | 7 +-
.../vplan-sink-scalars-and-merge.ll | 2 +-
3 files changed, 54 insertions(+), 68 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index c00cdc04b3c4f..bdb8db338df6f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -156,7 +156,7 @@ using AllUnaryRecipe_match =
UnaryRecipe_match<Op0_t, Opcode, VPWidenRecipe, VPReplicateRecipe,
VPWidenCastRecipe, VPInstruction>;
-template <typename Op0_t, typename Op1_t, unsigned Opcode,
+template <typename Op0_t, typename Op1_t, unsigned Opcode, bool Commutative,
typename... RecipeTys>
struct BinaryRecipe_match {
Op0_t Op0;
@@ -178,55 +178,22 @@ struct BinaryRecipe_match {
return false;
assert(R->getNumOperands() == 2 &&
"recipe with matched opcode does not have 2 operands");
- return Op0.match(R->getOperand(0)) && Op1.match(R->getOperand(1));
+ if (Op0.match(R->getOperand(0)) && Op1.match(R->getOperand(1)))
+ return true;
+ return Commutative && Op0.match(R->getOperand(1)) &&
+ Op1.match(R->getOperand(0));
}
};
template <typename Op0_t, typename Op1_t, unsigned Opcode>
using BinaryVPInstruction_match =
- BinaryRecipe_match<Op0_t, Op1_t, Opcode, VPInstruction>;
+ BinaryRecipe_match<Op0_t, Op1_t, Opcode, false, VPInstruction>;
-template <typename Op0_t, typename Op1_t, unsigned Opcode>
+template <typename Op0_t, typename Op1_t, unsigned Opcode,
+ bool Commutative = false>
using AllBinaryRecipe_match =
- BinaryRecipe_match<Op0_t, Op1_t, Opcode, VPWidenRecipe, VPReplicateRecipe,
- VPWidenCastRecipe, VPInstruction>;
-
-template <typename Op0_t, typename Op1_t, unsigned Opcode, bool Commutable,
- typename... RecipeTys>
-struct LogicalRecipe_match {
- Op0_t LHS;
- Op1_t RHS;
-
- LogicalRecipe_match(Op0_t LHS, Op1_t RHS) : LHS(LHS), RHS(RHS) {}
-
- bool match(const VPValue *V) {
- auto *DefR = V->getDefiningRecipe();
- return DefR && match(DefR);
- }
-
- bool match(const VPRecipeBase *R) {
- if (!detail::MatchRecipeAndOpcode<Opcode, RecipeTys...>::match(R)) {
- if (!detail::MatchRecipeAndOpcode<Instruction::Select,
- RecipeTys...>::match(R))
- return false;
- if (Opcode == Instruction::And) {
- if (!m_False().match(R->getOperand(2)))
- return false;
- } else if (Opcode == Instruction::Or) {
- if (!m_True().match(R->getOperand(1)))
- return false;
- } else {
- llvm_unreachable("unsupported opcode");
- }
- } else {
- assert(R->getNumOperands() == 2 &&
- "recipe with matched opcode does not have 2 operands");
- }
- return (LHS.match(R->getOperand(0)) && RHS.match(R->getOperand(1))) ||
- (Commutable && LHS.match(R->getOperand(1)) &&
- RHS.match(R->getOperand(0)));
- }
-};
+ BinaryRecipe_match<Op0_t, Op1_t, Opcode, Commutative, VPWidenRecipe,
+ VPReplicateRecipe, VPWidenCastRecipe, VPInstruction>;
template <unsigned Opcode, typename Op0_t>
inline UnaryVPInstruction_match<Op0_t, Opcode>
@@ -292,10 +259,11 @@ m_ZExtOrSExt(const Op0_t &Op0) {
return m_CombineOr(m_ZExt(Op0), m_SExt(Op0));
}
-template <unsigned Opcode, typename Op0_t, typename Op1_t>
-inline AllBinaryRecipe_match<Op0_t, Op1_t, Opcode> m_Binary(const Op0_t &Op0,
- const Op1_t &Op1) {
- return AllBinaryRecipe_match<Op0_t, Op1_t, Opcode>(Op0, Op1);
+template <unsigned Opcode, typename Op0_t, typename Op1_t,
+ bool Commutative = false>
+inline AllBinaryRecipe_match<Op0_t, Op1_t, Opcode, Commutative>
+m_Binary(const Op0_t &Op0, const Op1_t &Op1) {
+ return AllBinaryRecipe_match<Op0_t, Op1_t, Opcode, Commutative>(Op0, Op1);
}
template <typename Op0_t, typename Op1_t>
@@ -304,28 +272,47 @@ m_Mul(const Op0_t &Op0, const Op1_t &Op1) {
return m_Binary<Instruction::Mul, Op0_t, Op1_t>(Op0, Op1);
}
-template <typename Op0_t, typename Op1_t>
-inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Or>
-m_Or(const Op0_t &Op0, const Op1_t &Op1) {
- return m_Binary<Instruction::Or, Op0_t, Op1_t>(Op0, Op1);
+template <typename Op0_t, typename Op1_t, bool Commutative = false>
+inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Or, Commutative>
+m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1) {
+ return m_Binary<Instruction::Or, Op0_t, Op1_t, Commutative>(Op0, Op1);
}
template <typename Op0_t, typename Op1_t>
-inline LogicalRecipe_match<Op0_t, Op1_t, Instruction::Or, true, VPWidenRecipe,
- VPReplicateRecipe, VPWidenCastRecipe, VPInstruction>
-m_c_LogicalOr(const Op0_t &Op0, const Op1_t &Op1) {
- return LogicalRecipe_match<Op0_t, Op1_t, Instruction::Or, true, VPWidenRecipe,
- VPReplicateRecipe, VPWidenCastRecipe,
- VPInstruction>(Op0, Op1);
+inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Or, true>
+m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1) {
+ return m_BinaryOr<Op0_t, Op1_t, true>(Op0, Op1);
+}
+
+template <typename Op0_t, typename Op1_t, bool Commutative = false>
+inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::And, Commutative>
+m_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1) {
+ return m_Binary<Instruction::And, Op0_t, Op1_t, Commutative>(Op0, Op1);
+}
+
+template <typename Op0_t, typename Op1_t, bool Commutative = false>
+inline AllBinaryRecipe_match<Op0_t, Op1_t, VPInstruction::LogicalAnd,
+ Commutative>
+m_LogicalAnd(const Op0_t &Op0, const Op1_t &Op1) {
+ return m_Binary<VPInstruction::LogicalAnd, Op0_t, Op1_t, Commutative>(Op0,
+ Op1);
+}
+
+template <typename Op0_t, typename Op1_t, bool Commutative = false>
+inline match_combine_or<
+ AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::And, Commutative>,
+ AllBinaryRecipe_match<Op0_t, Op1_t, VPInstruction::LogicalAnd, Commutative>>
+m_And(const Op0_t &Op0, const Op1_t &Op1) {
+ return m_CombineOr(m_BinaryAnd<Op0_t, Op1_t, Commutative>(Op0, Op1),
+ m_LogicalAnd<Op0_t, Op1_t, Commutative>(Op0, Op1));
}
template <typename Op0_t, typename Op1_t>
-inline LogicalRecipe_match<Op0_t, Op1_t, Instruction::And, true, VPWidenRecipe,
- VPReplicateRecipe, VPWidenCastRecipe, VPInstruction>
-m_c_LogicalAnd(const Op0_t &Op0, const Op1_t &Op1) {
- return LogicalRecipe_match<Op0_t, Op1_t, Instruction::And, true,
- VPWidenRecipe, VPReplicateRecipe,
- VPWidenCastRecipe, VPInstruction>(Op0, Op1);
+inline match_combine_or<
+ AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::And, true>,
+ AllBinaryRecipe_match<Op0_t, Op1_t, VPInstruction::LogicalAnd, true>>
+m_c_And(const Op0_t &Op0, const Op1_t &Op1) {
+ return m_And<Op0_t, Op1_t, true>(Op0, Op1);
}
} // namespace VPlanPatternMatch
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index c315e8db5bbef..4a07c9b05a0bb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -943,9 +943,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
// TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
// && (Y || Z) and (X || !X) into true. This requires queuing newly created
// recipes to be visited during simplification.
- if (match(&R, m_c_LogicalOr(
- m_c_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
- m_c_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
+ if (match(&R, m_c_BinaryOr(m_c_And(m_VPValue(X), m_VPValue(Y)),
+ m_c_And(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
X == X1 && Y == Y1) {
R.getVPSingleValue()->replaceAllUsesWith(X);
return;
@@ -1418,7 +1417,7 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
// for dependence analysis). Instead, replace it with an equivalent Add.
// This is possible as all users of the disjoint OR only access lanes
// where the operands are disjoint or poison otherwise.
- if (match(RecWithFlags, m_Or(m_VPValue(A), m_VPValue(B))) &&
+ if (match(RecWithFlags, m_BinaryOr(m_VPValue(A), m_VPValue(B))) &&
RecWithFlags->isDisjoint()) {
VPBuilder Builder(RecWithFlags);
VPInstruction *New = Builder.createOverflowingOp(
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
index 9d3f068e93569..ae5879bb2bae9 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
@@ -565,7 +565,7 @@ define void @pred_cfg3(i32 %k, i32 %j) {
; CHECK-EMPTY:
; CHECK-NEXT: then.0.0:
; CHECK-NEXT: BLEND ir<%p> = ir<0> vp<[[PRED]]>/vp<[[MASK2]]>
-; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = logical-and vp<[[MASK1]]>, ir<%c.0>, ir<false>
+; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = logical-and vp<[[MASK1]]>, ir<%c.0>
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
; CHECK-NEXT: <xVFxUF> pred.store: {
>From e8683365a006afb1a4bc8ae4f70b724cae3a48e7 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 14 May 2024 11:46:48 +0100
Subject: [PATCH 5/7] !fixup restore original code, limit matched types for
LogicalAnd
---
.../Transforms/Vectorize/VPlanPatternMatch.h | 31 ++++++++++---------
1 file changed, 17 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index bdb8db338df6f..176d29244b0ea 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -69,8 +69,9 @@ template <unsigned BitWidth = 0> struct specific_intval {
C->getSplatValue(/*AllowPoison=*/false));
if (!CI)
return false;
- return (BitWidth == 0 || CI->getBitWidth() == BitWidth) &&
- APInt::isSameValue(CI->getValue(), Val);
+ assert((BitWidth == 0 || CI->getBitWidth() == BitWidth) &&
+           "Trying to match a constant with unexpected bitwidth.");
+ return APInt::isSameValue(CI->getValue(), Val);
}
};
@@ -79,7 +80,6 @@ inline specific_intval<0> m_SpecificInt(uint64_t V) {
}
inline specific_intval<1> m_False() { return specific_intval<1>(APInt(64, 0)); }
-inline specific_intval<1> m_True() { return specific_intval<1>(APInt(64, 1)); }
/// Matching combinators
template <typename LTy, typename RTy> struct match_combine_or {
@@ -185,9 +185,10 @@ struct BinaryRecipe_match {
}
};
-template <typename Op0_t, typename Op1_t, unsigned Opcode>
+template <typename Op0_t, typename Op1_t, unsigned Opcode,
+ bool Commutative = false>
using BinaryVPInstruction_match =
- BinaryRecipe_match<Op0_t, Op1_t, Opcode, false, VPInstruction>;
+ BinaryRecipe_match<Op0_t, Op1_t, Opcode, Commutative, VPInstruction>;
template <typename Op0_t, typename Op1_t, unsigned Opcode,
bool Commutative = false>
@@ -201,10 +202,11 @@ m_VPInstruction(const Op0_t &Op0) {
return UnaryVPInstruction_match<Op0_t, Opcode>(Op0);
}
-template <unsigned Opcode, typename Op0_t, typename Op1_t>
-inline BinaryVPInstruction_match<Op0_t, Op1_t, Opcode>
+template <unsigned Opcode, typename Op0_t, typename Op1_t,
+ bool Commutative = false>
+inline BinaryVPInstruction_match<Op0_t, Op1_t, Opcode, Commutative>
m_VPInstruction(const Op0_t &Op0, const Op1_t &Op1) {
- return BinaryVPInstruction_match<Op0_t, Op1_t, Opcode>(Op0, Op1);
+ return BinaryVPInstruction_match<Op0_t, Op1_t, Opcode, Commutative>(Op0, Op1);
}
template <typename Op0_t>
@@ -291,17 +293,18 @@ m_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1) {
}
template <typename Op0_t, typename Op1_t, bool Commutative = false>
-inline AllBinaryRecipe_match<Op0_t, Op1_t, VPInstruction::LogicalAnd,
- Commutative>
+inline BinaryVPInstruction_match<Op0_t, Op1_t, VPInstruction::LogicalAnd,
+ Commutative>
m_LogicalAnd(const Op0_t &Op0, const Op1_t &Op1) {
- return m_Binary<VPInstruction::LogicalAnd, Op0_t, Op1_t, Commutative>(Op0,
- Op1);
+ return m_VPInstruction<VPInstruction::LogicalAnd, Op0_t, Op1_t, Commutative>(
+ Op0, Op1);
}
template <typename Op0_t, typename Op1_t, bool Commutative = false>
inline match_combine_or<
AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::And, Commutative>,
- AllBinaryRecipe_match<Op0_t, Op1_t, VPInstruction::LogicalAnd, Commutative>>
+ BinaryVPInstruction_match<Op0_t, Op1_t, VPInstruction::LogicalAnd,
+ Commutative>>
m_And(const Op0_t &Op0, const Op1_t &Op1) {
return m_CombineOr(m_BinaryAnd<Op0_t, Op1_t, Commutative>(Op0, Op1),
m_LogicalAnd<Op0_t, Op1_t, Commutative>(Op0, Op1));
@@ -310,7 +313,7 @@ m_And(const Op0_t &Op0, const Op1_t &Op1) {
template <typename Op0_t, typename Op1_t>
inline match_combine_or<
AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::And, true>,
- AllBinaryRecipe_match<Op0_t, Op1_t, VPInstruction::LogicalAnd, true>>
+ BinaryVPInstruction_match<Op0_t, Op1_t, VPInstruction::LogicalAnd, true>>
m_c_And(const Op0_t &Op0, const Op1_t &Op1) {
return m_And<Op0_t, Op1_t, true>(Op0, Op1);
}
>From b3c8df8f493832bf4eaae2b9237a0bea47863cee Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 14 May 2024 11:52:43 +0100
Subject: [PATCH 6/7] !fixup add comment for arg
---
llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 176d29244b0ea..53b1939698234 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -281,9 +281,10 @@ m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1) {
}
template <typename Op0_t, typename Op1_t>
-inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Or, true>
+inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Or,
+ /*Commutative*/ true>
m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1) {
- return m_BinaryOr<Op0_t, Op1_t, true>(Op0, Op1);
+ return m_BinaryOr<Op0_t, Op1_t, /*Commutative*/ true>(Op0, Op1);
}
template <typename Op0_t, typename Op1_t, bool Commutative = false>
@@ -312,10 +313,11 @@ m_And(const Op0_t &Op0, const Op1_t &Op1) {
template <typename Op0_t, typename Op1_t>
inline match_combine_or<
- AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::And, true>,
- BinaryVPInstruction_match<Op0_t, Op1_t, VPInstruction::LogicalAnd, true>>
+ AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::And, /*Commutative*/ true>,
+ BinaryVPInstruction_match<Op0_t, Op1_t, VPInstruction::LogicalAnd,
+ /*Commutative*/ true>>
m_c_And(const Op0_t &Op0, const Op1_t &Op1) {
- return m_And<Op0_t, Op1_t, true>(Op0, Op1);
+ return m_And<Op0_t, Op1_t, /*Commutative*/ true>(Op0, Op1);
}
} // namespace VPlanPatternMatch
} // namespace llvm
>From d2deb2c8c979d53917dba1edf129685668e1e101 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 17 May 2024 12:48:55 +0100
Subject: [PATCH 7/7] !fixup simplify
---
.../Transforms/Vectorize/VPlanPatternMatch.h | 84 +++++--------------
.../Transforms/Vectorize/VPlanTransforms.cpp | 5 +-
.../LoopVectorize/AArch64/masked-call.ll | 9 +-
.../LoopVectorize/AArch64/sve-tail-folding.ll | 3 +-
.../unused-blend-mask-for-first-operand.ll | 12 +--
5 files changed, 35 insertions(+), 78 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 53b1939698234..56cbaa4201297 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -51,8 +51,8 @@ template <typename Class> struct bind_ty {
};
/// Match a specified integer value or vector of all elements of that
-/// value. \p BitWidth optionally specifies the bitwidth to match. If it is 0,
-/// the matched constant can have any bitwidth.
+/// value. \p BitWidth optionally specifies the bitwidth the matched constant
+/// must have. If it is 0, the matched constant can have any bitwidth.
template <unsigned BitWidth = 0> struct specific_intval {
APInt Val;
@@ -69,6 +69,7 @@ template <unsigned BitWidth = 0> struct specific_intval {
C->getSplatValue(/*AllowPoison=*/false));
if (!CI)
return false;
+
assert((BitWidth == 0 || CI->getBitWidth() == BitWidth) &&
"Trying the match constant with unexpected bitwidth.");
return APInt::isSameValue(CI->getValue(), Val);
@@ -156,7 +157,7 @@ using AllUnaryRecipe_match =
UnaryRecipe_match<Op0_t, Opcode, VPWidenRecipe, VPReplicateRecipe,
VPWidenCastRecipe, VPInstruction>;
-template <typename Op0_t, typename Op1_t, unsigned Opcode, bool Commutative,
+template <typename Op0_t, typename Op1_t, unsigned Opcode,
typename... RecipeTys>
struct BinaryRecipe_match {
Op0_t Op0;
@@ -178,23 +179,18 @@ struct BinaryRecipe_match {
return false;
assert(R->getNumOperands() == 2 &&
"recipe with matched opcode does not have 2 operands");
- if (Op0.match(R->getOperand(0)) && Op1.match(R->getOperand(1)))
- return true;
- return Commutative && Op0.match(R->getOperand(1)) &&
- Op1.match(R->getOperand(0));
+ return Op0.match(R->getOperand(0)) && Op1.match(R->getOperand(1));
}
};
-template <typename Op0_t, typename Op1_t, unsigned Opcode,
- bool Commutative = false>
+template <typename Op0_t, typename Op1_t, unsigned Opcode>
using BinaryVPInstruction_match =
- BinaryRecipe_match<Op0_t, Op1_t, Opcode, Commutative, VPInstruction>;
+ BinaryRecipe_match<Op0_t, Op1_t, Opcode, VPInstruction>;
-template <typename Op0_t, typename Op1_t, unsigned Opcode,
- bool Commutative = false>
+template <typename Op0_t, typename Op1_t, unsigned Opcode>
using AllBinaryRecipe_match =
- BinaryRecipe_match<Op0_t, Op1_t, Opcode, Commutative, VPWidenRecipe,
- VPReplicateRecipe, VPWidenCastRecipe, VPInstruction>;
+ BinaryRecipe_match<Op0_t, Op1_t, Opcode, VPWidenRecipe, VPReplicateRecipe,
+ VPWidenCastRecipe, VPInstruction>;
template <unsigned Opcode, typename Op0_t>
inline UnaryVPInstruction_match<Op0_t, Opcode>
@@ -202,11 +198,10 @@ m_VPInstruction(const Op0_t &Op0) {
return UnaryVPInstruction_match<Op0_t, Opcode>(Op0);
}
-template <unsigned Opcode, typename Op0_t, typename Op1_t,
- bool Commutative = false>
-inline BinaryVPInstruction_match<Op0_t, Op1_t, Opcode, Commutative>
+template <unsigned Opcode, typename Op0_t, typename Op1_t>
+inline BinaryVPInstruction_match<Op0_t, Op1_t, Opcode>
m_VPInstruction(const Op0_t &Op0, const Op1_t &Op1) {
- return BinaryVPInstruction_match<Op0_t, Op1_t, Opcode, Commutative>(Op0, Op1);
+ return BinaryVPInstruction_match<Op0_t, Op1_t, Opcode>(Op0, Op1);
}
template <typename Op0_t>
@@ -261,11 +256,10 @@ m_ZExtOrSExt(const Op0_t &Op0) {
return m_CombineOr(m_ZExt(Op0), m_SExt(Op0));
}
-template <unsigned Opcode, typename Op0_t, typename Op1_t,
- bool Commutative = false>
-inline AllBinaryRecipe_match<Op0_t, Op1_t, Opcode, Commutative>
-m_Binary(const Op0_t &Op0, const Op1_t &Op1) {
- return AllBinaryRecipe_match<Op0_t, Op1_t, Opcode, Commutative>(Op0, Op1);
+template <unsigned Opcode, typename Op0_t, typename Op1_t>
+inline AllBinaryRecipe_match<Op0_t, Op1_t, Opcode> m_Binary(const Op0_t &Op0,
+ const Op1_t &Op1) {
+ return AllBinaryRecipe_match<Op0_t, Op1_t, Opcode>(Op0, Op1);
}
template <typename Op0_t, typename Op1_t>
@@ -274,50 +268,16 @@ m_Mul(const Op0_t &Op0, const Op1_t &Op1) {
return m_Binary<Instruction::Mul, Op0_t, Op1_t>(Op0, Op1);
}
-template <typename Op0_t, typename Op1_t, bool Commutative = false>
-inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Or, Commutative>
+template <typename Op0_t, typename Op1_t>
+inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Or>
m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1) {
- return m_Binary<Instruction::Or, Op0_t, Op1_t, Commutative>(Op0, Op1);
+ return m_Binary<Instruction::Or, Op0_t, Op1_t>(Op0, Op1);
}
template <typename Op0_t, typename Op1_t>
-inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Or,
- /*Commutative*/ true>
-m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1) {
- return m_BinaryOr<Op0_t, Op1_t, /*Commutative*/ true>(Op0, Op1);
-}
-
-template <typename Op0_t, typename Op1_t, bool Commutative = false>
-inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::And, Commutative>
-m_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1) {
- return m_Binary<Instruction::And, Op0_t, Op1_t, Commutative>(Op0, Op1);
-}
-
-template <typename Op0_t, typename Op1_t, bool Commutative = false>
-inline BinaryVPInstruction_match<Op0_t, Op1_t, VPInstruction::LogicalAnd,
- Commutative>
+inline BinaryVPInstruction_match<Op0_t, Op1_t, VPInstruction::LogicalAnd>
m_LogicalAnd(const Op0_t &Op0, const Op1_t &Op1) {
- return m_VPInstruction<VPInstruction::LogicalAnd, Op0_t, Op1_t, Commutative>(
- Op0, Op1);
-}
-
-template <typename Op0_t, typename Op1_t, bool Commutative = false>
-inline match_combine_or<
- AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::And, Commutative>,
- BinaryVPInstruction_match<Op0_t, Op1_t, VPInstruction::LogicalAnd,
- Commutative>>
-m_And(const Op0_t &Op0, const Op1_t &Op1) {
- return m_CombineOr(m_BinaryAnd<Op0_t, Op1_t, Commutative>(Op0, Op1),
- m_LogicalAnd<Op0_t, Op1_t, Commutative>(Op0, Op1));
-}
-
-template <typename Op0_t, typename Op1_t>
-inline match_combine_or<
- AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::And, /*Commutative*/ true>,
- BinaryVPInstruction_match<Op0_t, Op1_t, VPInstruction::LogicalAnd,
- /*Commutative*/ true>>
-m_c_And(const Op0_t &Op0, const Op1_t &Op1) {
- return m_And<Op0_t, Op1_t, /*Commutative*/ true>(Op0, Op1);
+ return m_VPInstruction<VPInstruction::LogicalAnd, Op0_t, Op1_t>(Op0, Op1);
}
} // namespace VPlanPatternMatch
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 4a07c9b05a0bb..5927345546e23 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -943,8 +943,9 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
// TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
// && (Y || Z) and (X || !X) into true. This requires queuing newly created
// recipes to be visited during simplification.
- if (match(&R, m_c_BinaryOr(m_c_And(m_VPValue(X), m_VPValue(Y)),
- m_c_And(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
+ if (match(&R,
+ m_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
+ m_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
X == X1 && Y == Y1) {
R.getVPSingleValue()->replaceAllUsesWith(X);
return;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
index 200c2adcf0e66..d335ac4b69709 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
@@ -402,9 +402,10 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
; TFCOMMON-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP10]])
; TFCOMMON-NEXT: [[TMP12:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP8]], <vscale x 2 x i1> zeroinitializer
; TFCOMMON-NEXT: [[TMP13:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP12]])
+; TFCOMMON-NEXT: [[TMP14:%.*]] = or <vscale x 2 x i1> [[TMP10]], [[TMP12]]
; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP10]], <vscale x 2 x i64> [[TMP11]], <vscale x 2 x i64> [[TMP13]]
; TFCOMMON-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
-; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP15]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
+; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP15]], i32 8, <vscale x 2 x i1> [[TMP14]])
; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025)
; TFCOMMON-NEXT: [[TMP16:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
@@ -452,14 +453,16 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i1> [[TMP14]], <vscale x 2 x i1> zeroinitializer
; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP21]])
; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD3]], <vscale x 2 x i1> [[TMP22]])
+; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = or <vscale x 2 x i1> [[TMP17]], [[TMP21]]
+; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = or <vscale x 2 x i1> [[TMP18]], [[TMP22]]
; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP17]], <vscale x 2 x i64> [[TMP19]], <vscale x 2 x i64> [[TMP23]]
; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select <vscale x 2 x i1> [[TMP18]], <vscale x 2 x i64> [[TMP20]], <vscale x 2 x i64> [[TMP24]]
; TFA_INTERLEAVE-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 2
; TFA_INTERLEAVE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[TMP27]], i64 [[TMP29]]
-; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP27]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
-; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI4]], ptr [[TMP30]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
+; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP27]], i32 8, <vscale x 2 x i1> [[TMP25]])
+; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI4]], ptr [[TMP30]], i32 8, <vscale x 2 x i1> [[TMP26]])
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT:%.*]] = add i64 [[INDEX]], [[TMP6]]
; TFA_INTERLEAVE-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP32:%.*]] = mul i64 [[TMP31]], 2
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
index 63ad98b2d8ab2..2b2742ca7ccbc 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
@@ -480,10 +480,11 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr
; CHECK-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> zeroinitializer
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> poison)
; CHECK-NEXT: [[TMP16:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP18:%.*]] = or <vscale x 4 x i1> [[TMP15]], [[TMP16]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[WIDE_MASKED_GATHER]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0
-; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[PREDPHI]], ptr [[TMP19]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
+; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[PREDPHI]], ptr [[TMP19]], i32 4, <vscale x 4 x i1> [[TMP18]])
; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP21]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]])
; CHECK-NEXT: [[TMP22:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
diff --git a/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll b/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll
index 0f7bd3d71feb4..d79b4a7cefc25 100644
--- a/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll
+++ b/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll
@@ -172,8 +172,6 @@ define void @test_not_first_lane_only_wide_compare_incoming_order_swapped(ptr %A
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[X]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i16> poison, i16 [[Y]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT1]], <4 x i16> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -184,14 +182,8 @@ define void @test_not_first_lane_only_wide_compare_incoming_order_swapped(ptr %A
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 2
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ult <4 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT2]]
-; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP9]], [[TMP10]]
-; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP11]], ptr [[B]], ptr poison
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
+; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP9]], ptr [[B]], ptr poison
; CHECK-NEXT: [[TMP12:%.*]] = load i16, ptr [[PREDPHI]], align 2
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP12]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT3]], <4 x i16> poison, <4 x i32> zeroinitializer