[llvm] VPlan: use worklist in simplifyRecipes (PR #93998)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 20 04:17:29 PDT 2024
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/93998
From db940e178f0d59f775ebad41cf287c96ca246db5 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <r at artagnon.com>
Date: Fri, 31 May 2024 15:33:29 +0100
Subject: [PATCH 1/2] VPlan: use Worklist in simplifyRecipes
Since simplifyRecipe creates new recipes in some cases, use a Worklist
in its caller to capture newly-created recipes and add them to the
Worklist as candidates for further simplification. This patch also
thoroughly rewrites simplifyRecipe to split the combines into simpler
matched patterns, call eraseFromParent where applicable, and simplify
the logic.
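In outline, the new driver works as follows (a minimal, self-contained
sketch with placeholder types, not the LLVM API; the actual code below
uses SmallVector, VPRecipeBase and simplifyRecipe):

  #include <vector>

  struct Recipe {};  // stands in for VPRecipeBase

  // Stands in for simplifyRecipe: returns any newly-created recipes
  // that are themselves candidates for further simplification
  // (trivially empty in this sketch).
  std::vector<Recipe *> simplifyOne(Recipe * /*R*/) { return {}; }

  // Shape of the new simplifyRecipes loop: seed a worklist with every
  // recipe in the block, and re-queue whatever a simplification
  // produces, so that e.g. (X && Y) || (X && Z) -> X && (Y || Z) can
  // feed the (Y || !Y) -> true and (X && true) -> X folds later.
  void simplifyAll(const std::vector<Recipe *> &Block) {
    std::vector<Recipe *> Worklist(Block.begin(), Block.end());
    while (!Worklist.empty()) {
      Recipe *R = Worklist.back();
      Worklist.pop_back();
      for (Recipe *Cand : simplifyOne(R))
        Worklist.push_back(Cand);
    }
  }

Because the worklist is LIFO, a recipe produced by one combine is
revisited before the rest of the block, which is what lets the
split-up combines compose.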
---
.../Transforms/Vectorize/VPlanPatternMatch.h | 7 +-
.../Transforms/Vectorize/VPlanTransforms.cpp | 145 +++++++++++++-----
.../LoopVectorize/SystemZ/pr47665.ll | 100 +++++-------
3 files changed, 147 insertions(+), 105 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 9cd7712624bac4..d84c9b85b450a8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -70,8 +70,9 @@ template <unsigned BitWidth = 0> struct specific_intval {
if (!CI)
return false;
- assert((BitWidth == 0 || CI->getBitWidth() == BitWidth) &&
- "Trying the match constant with unexpected bitwidth.");
+ if (BitWidth != 0 && CI->getBitWidth() != BitWidth)
+ return false;
+
return APInt::isSameValue(CI->getValue(), Val);
}
};
@@ -82,6 +83,8 @@ inline specific_intval<0> m_SpecificInt(uint64_t V) {
inline specific_intval<1> m_False() { return specific_intval<1>(APInt(64, 0)); }
+inline specific_intval<1> m_True() { return specific_intval<1>(APInt(64, 1)); }
+
/// Matching combinators
template <typename LTy, typename RTy> struct match_combine_or {
LTy L;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index c91fd0f118e311..c81d10ed15b403 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -986,48 +986,49 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
}
}
-/// Try to simplify recipe \p R.
-static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
+/// Try to simplify recipe \p R. Returns candidates for further simplification.
+static SmallVector<VPRecipeBase *>
+simplifyRecipe(VPRecipeBase *R, VPTypeAnalysis &TypeInfo, LLVMContext &Ctx) {
using namespace llvm::VPlanPatternMatch;
// Try to remove redundant blend recipes.
- if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
+ if (auto *Blend = dyn_cast<VPBlendRecipe>(R)) {
VPValue *Inc0 = Blend->getIncomingValue(0);
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
if (Inc0 != Blend->getIncomingValue(I) &&
!match(Blend->getMask(I), m_False()))
- return;
+ return {};
Blend->replaceAllUsesWith(Inc0);
Blend->eraseFromParent();
- return;
+ return {};
}
- VPValue *A;
- if (match(&R, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
- VPValue *Trunc = R.getVPSingleValue();
+ VPValue *X, *X1, *Y, *Z;
+ if (match(R, m_Trunc(m_ZExtOrSExt(m_VPValue(X))))) {
+ VPValue *Trunc = R->getVPSingleValue();
Type *TruncTy = TypeInfo.inferScalarType(Trunc);
- Type *ATy = TypeInfo.inferScalarType(A);
- if (TruncTy == ATy) {
- Trunc->replaceAllUsesWith(A);
+ Type *XTy = TypeInfo.inferScalarType(X);
+ VPWidenCastRecipe *VPC = nullptr;
+ if (TruncTy == XTy) {
+ Trunc->replaceAllUsesWith(X);
} else {
// Don't replace a scalarizing recipe with a widened cast.
- if (isa<VPReplicateRecipe>(&R))
- return;
- if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
-
- unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
+ if (isa<VPReplicateRecipe>(R))
+ return {};
+ if (XTy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
+ unsigned ExtOpcode = match(R->getOperand(0), m_SExt(m_VPValue()))
? Instruction::SExt
: Instruction::ZExt;
auto *VPC =
- new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
- if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
+ new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), X, TruncTy);
+ if (auto *UnderlyingExt = R->getOperand(0)->getUnderlyingValue()) {
// UnderlyingExt has distinct return type, used to retain legacy cost.
VPC->setUnderlyingValue(UnderlyingExt);
}
- VPC->insertBefore(&R);
+ VPC->insertBefore(R);
Trunc->replaceAllUsesWith(VPC);
- } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
- auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
- VPC->insertBefore(&R);
+ } else if (XTy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
+ auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, X, TruncTy);
+ VPC->insertBefore(R);
Trunc->replaceAllUsesWith(VPC);
}
}
@@ -1035,10 +1036,10 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
// Verify that the cached type info is for both A and its users is still
// accurate by comparing it to freshly computed types.
VPTypeAnalysis TypeInfo2(
- R.getParent()->getPlan()->getCanonicalIV()->getScalarType(),
+ R->getParent()->getPlan()->getCanonicalIV()->getScalarType(),
TypeInfo.getContext());
- assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
- for (VPUser *U : A->users()) {
+ assert(TypeInfo.inferScalarType(X) == TypeInfo2.inferScalarType(X));
+ for (VPUser *U : X->users()) {
auto *R = dyn_cast<VPRecipeBase>(U);
if (!R)
continue;
@@ -1046,23 +1047,79 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
}
#endif
+ if (VPC)
+ return {VPC};
+ return {};
}
- // Simplify (X && Y) || (X && !Y) -> X.
- // TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
- // && (Y || Z) and (X || !X) into true. This requires queuing newly created
- // recipes to be visited during simplification.
- VPValue *X, *Y, *X1, *Y1;
- if (match(&R,
- m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
- m_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
- X == X1 && Y == Y1) {
- R.getVPSingleValue()->replaceAllUsesWith(X);
- return;
+ // (X || !X) -> true.
+ if (match(R, m_c_BinaryOr(m_VPValue(X), m_Not(m_VPValue(X1)))) && X == X1) {
+ auto *VPV = new VPValue(ConstantInt::getTrue(Ctx));
+ R->getVPSingleValue()->replaceAllUsesWith(VPV);
+ R->eraseFromParent();
+ return {};
+ }
+
+ // (X || true) -> true.
+ if (match(R, m_c_BinaryOr(m_VPValue(X), m_True()))) {
+ auto *VPV = new VPValue(ConstantInt::getTrue(Ctx));
+ R->getVPSingleValue()->replaceAllUsesWith(VPV);
+ R->eraseFromParent();
+ return {};
+ }
+
+ // (X || false) -> X.
+ if (match(R, m_c_BinaryOr(m_VPValue(X), m_False()))) {
+ R->getVPSingleValue()->replaceAllUsesWith(X);
+ R->eraseFromParent();
+ return {};
+ }
+
+ // (X && !X) -> false.
+ if (match(R, m_LogicalAnd(m_VPValue(X), m_Not(m_VPValue(X1)))) && X == X1) {
+ auto *VPV = new VPValue(ConstantInt::getFalse(Ctx));
+ R->getVPSingleValue()->replaceAllUsesWith(VPV);
+ R->eraseFromParent();
+ return {};
+ }
+
+ // (X && true) -> X.
+ if (match(R, m_LogicalAnd(m_VPValue(X), m_True()))) {
+ R->getVPSingleValue()->replaceAllUsesWith(X);
+ R->eraseFromParent();
+ return {};
+ }
+
+ // (X && false) -> false.
+ if (match(R, m_LogicalAnd(m_VPValue(X), m_False()))) {
+ auto *VPV = new VPValue(ConstantInt::getFalse(Ctx));
+ R->getVPSingleValue()->replaceAllUsesWith(VPV);
+ R->eraseFromParent();
+ return {};
+ }
+
+ // (X * 1) -> X.
+ if (match(R, m_c_Mul(m_VPValue(X), m_SpecificInt(1)))) {
+ R->getVPSingleValue()->replaceAllUsesWith(X);
+ R->eraseFromParent();
+ return {};
+ }
+
+ // (X && Y) || (X && Z) -> X && (Y || Z).
+ if (match(R, m_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
+ m_LogicalAnd(m_VPValue(X1), m_VPValue(Z)))) &&
+ X == X1) {
+ auto *YorZ = new VPInstruction(Instruction::Or, {Y, Z}, R->getDebugLoc());
+ YorZ->insertBefore(R);
+ auto *VPI = new VPInstruction(VPInstruction::LogicalAnd, {X, YorZ},
+ R->getDebugLoc());
+ VPI->insertBefore(R);
+ R->getVPSingleValue()->replaceAllUsesWith(VPI);
+ R->eraseFromParent();
+ return {VPI, YorZ};
}
- if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
- return R.getVPSingleValue()->replaceAllUsesWith(A);
+ return {};
}
/// Try to simplify the recipes in \p Plan.
@@ -1071,8 +1128,16 @@ static void simplifyRecipes(VPlan &Plan, LLVMContext &Ctx) {
Plan.getEntry());
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType(), Ctx);
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
- for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- simplifyRecipe(R, TypeInfo);
+ // Populate a Worklist, as simplifyRecipe might return a new recipe that we
+ // need to re-process.
+ SmallVector<VPRecipeBase *> Worklist;
+ for (auto &R : VPBB->getRecipeList())
+ Worklist.push_back(&R);
+
+ while (!Worklist.empty()) {
+ VPRecipeBase *R = Worklist.pop_back_val();
+ for (VPRecipeBase *Cand : simplifyRecipe(R, TypeInfo, Ctx))
+ Worklist.push_back(Cand);
}
}
}
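For illustration only (not part of the patch): with the worklist in
place, the modular folds compose to recover the old single-pattern
combine (X && Y) || (X && !Y) -> X, which the following standalone
check confirms over all boolean inputs:

  #include <cassert>

  // Standalone check (not LLVM code) that the chain of modular folds
  // is sound:
  //   (x && y) || (x && !y)
  //     -> x && (y || !y)   // (X && Y) || (X && Z) -> X && (Y || Z)
  //     -> x && true        // (Y || !Y) -> true
  //     -> x                // (X && true) -> X
  int main() {
    for (bool x : {false, true})
      for (bool y : {false, true})
        assert(((x && y) || (x && !y)) == x);
    return 0;
  }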
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll
index 07a1cca1bc21e7..203abe6c913125 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll
@@ -7,8 +7,6 @@ define void @test(ptr %p, i40 %a) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i40> poison, i40 [[A]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i40> [[BROADCAST_SPLATINSERT1]], <16 x i40> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE32:%.*]] ]
@@ -16,126 +14,102 @@ define void @test(ptr %p, i40 %a) {
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[VEC_IV:%.*]] = add <16 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <16 x i32> [[VEC_IV]], <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
-; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i40> [[BROADCAST_SPLAT2]], <i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24>
-; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i40> [[TMP1]], <i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28>
-; CHECK-NEXT: [[TMP3:%.*]] = trunc <16 x i40> [[TMP2]] to <16 x i32>
-; CHECK-NEXT: [[TMP4:%.*]] = trunc <16 x i32> [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <16 x i1> [[TMP4]], zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <16 x i1> zeroinitializer, [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <16 x i1> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i1> [[TMP8]], i32 0
-; CHECK-NEXT: store i1 [[TMP10]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
-; CHECK: pred.store.if3:
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i1> [[TMP8]], i32 1
-; CHECK-NEXT: store i1 [[TMP12]], ptr [[P]], align 1
+; CHECK: pred.store.if1:
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
+; CHECK: pred.store.continue2:
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i1> [[TMP0]], i32 2
+; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
+; CHECK: pred.store.if3:
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE5]]
; CHECK: pred.store.continue4:
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i1> [[TMP0]], i32 2
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i1> [[TMP0]], i32 3
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
; CHECK: pred.store.if5:
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i1> [[TMP8]], i32 2
-; CHECK-NEXT: store i1 [[TMP14]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.continue6:
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[TMP0]], i32 4
; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
; CHECK: pred.store.if7:
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i1> [[TMP8]], i32 3
-; CHECK-NEXT: store i1 [[TMP16]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
; CHECK: pred.store.continue8:
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[TMP0]], i32 4
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[TMP0]], i32 5
; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
; CHECK: pred.store.if9:
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i1> [[TMP8]], i32 4
-; CHECK-NEXT: store i1 [[TMP18]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
; CHECK: pred.store.continue10:
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP0]], i32 5
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP0]], i32 6
; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
; CHECK: pred.store.if11:
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i1> [[TMP8]], i32 5
-; CHECK-NEXT: store i1 [[TMP20]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
; CHECK: pred.store.continue12:
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i1> [[TMP0]], i32 6
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i1> [[TMP0]], i32 7
; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
; CHECK: pred.store.if13:
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i1> [[TMP8]], i32 6
-; CHECK-NEXT: store i1 [[TMP22]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
; CHECK: pred.store.continue14:
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[TMP0]], i32 7
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[TMP0]], i32 8
; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16:%.*]]
; CHECK: pred.store.if15:
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i1> [[TMP8]], i32 7
-; CHECK-NEXT: store i1 [[TMP24]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]]
; CHECK: pred.store.continue16:
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i1> [[TMP0]], i32 8
+; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i1> [[TMP0]], i32 9
; CHECK-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]]
; CHECK: pred.store.if17:
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i1> [[TMP8]], i32 8
-; CHECK-NEXT: store i1 [[TMP26]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]]
; CHECK: pred.store.continue18:
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP0]], i32 9
+; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP0]], i32 10
; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]]
; CHECK: pred.store.if19:
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i1> [[TMP8]], i32 9
-; CHECK-NEXT: store i1 [[TMP28]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]]
; CHECK: pred.store.continue20:
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[TMP0]], i32 10
+; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[TMP0]], i32 11
; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]]
; CHECK: pred.store.if21:
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i1> [[TMP8]], i32 10
-; CHECK-NEXT: store i1 [[TMP30]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]]
; CHECK: pred.store.continue22:
-; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[TMP0]], i32 11
+; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[TMP0]], i32 12
; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]]
; CHECK: pred.store.if23:
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i1> [[TMP8]], i32 11
-; CHECK-NEXT: store i1 [[TMP32]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]]
; CHECK: pred.store.continue24:
-; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i1> [[TMP0]], i32 12
+; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i1> [[TMP0]], i32 13
; CHECK-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]]
; CHECK: pred.store.if25:
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i1> [[TMP8]], i32 12
-; CHECK-NEXT: store i1 [[TMP34]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]]
; CHECK: pred.store.continue26:
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[TMP0]], i32 13
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[TMP0]], i32 14
; CHECK-NEXT: br i1 [[TMP35]], label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]]
; CHECK: pred.store.if27:
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i1> [[TMP8]], i32 13
-; CHECK-NEXT: store i1 [[TMP36]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]]
; CHECK: pred.store.continue28:
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i1> [[TMP0]], i32 14
-; CHECK-NEXT: br i1 [[TMP37]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]]
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i1> [[TMP0]], i32 15
+; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE32]]
; CHECK: pred.store.if29:
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i1> [[TMP8]], i32 14
-; CHECK-NEXT: store i1 [[TMP38]], ptr [[P]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]]
-; CHECK: pred.store.continue30:
-; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[TMP0]], i32 15
-; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32]]
-; CHECK: pred.store.if31:
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i1> [[TMP8]], i32 15
-; CHECK-NEXT: store i1 [[TMP40]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE32]]
-; CHECK: pred.store.continue32:
+; CHECK: pred.store.continue30:
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
From d18e05e02aa18c158e840531621d71dcbb7747f3 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Tue, 20 Aug 2024 11:35:49 +0100
Subject: [PATCH 2/2] VPlanTransforms: undo NFC changes
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 95 +++++++++----------
1 file changed, 45 insertions(+), 50 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index c81d10ed15b403..ca95fb92381fcf 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -988,10 +988,10 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
/// Try to simplify recipe \p R. Returns candidates for further simplification.
static SmallVector<VPRecipeBase *>
-simplifyRecipe(VPRecipeBase *R, VPTypeAnalysis &TypeInfo, LLVMContext &Ctx) {
+simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo, LLVMContext &Ctx) {
using namespace llvm::VPlanPatternMatch;
// Try to remove redundant blend recipes.
- if (auto *Blend = dyn_cast<VPBlendRecipe>(R)) {
+ if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
VPValue *Inc0 = Blend->getIncomingValue(0);
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
if (Inc0 != Blend->getIncomingValue(I) &&
@@ -1002,33 +1002,34 @@ simplifyRecipe(VPRecipeBase *R, VPTypeAnalysis &TypeInfo, LLVMContext &Ctx) {
return {};
}
- VPValue *X, *X1, *Y, *Z;
- if (match(R, m_Trunc(m_ZExtOrSExt(m_VPValue(X))))) {
- VPValue *Trunc = R->getVPSingleValue();
+ VPValue *A;
+ if (match(&R, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
+ VPValue *Trunc = R.getVPSingleValue();
Type *TruncTy = TypeInfo.inferScalarType(Trunc);
- Type *XTy = TypeInfo.inferScalarType(X);
+ Type *ATy = TypeInfo.inferScalarType(A);
VPWidenCastRecipe *VPC = nullptr;
- if (TruncTy == XTy) {
- Trunc->replaceAllUsesWith(X);
+ if (TruncTy == ATy) {
+ Trunc->replaceAllUsesWith(A);
} else {
// Don't replace a scalarizing recipe with a widened cast.
- if (isa<VPReplicateRecipe>(R))
+ if (isa<VPReplicateRecipe>(&R))
return {};
- if (XTy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
- unsigned ExtOpcode = match(R->getOperand(0), m_SExt(m_VPValue()))
+ if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
+
+ unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
? Instruction::SExt
: Instruction::ZExt;
auto *VPC =
- new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), X, TruncTy);
- if (auto *UnderlyingExt = R->getOperand(0)->getUnderlyingValue()) {
+ new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
+ if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
// UnderlyingExt has distinct return type, used to retain legacy cost.
VPC->setUnderlyingValue(UnderlyingExt);
}
- VPC->insertBefore(R);
+ VPC->insertBefore(&R);
Trunc->replaceAllUsesWith(VPC);
- } else if (XTy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
- auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, X, TruncTy);
- VPC->insertBefore(R);
+ } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
+ auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
+ VPC->insertBefore(&R);
Trunc->replaceAllUsesWith(VPC);
}
}
@@ -1036,10 +1037,10 @@ simplifyRecipe(VPRecipeBase *R, VPTypeAnalysis &TypeInfo, LLVMContext &Ctx) {
// Verify that the cached type info is for both A and its users is still
// accurate by comparing it to freshly computed types.
VPTypeAnalysis TypeInfo2(
- R->getParent()->getPlan()->getCanonicalIV()->getScalarType(),
+ R.getParent()->getPlan()->getCanonicalIV()->getScalarType(),
TypeInfo.getContext());
- assert(TypeInfo.inferScalarType(X) == TypeInfo2.inferScalarType(X));
- for (VPUser *U : X->users()) {
+ assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
+ for (VPUser *U : A->users()) {
auto *R = dyn_cast<VPRecipeBase>(U);
if (!R)
continue;
@@ -1052,70 +1053,64 @@ simplifyRecipe(VPRecipeBase *R, VPTypeAnalysis &TypeInfo, LLVMContext &Ctx) {
return {};
}
+ VPValue *X, *X1, *Y, *Z;
+
// (X || !X) -> true.
- if (match(R, m_c_BinaryOr(m_VPValue(X), m_Not(m_VPValue(X1)))) && X == X1) {
+ if (match(&R, m_c_BinaryOr(m_VPValue(X), m_Not(m_VPValue(X1)))) && X == X1) {
auto *VPV = new VPValue(ConstantInt::getTrue(Ctx));
- R->getVPSingleValue()->replaceAllUsesWith(VPV);
- R->eraseFromParent();
+ R.getVPSingleValue()->replaceAllUsesWith(VPV);
return {};
}
// (X || true) -> true.
- if (match(R, m_c_BinaryOr(m_VPValue(X), m_True()))) {
+ if (match(&R, m_c_BinaryOr(m_VPValue(X), m_True()))) {
auto *VPV = new VPValue(ConstantInt::getTrue(Ctx));
- R->getVPSingleValue()->replaceAllUsesWith(VPV);
- R->eraseFromParent();
+ R.getVPSingleValue()->replaceAllUsesWith(VPV);
return {};
}
// (X || false) -> X.
- if (match(R, m_c_BinaryOr(m_VPValue(X), m_False()))) {
- R->getVPSingleValue()->replaceAllUsesWith(X);
- R->eraseFromParent();
+ if (match(&R, m_c_BinaryOr(m_VPValue(X), m_False()))) {
+ R.getVPSingleValue()->replaceAllUsesWith(X);
return {};
}
// (X && !X) -> false.
- if (match(R, m_LogicalAnd(m_VPValue(X), m_Not(m_VPValue(X1)))) && X == X1) {
+ if (match(&R, m_LogicalAnd(m_VPValue(X), m_Not(m_VPValue(X1)))) && X == X1) {
auto *VPV = new VPValue(ConstantInt::getFalse(Ctx));
- R->getVPSingleValue()->replaceAllUsesWith(VPV);
- R->eraseFromParent();
+ R.getVPSingleValue()->replaceAllUsesWith(VPV);
return {};
}
// (X && true) -> X.
- if (match(R, m_LogicalAnd(m_VPValue(X), m_True()))) {
- R->getVPSingleValue()->replaceAllUsesWith(X);
- R->eraseFromParent();
+ if (match(&R, m_LogicalAnd(m_VPValue(X), m_True()))) {
+ R.getVPSingleValue()->replaceAllUsesWith(X);
return {};
}
// (X && false) -> false.
- if (match(R, m_LogicalAnd(m_VPValue(X), m_False()))) {
+ if (match(&R, m_LogicalAnd(m_VPValue(X), m_False()))) {
auto *VPV = new VPValue(ConstantInt::getFalse(Ctx));
- R->getVPSingleValue()->replaceAllUsesWith(VPV);
- R->eraseFromParent();
+ R.getVPSingleValue()->replaceAllUsesWith(VPV);
return {};
}
// (X * 1) -> X.
- if (match(R, m_c_Mul(m_VPValue(X), m_SpecificInt(1)))) {
- R->getVPSingleValue()->replaceAllUsesWith(X);
- R->eraseFromParent();
+ if (match(&R, m_c_Mul(m_VPValue(X), m_SpecificInt(1)))) {
+ R.getVPSingleValue()->replaceAllUsesWith(X);
return {};
}
// (X && Y) || (X && Z) -> X && (Y || Z).
- if (match(R, m_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
- m_LogicalAnd(m_VPValue(X1), m_VPValue(Z)))) &&
+ if (match(&R, m_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
+ m_LogicalAnd(m_VPValue(X1), m_VPValue(Z)))) &&
X == X1) {
- auto *YorZ = new VPInstruction(Instruction::Or, {Y, Z}, R->getDebugLoc());
- YorZ->insertBefore(R);
+ auto *YorZ = new VPInstruction(Instruction::Or, {Y, Z}, R.getDebugLoc());
+ YorZ->insertBefore(&R);
auto *VPI = new VPInstruction(VPInstruction::LogicalAnd, {X, YorZ},
- R->getDebugLoc());
- VPI->insertBefore(R);
- R->getVPSingleValue()->replaceAllUsesWith(VPI);
- R->eraseFromParent();
+ R.getDebugLoc());
+ VPI->insertBefore(&R);
+ R.getVPSingleValue()->replaceAllUsesWith(VPI);
return {VPI, YorZ};
}
@@ -1136,7 +1131,7 @@ static void simplifyRecipes(VPlan &Plan, LLVMContext &Ctx) {
while (!Worklist.empty()) {
VPRecipeBase *R = Worklist.pop_back_val();
- for (VPRecipeBase *Cand : simplifyRecipe(R, TypeInfo, Ctx))
+ for (VPRecipeBase *Cand : simplifyRecipe(*R, TypeInfo, Ctx))
Worklist.push_back(Cand);
}
}