[llvm] VPlan: implement VPlan-level constant-folding (PR #125365)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 1 14:15:20 PST 2025
https://github.com/artagnon created https://github.com/llvm/llvm-project/pull/125365
Introduce VPConstantFolder (defined in the new header VPlanConstantFolder.h), a variation of ConstantFolder for VPlan, and use it in VPBuilder to constant-fold when all the underlying IR values passed into the API are constants.
-- 8< --
Based on #125364.
From f13968990fbc9dd8418c29c0ea919f8d6ebb4084 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Sat, 1 Feb 2025 14:48:27 +0000
Subject: [PATCH 1/2] VPlan: thread plan to VPBuilder (NFC)
Construct VPBuilder with a VPlan object, and change VPRecipeBuilder to
own the VPBuilder, in preparation for using the VPlan object in
VPBuilder to implement constant-folding. The VPlan reference in
VPBuilder is unused for now.
---
.../Vectorize/LoopVectorizationPlanner.h | 25 +++++++----------
.../Transforms/Vectorize/LoopVectorize.cpp | 27 +++++++++----------
.../Transforms/Vectorize/VPRecipeBuilder.h | 11 +++++---
llvm/lib/Transforms/Vectorize/VPlan.cpp | 2 +-
.../Transforms/Vectorize/VPlanHCFGBuilder.cpp | 2 +-
.../Transforms/Vectorize/VPlanTransforms.cpp | 21 ++++++++-------
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 2 +-
7 files changed, 44 insertions(+), 46 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index bc44ec11edb7b0..76e7bf2f62650a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -25,7 +25,6 @@
#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
#include "VPlan.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/Support/InstructionCost.h"
namespace llvm {
@@ -45,6 +44,7 @@ class VPRecipeBuilder;
class VPBuilder {
VPBasicBlock *BB = nullptr;
VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator();
+ [[maybe_unused]] VPlan &Plan;
/// Insert \p VPI in BB at InsertPt if BB is set.
template <typename T> T *tryInsertInstruction(T *R) {
@@ -66,10 +66,15 @@ class VPBuilder {
}
public:
- VPBuilder() = default;
- VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
- VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
- VPBuilder(VPBasicBlock *TheBB, VPBasicBlock::iterator IP) {
+ VPBuilder(VPlan &Plan) : Plan(Plan) {}
+ VPBuilder(VPlan &Plan, VPBasicBlock *InsertBB) : Plan(Plan) {
+ setInsertPoint(InsertBB);
+ }
+ VPBuilder(VPlan &Plan, VPRecipeBase *InsertPt) : Plan(Plan) {
+ setInsertPoint(InsertPt);
+ }
+ VPBuilder(VPlan &Plan, VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
+ : Plan(Plan) {
setInsertPoint(TheBB, IP);
}
@@ -83,13 +88,6 @@ class VPBuilder {
VPBasicBlock *getInsertBlock() const { return BB; }
VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
- /// Create a VPBuilder to insert after \p R.
- static VPBuilder getToInsertAfter(VPRecipeBase *R) {
- VPBuilder B;
- B.setInsertPoint(R->getParent(), std::next(R->getIterator()));
- return B;
- }
-
/// InsertPoint - A saved insertion point.
class VPInsertPoint {
VPBasicBlock *Block = nullptr;
@@ -390,9 +388,6 @@ class LoopVectorizationPlanner {
/// Profitable vector factors.
SmallVector<VectorizationFactor, 8> ProfitableVFs;
- /// A builder used to construct the current plan.
- VPBuilder Builder;
-
/// Computes the cost of \p Plan for vectorization factor \p VF.
///
/// The current implementation requires access to the
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 493ce848171211..858eddae56943b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8947,7 +8947,7 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
VPBasicBlock *Header = TopRegion->getEntryBasicBlock();
Header->insert(CanonicalIVPHI, Header->begin());
- VPBuilder Builder(TopRegion->getExitingBasicBlock());
+ VPBuilder Builder(Plan, TopRegion->getExitingBasicBlock());
// Add a VPInstruction to increment the scalar canonical IV by VF * UF.
auto *CanonicalIVIncrement = Builder.createOverflowingOp(
Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()}, {HasNUW, false}, DL,
@@ -9007,9 +9007,9 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor());
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
VPBuilder VectorPHBuilder(
- cast<VPBasicBlock>(VectorRegion->getSinglePredecessor()));
- VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
- VPBuilder ScalarPHBuilder(ScalarPH);
+ Plan, cast<VPBasicBlock>(VectorRegion->getSinglePredecessor()));
+ VPBuilder MiddleBuilder(Plan, MiddleVPBB, MiddleVPBB->getFirstNonPhi());
+ VPBuilder ScalarPHBuilder(Plan, ScalarPH);
VPValue *OneVPV = Plan.getOrAddLiveIn(
ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
for (VPRecipeBase &ScalarPhiR : *Plan.getScalarHeader()) {
@@ -9101,7 +9101,7 @@ addUsersInExitBlocks(VPlan &Plan,
return;
auto *MiddleVPBB = Plan.getMiddleBlock();
- VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
+ VPBuilder B(Plan, MiddleVPBB, MiddleVPBB->getFirstNonPhi());
// Introduce extract for exiting values and update the VPIRInstructions
// modeling the corresponding LCSSA phis.
@@ -9123,8 +9123,8 @@ static void addExitUsersForFirstOrderRecurrences(
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
auto *ScalarPHVPBB = Plan.getScalarPreheader();
auto *MiddleVPBB = Plan.getMiddleBlock();
- VPBuilder ScalarPHBuilder(ScalarPHVPBB);
- VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
+ VPBuilder ScalarPHBuilder(Plan, ScalarPHVPBB);
+ VPBuilder MiddleBuilder(Plan, MiddleVPBB, MiddleVPBB->getFirstNonPhi());
VPValue *TwoVPV = Plan.getOrAddLiveIn(
ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 2));
@@ -9261,8 +9261,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None;
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, DL);
- VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
- Builder);
+ VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE);
// ---------------------------------------------------------------------------
// Pre-construction: record ingredients whose recipes we'll need to further
@@ -9318,7 +9317,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
// ingredients and fill a new VPBasicBlock.
if (VPBB != HeaderVPBB)
VPBB->setName(BB->getName());
- Builder.setInsertPoint(VPBB);
+ RecipeBuilder.setInsertPoint(VPBB);
if (VPBB == HeaderVPBB)
RecipeBuilder.createHeaderMask();
@@ -9482,7 +9481,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
// Sink users of fixed-order recurrence past the recipe defining the previous
// value and introduce FirstOrderRecurrenceSplice VPInstructions.
if (!VPlanTransforms::runPass(VPlanTransforms::adjustFixedOrderRecurrences,
- *Plan, Builder))
+ *Plan, RecipeBuilder.getIRBuilder()))
return nullptr;
if (useActiveLaneMask(Style)) {
@@ -9532,8 +9531,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
// Collect mapping of IR header phis to header phi recipes, to be used in
// addScalarResumePhis.
- VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
- Builder);
+ VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE);
for (auto &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
if (isa<VPCanonicalIVPHIRecipe>(&R))
continue;
@@ -9698,6 +9696,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
}
}
VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock();
+ VPBuilder Builder(*Plan);
Builder.setInsertPoint(&*LatchVPBB->begin());
VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi();
for (VPRecipeBase &R :
@@ -10205,7 +10204,7 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
m_Specific(VectorTC), m_SpecificInt(0)));
}))
return;
- VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin());
+ VPBuilder ScalarPHBuilder(MainPlan, MainScalarPH, MainScalarPH->begin());
ScalarPHBuilder.createNaryOp(
VPInstruction::ResumePhi,
{VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {},
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 87c97d1edd7b6a..c9c3a1abec5283 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -12,7 +12,6 @@
#include "LoopVectorizationPlanner.h"
#include "VPlan.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/PointerUnion.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/IRBuilder.h"
@@ -65,7 +64,7 @@ class VPRecipeBuilder {
PredicatedScalarEvolution &PSE;
- VPBuilder &Builder;
+ VPBuilder Builder;
/// When we if-convert we need to create edge masks. We have to cache values
/// so that we don't end up with exponential recursion/IR. Note that
@@ -155,9 +154,13 @@ class VPRecipeBuilder {
const TargetTransformInfo *TTI,
LoopVectorizationLegality *Legal,
LoopVectorizationCostModel &CM,
- PredicatedScalarEvolution &PSE, VPBuilder &Builder)
+ PredicatedScalarEvolution &PSE)
: Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal),
- CM(CM), PSE(PSE), Builder(Builder) {}
+ CM(CM), PSE(PSE), Builder(Plan) {}
+
+ void setInsertPoint(VPBasicBlock *VPBB) { Builder.setInsertPoint(VPBB); }
+
+ VPBuilder &getIRBuilder() { return Builder; }
std::optional<unsigned> getScalingForReduction(const Instruction *ExitInst) {
auto It = ScaledReductionMap.find(ExitInst);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 4a1512abe4e48c..5f7a69fd35a088 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -915,7 +915,7 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
// of the corresponding compare because they may have ended up with
// different line numbers and we want to avoid awkward line stepping while
// debugging. Eg. if the compare has got a line number inside the loop.
- VPBuilder Builder(MiddleVPBB);
+ VPBuilder Builder(*Plan, MiddleVPBB);
VPValue *Cmp =
TailFolded
? Plan->getOrAddLiveIn(ConstantInt::getTrue(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index 5a2e5d7cfee48d..a0505e0d1bb8d0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -72,7 +72,7 @@ class PlainCFGBuilder {
public:
PlainCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
- : TheLoop(Lp), LI(LI), Plan(P) {}
+ : TheLoop(Lp), LI(LI), Plan(P), VPIRBuilder(Plan) {}
/// Build plain CFG for TheLoop and connects it to Plan's entry.
void buildPlainCFG();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a1a2cf211abf88..bf95b62474150c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -591,7 +591,7 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
using namespace llvm::VPlanPatternMatch;
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
- VPBuilder Builder(HeaderVPBB, HeaderVPBB->getFirstNonPhi());
+ VPBuilder Builder(Plan, HeaderVPBB, HeaderVPBB->getFirstNonPhi());
for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
auto *PhiR = dyn_cast<VPWidenInductionRecipe>(&Phi);
if (!PhiR)
@@ -744,7 +744,7 @@ void VPlanTransforms::optimizeInductionExitUsers(
"predecessor must be the middle block");
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
- VPBuilder B(Plan.getMiddleBlock()->getTerminator());
+ VPBuilder B(Plan, Plan.getMiddleBlock()->getTerminator());
for (VPRecipeBase &R : *ExitVPBB) {
auto *ExitIRI = cast<VPIRInstruction>(&R);
if (!isa<PHINode>(ExitIRI->getInstruction()))
@@ -1505,7 +1505,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
// we have to take unrolling into account. Each part needs to start at
// Part * VF
auto *VecPreheader = Plan.getVectorPreheader();
- VPBuilder Builder(VecPreheader);
+ VPBuilder Builder(Plan, VecPreheader);
// Create the ActiveLaneMask instruction using the correct start values.
VPValue *TC = Plan.getTripCount();
@@ -1624,7 +1624,8 @@ void VPlanTransforms::addActiveLaneMask(
LaneMask = addVPLaneMaskPhiAndUpdateExitBranch(
Plan, DataAndControlFlowWithoutRuntimeCheck);
} else {
- VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV);
+ VPBuilder B(Plan, WideCanonicalIV->getParent(),
+ std::next(WideCanonicalIV->getIterator()));
LaneMask = B.createNaryOp(VPInstruction::ActiveLaneMask,
{WideCanonicalIV, Plan.getTripCount()}, nullptr,
"active.lane.mask");
@@ -1828,7 +1829,7 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
// Create the ExplicitVectorLengthPhi recipe in the main loop.
auto *EVLPhi = new VPEVLBasedIVPHIRecipe(StartV, DebugLoc());
EVLPhi->insertAfter(CanonicalIVPHI);
- VPBuilder Builder(Header, Header->getFirstNonPhi());
+ VPBuilder Builder(Plan, Header, Header->getFirstNonPhi());
// Compute original TC - IV as the AVL (application vector length).
VPValue *AVL = Builder.createNaryOp(
Instruction::Sub, {Plan.getTripCount(), EVLPhi}, DebugLoc(), "avl");
@@ -1909,7 +1910,7 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
// where the operands are disjoint or poison otherwise.
if (match(RecWithFlags, m_BinaryOr(m_VPValue(A), m_VPValue(B))) &&
RecWithFlags->isDisjoint()) {
- VPBuilder Builder(RecWithFlags);
+ VPBuilder Builder(Plan, RecWithFlags);
VPInstruction *New = Builder.createOverflowingOp(
Instruction::Add, {A, B}, {false, false},
RecWithFlags->getDebugLoc());
@@ -2023,7 +2024,7 @@ void VPlanTransforms::createInterleaveGroups(
/*IsSigned=*/true);
VPValue *OffsetVPV = Plan.getOrAddLiveIn(
ConstantInt::get(IRInsertPos->getParent()->getContext(), -Offset));
- VPBuilder B(InsertPos);
+ VPBuilder B(Plan, InsertPos);
Addr = InBounds ? B.createInBoundsPtrAdd(InsertPos->getAddr(), OffsetVPV)
: B.createPtrAdd(InsertPos->getAddr(), OffsetVPV);
}
@@ -2069,7 +2070,7 @@ void VPlanTransforms::handleUncountableEarlyExit(
BasicBlock *UncountableExitingBlock, VPRecipeBuilder &RecipeBuilder) {
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
auto *LatchVPBB = cast<VPBasicBlock>(LoopRegion->getExiting());
- VPBuilder Builder(LatchVPBB->getTerminator());
+ VPBuilder Builder(Plan, LatchVPBB->getTerminator());
auto *MiddleVPBB = Plan.getMiddleBlock();
VPValue *IsEarlyExitTaken = nullptr;
@@ -2110,8 +2111,8 @@ void VPlanTransforms::handleUncountableEarlyExit(
VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, VPEarlyExitBlock);
// Update the exit phis in the early exit block.
- VPBuilder MiddleBuilder(NewMiddle);
- VPBuilder EarlyExitB(VectorEarlyExitVPBB);
+ VPBuilder MiddleBuilder(Plan, NewMiddle);
+ VPBuilder EarlyExitB(Plan, VectorEarlyExitVPBB);
for (VPRecipeBase &R : *VPEarlyExitBlock) {
auto *ExitIRI = cast<VPIRInstruction>(&R);
auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction());
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 89e372d6b46cfd..53e086dfe1cbd4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -156,7 +156,7 @@ void UnrollState::unrollWidenInductionByUF(
FMFs = ID.getInductionBinOp()->getFastMathFlags();
VPValue *VectorStep = &Plan.getVF();
- VPBuilder Builder(PH);
+ VPBuilder Builder(Plan, PH);
if (TypeInfo.inferScalarType(VectorStep) != IVTy) {
Instruction::CastOps CastOp =
IVTy->isFloatingPointTy() ? Instruction::UIToFP : Instruction::Trunc;
From c06b65b960e61686b5293ad2157a4b840e956790 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Sat, 1 Feb 2025 20:32:51 +0000
Subject: [PATCH 2/2] VPlan: implement VPlan-level constant-folding
Introduce VPConstantFolder (in the new header VPlanConstantFolder.h), a
variation of ConstantFolder for VPlan, and use it in VPBuilder to
constant-fold when all the underlying IR values passed into the API are
constants.
---
.../Vectorize/LoopVectorizationPlanner.h | 37 +++++--
.../Vectorize/VPlanConstantFolder.h | 96 +++++++++++++++++++
.../Transforms/Vectorize/VPlanTransforms.cpp | 4 +-
...licate-recipe-with-only-first-lane-used.ll | 50 +---------
.../interleave-and-scalarize-only.ll | 5 +-
5 files changed, 131 insertions(+), 61 deletions(-)
create mode 100644 llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 76e7bf2f62650a..cfbc075e6dcd7d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -25,6 +25,7 @@
#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
#include "VPlan.h"
+#include "VPlanConstantFolder.h"
#include "llvm/Support/InstructionCost.h"
namespace llvm {
@@ -44,7 +45,8 @@ class VPRecipeBuilder;
class VPBuilder {
VPBasicBlock *BB = nullptr;
VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator();
- [[maybe_unused]] VPlan &Plan;
+ VPlan &Plan;
+ VPConstantFolder Folder;
/// Insert \p VPI in BB at InsertPt if BB is set.
template <typename T> T *tryInsertInstruction(T *R) {
@@ -174,17 +176,22 @@ class VPBuilder {
VPValue *createNot(VPValue *Operand, DebugLoc DL = {},
const Twine &Name = "") {
+ if (auto *V = Folder.foldNot(Operand))
+ return Plan.getOrAddLiveIn(V);
return createInstruction(VPInstruction::Not, {Operand}, DL, Name);
}
VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
const Twine &Name = "") {
+ if (auto *V = Folder.foldAnd(LHS, RHS))
+ return Plan.getOrAddLiveIn(V);
return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name);
}
VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
const Twine &Name = "") {
-
+ if (auto *V = Folder.foldOr(LHS, RHS))
+ return Plan.getOrAddLiveIn(V);
return tryInsertInstruction(new VPInstruction(
Instruction::BinaryOps::Or, {LHS, RHS},
VPRecipeWithIRFlags::DisjointFlagsTy(false), DL, Name));
@@ -192,6 +199,8 @@ class VPBuilder {
VPValue *createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
const Twine &Name = "") {
+ if (auto *V = Folder.foldLogicalAnd(LHS, RHS))
+ return Plan.getOrAddLiveIn(V);
return tryInsertInstruction(
new VPInstruction(VPInstruction::LogicalAnd, {LHS, RHS}, DL, Name));
}
@@ -199,6 +208,8 @@ class VPBuilder {
VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
DebugLoc DL = {}, const Twine &Name = "",
std::optional<FastMathFlags> FMFs = std::nullopt) {
+ if (auto *V = Folder.foldSelect(Cond, TrueVal, FalseVal))
+ return Plan.getOrAddLiveIn(V);
auto *Select =
FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
*FMFs, DL, Name)
@@ -214,17 +225,23 @@ class VPBuilder {
DebugLoc DL = {}, const Twine &Name = "") {
assert(Pred >= CmpInst::FIRST_ICMP_PREDICATE &&
Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
+ if (auto *V = Folder.foldCmp(Pred, A, B))
+ return Plan.getOrAddLiveIn(V);
return tryInsertInstruction(
new VPInstruction(Instruction::ICmp, Pred, A, B, DL, Name));
}
- VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
- const Twine &Name = "") {
+ VPValue *createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
+ const Twine &Name = "") {
+ if (auto *V = Folder.foldPtrAdd(Ptr, Offset, GEPNoWrapFlags::none()))
+ return Plan.getOrAddLiveIn(V);
return tryInsertInstruction(
new VPInstruction(Ptr, Offset, GEPNoWrapFlags::none(), DL, Name));
}
VPValue *createInBoundsPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
const Twine &Name = "") {
+ if (auto *V = Folder.foldPtrAdd(Ptr, Offset, GEPNoWrapFlags::inBounds()))
+ return Plan.getOrAddLiveIn(V);
return tryInsertInstruction(
new VPInstruction(Ptr, Offset, GEPNoWrapFlags::inBounds(), DL, Name));
}
@@ -240,14 +257,18 @@ class VPBuilder {
new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step, Name));
}
- VPScalarCastRecipe *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
- Type *ResultTy, DebugLoc DL) {
+ VPValue *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
+ Type *ResultTy, DebugLoc DL) {
+ if (auto *V = Folder.foldCast(Opcode, Op, ResultTy))
+ return Plan.getOrAddLiveIn(V);
return tryInsertInstruction(
new VPScalarCastRecipe(Opcode, Op, ResultTy, DL));
}
- VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
- Type *ResultTy) {
+ VPValue *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
+ Type *ResultTy) {
+ if (auto *V = Folder.foldCast(Opcode, Op, ResultTy))
+ return Plan.getOrAddLiveIn(V);
return tryInsertInstruction(new VPWidenCastRecipe(Opcode, Op, ResultTy));
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h b/llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h
new file mode 100644
index 00000000000000..01e47e92afa5cf
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h
@@ -0,0 +1,96 @@
+//===- VPlanConstantFolder.h - ConstantFolder for VPlan -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "VPlanValue.h"
+#include "llvm/IR/ConstantFold.h"
+#include "llvm/IR/Constants.h"
+
+namespace llvm {
+class VPConstantFolder {
+private:
+ Constant *getIRConstant(VPValue *V) const {
+ return dyn_cast_or_null<Constant>(V->getUnderlyingValue());
+ }
+
+ Value *foldBinOp(Instruction::BinaryOps Opcode, VPValue *LHS,
+ VPValue *RHS) const {
+ auto *LC = getIRConstant(LHS);
+ auto *RC = getIRConstant(RHS);
+ if (LC && RC) {
+ if (ConstantExpr::isDesirableBinOp(Opcode))
+ return ConstantExpr::get(Opcode, LC, RC);
+ return ConstantFoldBinaryInstruction(Opcode, LC, RC);
+ }
+ return nullptr;
+ }
+
+public:
+ Value *foldAnd(VPValue *LHS, VPValue *RHS) const {
+ return foldBinOp(Instruction::BinaryOps::And, LHS, RHS);
+ }
+
+ Value *foldOr(VPValue *LHS, VPValue *RHS) const {
+ return foldBinOp(Instruction::BinaryOps::Or, LHS, RHS);
+ }
+
+ Value *foldNot(VPValue *Op) const {
+ auto *C = getIRConstant(Op);
+ if (C)
+ return ConstantExpr::get(Instruction::BinaryOps::Xor, C,
+ Constant::getAllOnesValue(C->getType()));
+ return nullptr;
+ }
+
+ Value *foldLogicalAnd(VPValue *LHS, VPValue *RHS) const {
+ auto *LC = getIRConstant(LHS);
+ auto *RC = getIRConstant(RHS);
+ if (LC && RC)
+ return ConstantFoldSelectInstruction(
+ LC, RC, ConstantInt::getNullValue(RC->getType()));
+ return nullptr;
+ }
+
+ Value *foldSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal) const {
+ auto *CC = getIRConstant(Cond);
+ auto *TV = getIRConstant(TrueVal);
+ auto *FV = getIRConstant(FalseVal);
+ if (CC && TV && FV)
+ return ConstantFoldSelectInstruction(CC, TV, FV);
+ return nullptr;
+ }
+
+ Value *foldCmp(CmpInst::Predicate Pred, VPValue *LHS, VPValue *RHS) const {
+ auto *LC = getIRConstant(LHS);
+ auto *RC = getIRConstant(RHS);
+ if (LC && RC)
+ return ConstantFoldCompareInstruction(Pred, LC, RC);
+ return nullptr;
+ }
+
+ Value *foldPtrAdd(VPValue *Base, VPValue *Offset, GEPNoWrapFlags NW) const {
+ auto *BC = getIRConstant(Base);
+ auto *OC = getIRConstant(Offset);
+ if (BC && OC) {
+ auto &Ctx = BC->getType()->getContext();
+ return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ctx), BC, OC, NW);
+ }
+ return nullptr;
+ }
+
+ Value *foldCast(Instruction::CastOps Opcode, VPValue *Op,
+ Type *DestTy) const {
+ auto *C = getIRConstant(Op);
+ if (C) {
+ if (ConstantExpr::isDesirableCastOp(Opcode))
+ return ConstantExpr::getCast(Opcode, C, DestTy);
+ return ConstantFoldCastInstruction(Opcode, C, DestTy);
+ }
+ return nullptr;
+ }
+};
+} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index bf95b62474150c..5e0d1efb959030 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -532,8 +532,8 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
VPBuilder &Builder) {
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
- VPSingleDefRecipe *BaseIV = Builder.createDerivedIV(
- Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
+ VPValue *BaseIV = Builder.createDerivedIV(Kind, FPBinOp, StartV, CanonicalIV,
+ Step, "offset.idx");
// Truncate base induction if needed.
Type *CanonicalIVType = CanonicalIV->getScalarType();
diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll
index e6c9ce3381f73f..141ef6ec0b97e4 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll
@@ -12,56 +12,10 @@ define void @replicate_udiv_with_only_first_lane_used(i32 %x, ptr %dst, i64 %d)
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X]], 10
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 0
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_UDIV_CONTINUE14:.*]] ]
-; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF:.*]], label %[[PRED_UDIV_CONTINUE:.*]]
-; CHECK: [[PRED_UDIV_IF]]:
-; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 99, [[D]]
-; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE]]
-; CHECK: [[PRED_UDIV_CONTINUE]]:
-; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[TMP0]], %[[PRED_UDIV_IF]] ]
-; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF1:.*]], label %[[PRED_UDIV_CONTINUE2:.*]]
-; CHECK: [[PRED_UDIV_IF1]]:
-; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 99, [[D]]
-; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE2]]
-; CHECK: [[PRED_UDIV_CONTINUE2]]:
-; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF3:.*]], label %[[PRED_UDIV_CONTINUE4:.*]]
-; CHECK: [[PRED_UDIV_IF3]]:
-; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 99, [[D]]
-; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE4]]
-; CHECK: [[PRED_UDIV_CONTINUE4]]:
-; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF5:.*]], label %[[PRED_UDIV_CONTINUE6:.*]]
-; CHECK: [[PRED_UDIV_IF5]]:
-; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 99, [[D]]
-; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE6]]
-; CHECK: [[PRED_UDIV_CONTINUE6]]:
-; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF7:.*]], label %[[PRED_UDIV_CONTINUE8:.*]]
-; CHECK: [[PRED_UDIV_IF7]]:
-; CHECK-NEXT: [[TMP5:%.*]] = udiv i64 99, [[D]]
-; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE8]]
-; CHECK: [[PRED_UDIV_CONTINUE8]]:
-; CHECK-NEXT: [[TMP6:%.*]] = phi i64 [ poison, %[[PRED_UDIV_CONTINUE6]] ], [ [[TMP5]], %[[PRED_UDIV_IF7]] ]
-; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF9:.*]], label %[[PRED_UDIV_CONTINUE10:.*]]
-; CHECK: [[PRED_UDIV_IF9]]:
-; CHECK-NEXT: [[TMP7:%.*]] = udiv i64 99, [[D]]
-; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE10]]
-; CHECK: [[PRED_UDIV_CONTINUE10]]:
-; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF11:.*]], label %[[PRED_UDIV_CONTINUE12:.*]]
-; CHECK: [[PRED_UDIV_IF11]]:
-; CHECK-NEXT: [[TMP8:%.*]] = udiv i64 99, [[D]]
-; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE12]]
-; CHECK: [[PRED_UDIV_CONTINUE12]]:
-; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF13:.*]], label %[[PRED_UDIV_CONTINUE14]]
-; CHECK: [[PRED_UDIV_IF13]]:
-; CHECK-NEXT: [[TMP9:%.*]] = udiv i64 99, [[D]]
-; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE14]]
-; CHECK: [[PRED_UDIV_CONTINUE14]]:
-; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 true, i64 0, i64 [[TMP1]]
-; CHECK-NEXT: [[PREDPHI15:%.*]] = select i1 true, i64 0, i64 [[TMP6]]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[DST]], i64 [[PREDPHI]]
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 [[PREDPHI15]]
-; CHECK-NEXT: store i16 0, ptr [[TMP10]], align 2
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: store i16 0, ptr [[TMP11]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
index c3164762e81300..c294ec11b906d8 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
@@ -202,15 +202,14 @@ exit:
; DBG-NEXT: Successor(s): vector.ph
; DBG-EMPTY:
; DBG-NEXT: vector.ph:
-; DBG-NEXT: SCALAR-CAST vp<[[CAST:%.+]]> = trunc ir<1> to i32
; DBG-NEXT: Successor(s): vector loop
; DBG-EMPTY:
; DBG-NEXT: <x1> vector loop: {
; DBG-NEXT: vector.body:
-; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
+; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[INDEX_NEXT:%.*]]>
; DBG-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, vp<[[SCALAR_STEPS:.+]]>
; DBG-NEXT: SCALAR-CAST vp<[[TRUNC_IV:%.+]]> = trunc vp<[[CAN_IV]]> to i32
-; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[TRUNC_IV]]>, vp<[[CAST]]>
+; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[TRUNC_IV]]>, ir<1>
; DBG-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%for>, vp<[[SCALAR_STEPS]]>
; DBG-NEXT: CLONE store vp<[[SPLICE]]>, ir<%dst>
; DBG-NEXT: EMIT vp<[[IV_INC:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
More information about the llvm-commits
mailing list