[llvm] [VPlan] Introduce VPlanConstantFolder (PR #125365)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 24 04:05:30 PST 2025
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/125365
>From cc98b9e960e0891d9213ecd0ffdd15f0b7528755 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Sat, 1 Feb 2025 20:32:51 +0000
Subject: [PATCH] VPlan: implement VPlan-level constant-folding
Introduce VPlanConstantFolder, a variation of ConstantFolder for VPlan,
and use it in VPBuilder to constant-fold when all the underlying IR
values passed into the API are constants.
---
.../Vectorize/LoopVectorizationPlanner.h | 42 ++++++--
.../Vectorize/VPlanConstantFolder.h | 96 +++++++++++++++++++
.../Transforms/Vectorize/VPlanTransforms.cpp | 4 +-
...licate-recipe-with-only-first-lane-used.ll | 50 +---------
.../interleave-and-scalarize-only.ll | 5 +-
5 files changed, 136 insertions(+), 61 deletions(-)
create mode 100644 llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index ed3e45dd2c6c8..e90d6dbcbaee6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -25,7 +25,7 @@
#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
#include "VPlan.h"
-#include "llvm/ADT/SmallSet.h"
+#include "VPlanConstantFolder.h"
#include "llvm/Support/InstructionCost.h"
namespace llvm {
@@ -46,6 +46,7 @@ struct VFRange;
class VPBuilder {
VPBasicBlock *BB = nullptr;
VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator();
+ VPConstantFolder Folder;
/// Insert \p VPI in BB at InsertPt if BB is set.
template <typename T> T *tryInsertInstruction(T *R) {
@@ -66,6 +67,12 @@ class VPBuilder {
return createInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name);
}
+ VPValue *getOrAddLiveIn(Value *V) {
+ if (BB)
+ return BB->getPlan()->getOrAddLiveIn(V);
+ return new VPValue(V);
+ }
+
public:
VPBuilder() = default;
VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
@@ -180,17 +187,22 @@ class VPBuilder {
VPValue *createNot(VPValue *Operand, DebugLoc DL = {},
const Twine &Name = "") {
+ if (auto *V = Folder.foldNot(Operand))
+ return getOrAddLiveIn(V);
return createInstruction(VPInstruction::Not, {Operand}, DL, Name);
}
VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
const Twine &Name = "") {
+ if (auto *V = Folder.foldAnd(LHS, RHS))
+ return getOrAddLiveIn(V);
return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name);
}
VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
const Twine &Name = "") {
-
+ if (auto *V = Folder.foldOr(LHS, RHS))
+ return getOrAddLiveIn(V);
return tryInsertInstruction(new VPInstruction(
Instruction::BinaryOps::Or, {LHS, RHS},
VPRecipeWithIRFlags::DisjointFlagsTy(false), DL, Name));
@@ -198,6 +210,8 @@ class VPBuilder {
VPValue *createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
const Twine &Name = "") {
+ if (auto *V = Folder.foldLogicalAnd(LHS, RHS))
+ return getOrAddLiveIn(V);
return tryInsertInstruction(
new VPInstruction(VPInstruction::LogicalAnd, {LHS, RHS}, DL, Name));
}
@@ -205,6 +219,8 @@ class VPBuilder {
VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
DebugLoc DL = {}, const Twine &Name = "",
std::optional<FastMathFlags> FMFs = std::nullopt) {
+ if (auto *V = Folder.foldSelect(Cond, TrueVal, FalseVal))
+ return getOrAddLiveIn(V);
auto *Select =
FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
*FMFs, DL, Name)
@@ -220,17 +236,23 @@ class VPBuilder {
DebugLoc DL = {}, const Twine &Name = "") {
assert(Pred >= CmpInst::FIRST_ICMP_PREDICATE &&
Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
+ if (auto *V = Folder.foldCmp(Pred, A, B))
+ return getOrAddLiveIn(V);
return tryInsertInstruction(
new VPInstruction(Instruction::ICmp, Pred, A, B, DL, Name));
}
- VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
- const Twine &Name = "") {
+ VPValue *createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
+ const Twine &Name = "") {
+ if (auto *V = Folder.foldPtrAdd(Ptr, Offset, GEPNoWrapFlags::none()))
+ return getOrAddLiveIn(V);
return tryInsertInstruction(
new VPInstruction(Ptr, Offset, GEPNoWrapFlags::none(), DL, Name));
}
VPValue *createInBoundsPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
const Twine &Name = "") {
+ if (auto *V = Folder.foldPtrAdd(Ptr, Offset, GEPNoWrapFlags::inBounds()))
+ return getOrAddLiveIn(V);
return tryInsertInstruction(
new VPInstruction(Ptr, Offset, GEPNoWrapFlags::inBounds(), DL, Name));
}
@@ -246,14 +268,18 @@ class VPBuilder {
new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step, Name));
}
- VPScalarCastRecipe *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
- Type *ResultTy, DebugLoc DL) {
+ VPValue *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
+ Type *ResultTy, DebugLoc DL) {
+ if (auto *V = Folder.foldCast(Opcode, Op, ResultTy))
+ return getOrAddLiveIn(V);
return tryInsertInstruction(
new VPScalarCastRecipe(Opcode, Op, ResultTy, DL));
}
- VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
- Type *ResultTy) {
+ VPValue *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
+ Type *ResultTy) {
+ if (auto *V = Folder.foldCast(Opcode, Op, ResultTy))
+ return getOrAddLiveIn(V);
return tryInsertInstruction(new VPWidenCastRecipe(Opcode, Op, ResultTy));
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h b/llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h
new file mode 100644
index 0000000000000..01e47e92afa5c
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h
@@ -0,0 +1,96 @@
+//===- VPlanConstantFolder.h - ConstantFolder for VPlan -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "VPlanValue.h"
+#include "llvm/IR/ConstantFold.h"
+#include "llvm/IR/Constants.h"
+
+namespace llvm {
+class VPConstantFolder {
+private:
+ Constant *getIRConstant(VPValue *V) const {
+ return dyn_cast_or_null<Constant>(V->getUnderlyingValue());
+ }
+
+ Value *foldBinOp(Instruction::BinaryOps Opcode, VPValue *LHS,
+ VPValue *RHS) const {
+ auto *LC = getIRConstant(LHS);
+ auto *RC = getIRConstant(RHS);
+ if (LC && RC) {
+ if (ConstantExpr::isDesirableBinOp(Opcode))
+ return ConstantExpr::get(Opcode, LC, RC);
+ return ConstantFoldBinaryInstruction(Opcode, LC, RC);
+ }
+ return nullptr;
+ }
+
+public:
+ Value *foldAnd(VPValue *LHS, VPValue *RHS) const {
+ return foldBinOp(Instruction::BinaryOps::And, LHS, RHS);
+ }
+
+ Value *foldOr(VPValue *LHS, VPValue *RHS) const {
+ return foldBinOp(Instruction::BinaryOps::Or, LHS, RHS);
+ }
+
+ Value *foldNot(VPValue *Op) const {
+ auto *C = getIRConstant(Op);
+ if (C)
+ return ConstantExpr::get(Instruction::BinaryOps::Xor, C,
+ Constant::getAllOnesValue(C->getType()));
+ return nullptr;
+ }
+
+ Value *foldLogicalAnd(VPValue *LHS, VPValue *RHS) const {
+ auto *LC = getIRConstant(LHS);
+ auto *RC = getIRConstant(RHS);
+ if (LC && RC)
+ return ConstantFoldSelectInstruction(
+ LC, RC, ConstantInt::getNullValue(RC->getType()));
+ return nullptr;
+ }
+
+ Value *foldSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal) const {
+ auto *CC = getIRConstant(Cond);
+ auto *TV = getIRConstant(TrueVal);
+ auto *FV = getIRConstant(FalseVal);
+ if (CC && TV && FV)
+ return ConstantFoldSelectInstruction(CC, TV, FV);
+ return nullptr;
+ }
+
+ Value *foldCmp(CmpInst::Predicate Pred, VPValue *LHS, VPValue *RHS) const {
+ auto *LC = getIRConstant(LHS);
+ auto *RC = getIRConstant(RHS);
+ if (LC && RC)
+ return ConstantFoldCompareInstruction(Pred, LC, RC);
+ return nullptr;
+ }
+
+ Value *foldPtrAdd(VPValue *Base, VPValue *Offset, GEPNoWrapFlags NW) const {
+ auto *BC = getIRConstant(Base);
+ auto *OC = getIRConstant(Offset);
+ if (BC && OC) {
+ auto &Ctx = BC->getType()->getContext();
+ return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ctx), BC, OC, NW);
+ }
+ return nullptr;
+ }
+
+ Value *foldCast(Instruction::CastOps Opcode, VPValue *Op,
+ Type *DestTy) const {
+ auto *C = getIRConstant(Op);
+ if (C) {
+ if (ConstantExpr::isDesirableCastOp(Opcode))
+ return ConstantExpr::getCast(Opcode, C, DestTy);
+ return ConstantFoldCastInstruction(Opcode, C, DestTy);
+ }
+ return nullptr;
+ }
+};
+} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 6c917e4eef655..f0542abcae4fb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -533,8 +533,8 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
VPBuilder &Builder) {
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
- VPSingleDefRecipe *BaseIV = Builder.createDerivedIV(
- Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
+ VPValue *BaseIV = Builder.createDerivedIV(Kind, FPBinOp, StartV, CanonicalIV,
+ Step, "offset.idx");
// Truncate base induction if needed.
Type *CanonicalIVType = CanonicalIV->getScalarType();
diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll
index e6c9ce3381f73..141ef6ec0b97e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll
@@ -12,56 +12,10 @@ define void @replicate_udiv_with_only_first_lane_used(i32 %x, ptr %dst, i64 %d)
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X]], 10
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 0
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_UDIV_CONTINUE14:.*]] ]
-; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF:.*]], label %[[PRED_UDIV_CONTINUE:.*]]
-; CHECK: [[PRED_UDIV_IF]]:
-; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 99, [[D]]
-; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE]]
-; CHECK: [[PRED_UDIV_CONTINUE]]:
-; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[TMP0]], %[[PRED_UDIV_IF]] ]
-; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF1:.*]], label %[[PRED_UDIV_CONTINUE2:.*]]
-; CHECK: [[PRED_UDIV_IF1]]:
-; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 99, [[D]]
-; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE2]]
-; CHECK: [[PRED_UDIV_CONTINUE2]]:
-; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF3:.*]], label %[[PRED_UDIV_CONTINUE4:.*]]
-; CHECK: [[PRED_UDIV_IF3]]:
-; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 99, [[D]]
-; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE4]]
-; CHECK: [[PRED_UDIV_CONTINUE4]]:
-; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF5:.*]], label %[[PRED_UDIV_CONTINUE6:.*]]
-; CHECK: [[PRED_UDIV_IF5]]:
-; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 99, [[D]]
-; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE6]]
-; CHECK: [[PRED_UDIV_CONTINUE6]]:
-; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF7:.*]], label %[[PRED_UDIV_CONTINUE8:.*]]
-; CHECK: [[PRED_UDIV_IF7]]:
-; CHECK-NEXT: [[TMP5:%.*]] = udiv i64 99, [[D]]
-; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE8]]
-; CHECK: [[PRED_UDIV_CONTINUE8]]:
-; CHECK-NEXT: [[TMP6:%.*]] = phi i64 [ poison, %[[PRED_UDIV_CONTINUE6]] ], [ [[TMP5]], %[[PRED_UDIV_IF7]] ]
-; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF9:.*]], label %[[PRED_UDIV_CONTINUE10:.*]]
-; CHECK: [[PRED_UDIV_IF9]]:
-; CHECK-NEXT: [[TMP7:%.*]] = udiv i64 99, [[D]]
-; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE10]]
-; CHECK: [[PRED_UDIV_CONTINUE10]]:
-; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF11:.*]], label %[[PRED_UDIV_CONTINUE12:.*]]
-; CHECK: [[PRED_UDIV_IF11]]:
-; CHECK-NEXT: [[TMP8:%.*]] = udiv i64 99, [[D]]
-; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE12]]
-; CHECK: [[PRED_UDIV_CONTINUE12]]:
-; CHECK-NEXT: br i1 false, label %[[PRED_UDIV_IF13:.*]], label %[[PRED_UDIV_CONTINUE14]]
-; CHECK: [[PRED_UDIV_IF13]]:
-; CHECK-NEXT: [[TMP9:%.*]] = udiv i64 99, [[D]]
-; CHECK-NEXT: br label %[[PRED_UDIV_CONTINUE14]]
-; CHECK: [[PRED_UDIV_CONTINUE14]]:
-; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 true, i64 0, i64 [[TMP1]]
-; CHECK-NEXT: [[PREDPHI15:%.*]] = select i1 true, i64 0, i64 [[TMP6]]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[DST]], i64 [[PREDPHI]]
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 [[PREDPHI15]]
-; CHECK-NEXT: store i16 0, ptr [[TMP10]], align 2
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: store i16 0, ptr [[TMP11]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
index c3164762e8130..c294ec11b906d 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
@@ -202,15 +202,14 @@ exit:
; DBG-NEXT: Successor(s): vector.ph
; DBG-EMPTY:
; DBG-NEXT: vector.ph:
-; DBG-NEXT: SCALAR-CAST vp<[[CAST:%.+]]> = trunc ir<1> to i32
; DBG-NEXT: Successor(s): vector loop
; DBG-EMPTY:
; DBG-NEXT: <x1> vector loop: {
; DBG-NEXT: vector.body:
-; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
+; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[INDEX_NEXT:%.*]]>
; DBG-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, vp<[[SCALAR_STEPS:.+]]>
; DBG-NEXT: SCALAR-CAST vp<[[TRUNC_IV:%.+]]> = trunc vp<[[CAN_IV]]> to i32
-; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[TRUNC_IV]]>, vp<[[CAST]]>
+; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[TRUNC_IV]]>, ir<1>
; DBG-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%for>, vp<[[SCALAR_STEPS]]>
; DBG-NEXT: CLONE store vp<[[SPLICE]]>, ir<%dst>
; DBG-NEXT: EMIT vp<[[IV_INC:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
More information about the llvm-commits
mailing list