[llvm] [VPlan] Introduce VPlanConstantFolder (PR #125365)

Ramkumar Ramachandra via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 24 04:05:30 PST 2025


https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/125365

>From cc98b9e960e0891d9213ecd0ffdd15f0b7528755 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Sat, 1 Feb 2025 20:32:51 +0000
Subject: [PATCH] VPlan: implement VPlan-level constant-folding

Introduce VPConstantFolder (in VPlanConstantFolder.h), a variation of
ConstantFolder for VPlan, and use it in VPBuilder to constant-fold when
all the underlying IR values passed into the API are constants.
---
 .../Vectorize/LoopVectorizationPlanner.h      | 42 ++++++--
 .../Vectorize/VPlanConstantFolder.h           | 96 +++++++++++++++++++
 .../Transforms/Vectorize/VPlanTransforms.cpp  |  4 +-
 ...licate-recipe-with-only-first-lane-used.ll | 50 +---------
 .../interleave-and-scalarize-only.ll          |  5 +-
 5 files changed, 136 insertions(+), 61 deletions(-)
 create mode 100644 llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index ed3e45dd2c6c8..e90d6dbcbaee6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -25,7 +25,7 @@
 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
 
 #include "VPlan.h"
-#include "llvm/ADT/SmallSet.h"
+#include "VPlanConstantFolder.h"
 #include "llvm/Support/InstructionCost.h"
 
 namespace llvm {
@@ -46,6 +46,7 @@ struct VFRange;
 class VPBuilder {
   VPBasicBlock *BB = nullptr;
   VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator();
+  VPConstantFolder Folder;
 
   /// Insert \p VPI in BB at InsertPt if BB is set.
   template <typename T> T *tryInsertInstruction(T *R) {
@@ -66,6 +67,12 @@ class VPBuilder {
     return createInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name);
   }
 
+  VPValue *getOrAddLiveIn(Value *V) {
+    if (BB)
+      return BB->getPlan()->getOrAddLiveIn(V);
+    return new VPValue(V);
+  }
+
 public:
   VPBuilder() = default;
   VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
@@ -180,17 +187,22 @@ class VPBuilder {
 
   VPValue *createNot(VPValue *Operand, DebugLoc DL = {},
                      const Twine &Name = "") {
+    if (auto *V = Folder.foldNot(Operand))
+      return getOrAddLiveIn(V);
     return createInstruction(VPInstruction::Not, {Operand}, DL, Name);
   }
 
   VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
                      const Twine &Name = "") {
+    if (auto *V = Folder.foldAnd(LHS, RHS))
+      return getOrAddLiveIn(V);
     return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name);
   }
 
   VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
                     const Twine &Name = "") {
-
+    if (auto *V = Folder.foldOr(LHS, RHS))
+      return getOrAddLiveIn(V);
     return tryInsertInstruction(new VPInstruction(
         Instruction::BinaryOps::Or, {LHS, RHS},
         VPRecipeWithIRFlags::DisjointFlagsTy(false), DL, Name));
@@ -198,6 +210,8 @@ class VPBuilder {
 
   VPValue *createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
                             const Twine &Name = "") {
+    if (auto *V = Folder.foldLogicalAnd(LHS, RHS))
+      return getOrAddLiveIn(V);
     return tryInsertInstruction(
         new VPInstruction(VPInstruction::LogicalAnd, {LHS, RHS}, DL, Name));
   }
@@ -205,6 +219,8 @@ class VPBuilder {
   VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
                         DebugLoc DL = {}, const Twine &Name = "",
                         std::optional<FastMathFlags> FMFs = std::nullopt) {
+    if (auto *V = Folder.foldSelect(Cond, TrueVal, FalseVal))
+      return getOrAddLiveIn(V);
     auto *Select =
         FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
                                  *FMFs, DL, Name)
@@ -220,17 +236,23 @@ class VPBuilder {
                       DebugLoc DL = {}, const Twine &Name = "") {
     assert(Pred >= CmpInst::FIRST_ICMP_PREDICATE &&
            Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
+    if (auto *V = Folder.foldCmp(Pred, A, B))
+      return getOrAddLiveIn(V);
     return tryInsertInstruction(
         new VPInstruction(Instruction::ICmp, Pred, A, B, DL, Name));
   }
 
-  VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
-                              const Twine &Name = "") {
+  VPValue *createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
+                        const Twine &Name = "") {
+    if (auto *V = Folder.foldPtrAdd(Ptr, Offset, GEPNoWrapFlags::none()))
+      return getOrAddLiveIn(V);
     return tryInsertInstruction(
         new VPInstruction(Ptr, Offset, GEPNoWrapFlags::none(), DL, Name));
   }
   VPValue *createInBoundsPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
                                 const Twine &Name = "") {
+    if (auto *V = Folder.foldPtrAdd(Ptr, Offset, GEPNoWrapFlags::inBounds()))
+      return getOrAddLiveIn(V);
     return tryInsertInstruction(
         new VPInstruction(Ptr, Offset, GEPNoWrapFlags::inBounds(), DL, Name));
   }
@@ -246,14 +268,18 @@ class VPBuilder {
         new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step, Name));
   }
 
-  VPScalarCastRecipe *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
-                                       Type *ResultTy, DebugLoc DL) {
+  VPValue *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
+                            Type *ResultTy, DebugLoc DL) {
+    if (auto *V = Folder.foldCast(Opcode, Op, ResultTy))
+      return getOrAddLiveIn(V);
     return tryInsertInstruction(
         new VPScalarCastRecipe(Opcode, Op, ResultTy, DL));
   }
 
-  VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
-                                     Type *ResultTy) {
+  VPValue *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
+                           Type *ResultTy) {
+    if (auto *V = Folder.foldCast(Opcode, Op, ResultTy))
+      return getOrAddLiveIn(V);
     return tryInsertInstruction(new VPWidenCastRecipe(Opcode, Op, ResultTy));
   }
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h b/llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h
new file mode 100644
index 0000000000000..01e47e92afa5c
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstantFolder.h
@@ -0,0 +1,136 @@
+//===- VPlanConstantFolder.h - ConstantFolder for VPlan -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Defines VPConstantFolder, which folds VPlan operations into an IR constant
+/// when every operand is backed by an IR constant.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANCONSTANTFOLDER_H
+#define LLVM_TRANSFORMS_VECTORIZE_VPLANCONSTANTFOLDER_H
+
+#include "VPlanValue.h"
+#include "llvm/IR/ConstantFold.h"
+#include "llvm/IR/Constants.h"
+
+namespace llvm {
+/// Folds VPlan-level operations whose operands are all backed by IR constants.
+///
+/// Each fold* method returns nullptr when any operand has no underlying IR
+/// Constant; otherwise it returns whatever the LLVM folding routine it calls
+/// produces. NOTE(review): presumably those routines can also return null, so
+/// callers should treat every nullptr result as "not folded".
+class VPConstantFolder {
+private:
+  /// Return the IR Constant underlying \p V, or nullptr if \p V has no
+  /// underlying value or that value is not a Constant.
+  Constant *getIRConstant(VPValue *V) const {
+    return dyn_cast_or_null<Constant>(V->getUnderlyingValue());
+  }
+
+  /// Fold the binary operation \p Opcode on \p LHS and \p RHS. Returns nullptr
+  /// if either operand is not an IR constant.
+  Value *foldBinOp(Instruction::BinaryOps Opcode, VPValue *LHS,
+                   VPValue *RHS) const {
+    auto *LC = getIRConstant(LHS);
+    auto *RC = getIRConstant(RHS);
+    if (LC && RC) {
+      // Build a constant expression when ConstantExpr::isDesirableBinOp
+      // approves the opcode; otherwise use the generic instruction folder.
+      if (ConstantExpr::isDesirableBinOp(Opcode))
+        return ConstantExpr::get(Opcode, LC, RC);
+      return ConstantFoldBinaryInstruction(Opcode, LC, RC);
+    }
+    return nullptr;
+  }
+
+public:
+  /// Fold `LHS & RHS`; nullptr if either operand is not an IR constant.
+  Value *foldAnd(VPValue *LHS, VPValue *RHS) const {
+    return foldBinOp(Instruction::BinaryOps::And, LHS, RHS);
+  }
+
+  /// Fold `LHS | RHS`; nullptr if either operand is not an IR constant.
+  Value *foldOr(VPValue *LHS, VPValue *RHS) const {
+    return foldBinOp(Instruction::BinaryOps::Or, LHS, RHS);
+  }
+
+  /// Fold the negation of \p Op, computed as `Op ^ all-ones`. Returns nullptr
+  /// if \p Op is not an IR constant.
+  Value *foldNot(VPValue *Op) const {
+    auto *C = getIRConstant(Op);
+    if (C)
+      return ConstantExpr::get(Instruction::BinaryOps::Xor, C,
+                               Constant::getAllOnesValue(C->getType()));
+    return nullptr;
+  }
+
+  /// Fold a logical and, computed as `select LHS, RHS, zero`. Returns nullptr
+  /// if either operand is not an IR constant.
+  Value *foldLogicalAnd(VPValue *LHS, VPValue *RHS) const {
+    auto *LC = getIRConstant(LHS);
+    auto *RC = getIRConstant(RHS);
+    if (LC && RC)
+      return ConstantFoldSelectInstruction(
+          LC, RC, Constant::getNullValue(RC->getType()));
+    return nullptr;
+  }
+
+  /// Fold `select Cond, TrueVal, FalseVal`. Returns nullptr if any operand
+  /// is not an IR constant.
+  Value *foldSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal) const {
+    auto *CC = getIRConstant(Cond);
+    auto *TV = getIRConstant(TrueVal);
+    auto *FV = getIRConstant(FalseVal);
+    if (CC && TV && FV)
+      return ConstantFoldSelectInstruction(CC, TV, FV);
+    return nullptr;
+  }
+
+  /// Fold the comparison of \p LHS and \p RHS under predicate \p Pred. Returns
+  /// nullptr if either operand is not an IR constant.
+  Value *foldCmp(CmpInst::Predicate Pred, VPValue *LHS, VPValue *RHS) const {
+    auto *LC = getIRConstant(LHS);
+    auto *RC = getIRConstant(RHS);
+    if (LC && RC)
+      return ConstantFoldCompareInstruction(Pred, LC, RC);
+    return nullptr;
+  }
+
+  /// Fold the pointer add of \p Offset to \p Base as a constant GEP with i8
+  /// source element type and no-wrap flags \p NW. Returns nullptr if either
+  /// operand is not an IR constant.
+  Value *foldPtrAdd(VPValue *Base, VPValue *Offset, GEPNoWrapFlags NW) const {
+    auto *BC = getIRConstant(Base);
+    auto *OC = getIRConstant(Offset);
+    if (BC && OC) {
+      auto &Ctx = BC->getType()->getContext();
+      return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ctx), BC, OC, NW);
+    }
+    return nullptr;
+  }
+
+  /// Fold the cast \p Opcode of \p Op to \p DestTy. Returns nullptr if \p Op
+  /// is not an IR constant.
+  Value *foldCast(Instruction::CastOps Opcode, VPValue *Op,
+                  Type *DestTy) const {
+    auto *C = getIRConstant(Op);
+    if (C) {
+      // Build a constant expression when ConstantExpr::isDesirableCastOp
+      // approves the opcode; otherwise use the generic cast folder.
+      if (ConstantExpr::isDesirableCastOp(Opcode))
+        return ConstantExpr::getCast(Opcode, C, DestTy);
+      return ConstantFoldCastInstruction(Opcode, C, DestTy);
+    }
+    return nullptr;
+  }
+};
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANCONSTANTFOLDER_H
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 6c917e4eef655..f0542abcae4fb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -533,8 +533,8 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
                     VPBuilder &Builder) {
   VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
   VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
-  VPSingleDefRecipe *BaseIV = Builder.createDerivedIV(
-      Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
+  VPValue *BaseIV = Builder.createDerivedIV(Kind, FPBinOp, StartV, CanonicalIV,
+                                            Step, "offset.idx");
 
   // Truncate base induction if needed.
   Type *CanonicalIVType = CanonicalIV->getScalarType();
diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll
index e6c9ce3381f73..141ef6ec0b97e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll
@@ -12,56 +12,10 @@ define void @replicate_udiv_with_only_first_lane_used(i32 %x, ptr %dst, i64 %d)
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[X]], 10
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 0
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_UDIV_CONTINUE14:.*]] ]
-; CHECK-NEXT:    br i1 false, label %[[PRED_UDIV_IF:.*]], label %[[PRED_UDIV_CONTINUE:.*]]
-; CHECK:       [[PRED_UDIV_IF]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = udiv i64 99, [[D]]
-; CHECK-NEXT:    br label %[[PRED_UDIV_CONTINUE]]
-; CHECK:       [[PRED_UDIV_CONTINUE]]:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[TMP0]], %[[PRED_UDIV_IF]] ]
-; CHECK-NEXT:    br i1 false, label %[[PRED_UDIV_IF1:.*]], label %[[PRED_UDIV_CONTINUE2:.*]]
-; CHECK:       [[PRED_UDIV_IF1]]:
-; CHECK-NEXT:    [[TMP2:%.*]] = udiv i64 99, [[D]]
-; CHECK-NEXT:    br label %[[PRED_UDIV_CONTINUE2]]
-; CHECK:       [[PRED_UDIV_CONTINUE2]]:
-; CHECK-NEXT:    br i1 false, label %[[PRED_UDIV_IF3:.*]], label %[[PRED_UDIV_CONTINUE4:.*]]
-; CHECK:       [[PRED_UDIV_IF3]]:
-; CHECK-NEXT:    [[TMP3:%.*]] = udiv i64 99, [[D]]
-; CHECK-NEXT:    br label %[[PRED_UDIV_CONTINUE4]]
-; CHECK:       [[PRED_UDIV_CONTINUE4]]:
-; CHECK-NEXT:    br i1 false, label %[[PRED_UDIV_IF5:.*]], label %[[PRED_UDIV_CONTINUE6:.*]]
-; CHECK:       [[PRED_UDIV_IF5]]:
-; CHECK-NEXT:    [[TMP4:%.*]] = udiv i64 99, [[D]]
-; CHECK-NEXT:    br label %[[PRED_UDIV_CONTINUE6]]
-; CHECK:       [[PRED_UDIV_CONTINUE6]]:
-; CHECK-NEXT:    br i1 false, label %[[PRED_UDIV_IF7:.*]], label %[[PRED_UDIV_CONTINUE8:.*]]
-; CHECK:       [[PRED_UDIV_IF7]]:
-; CHECK-NEXT:    [[TMP5:%.*]] = udiv i64 99, [[D]]
-; CHECK-NEXT:    br label %[[PRED_UDIV_CONTINUE8]]
-; CHECK:       [[PRED_UDIV_CONTINUE8]]:
-; CHECK-NEXT:    [[TMP6:%.*]] = phi i64 [ poison, %[[PRED_UDIV_CONTINUE6]] ], [ [[TMP5]], %[[PRED_UDIV_IF7]] ]
-; CHECK-NEXT:    br i1 false, label %[[PRED_UDIV_IF9:.*]], label %[[PRED_UDIV_CONTINUE10:.*]]
-; CHECK:       [[PRED_UDIV_IF9]]:
-; CHECK-NEXT:    [[TMP7:%.*]] = udiv i64 99, [[D]]
-; CHECK-NEXT:    br label %[[PRED_UDIV_CONTINUE10]]
-; CHECK:       [[PRED_UDIV_CONTINUE10]]:
-; CHECK-NEXT:    br i1 false, label %[[PRED_UDIV_IF11:.*]], label %[[PRED_UDIV_CONTINUE12:.*]]
-; CHECK:       [[PRED_UDIV_IF11]]:
-; CHECK-NEXT:    [[TMP8:%.*]] = udiv i64 99, [[D]]
-; CHECK-NEXT:    br label %[[PRED_UDIV_CONTINUE12]]
-; CHECK:       [[PRED_UDIV_CONTINUE12]]:
-; CHECK-NEXT:    br i1 false, label %[[PRED_UDIV_IF13:.*]], label %[[PRED_UDIV_CONTINUE14]]
-; CHECK:       [[PRED_UDIV_IF13]]:
-; CHECK-NEXT:    [[TMP9:%.*]] = udiv i64 99, [[D]]
-; CHECK-NEXT:    br label %[[PRED_UDIV_CONTINUE14]]
-; CHECK:       [[PRED_UDIV_CONTINUE14]]:
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select i1 true, i64 0, i64 [[TMP1]]
-; CHECK-NEXT:    [[PREDPHI15:%.*]] = select i1 true, i64 0, i64 [[TMP6]]
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i16, ptr [[DST]], i64 [[PREDPHI]]
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 [[PREDPHI15]]
-; CHECK-NEXT:    store i16 0, ptr [[TMP10]], align 2
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    store i16 0, ptr [[TMP11]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
index c3164762e8130..c294ec11b906d 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
@@ -202,15 +202,14 @@ exit:
 ; DBG-NEXT: Successor(s): vector.ph
 ; DBG-EMPTY:
 ; DBG-NEXT: vector.ph:
-; DBG-NEXT:   SCALAR-CAST vp<[[CAST:%.+]]> = trunc ir<1> to i32
 ; DBG-NEXT: Successor(s): vector loop
 ; DBG-EMPTY:
 ; DBG-NEXT: <x1> vector loop: {
 ; DBG-NEXT:   vector.body:
-; DBG-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
+; DBG-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[INDEX_NEXT:%.*]]>
 ; DBG-NEXT:     FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, vp<[[SCALAR_STEPS:.+]]>
 ; DBG-NEXT:     SCALAR-CAST vp<[[TRUNC_IV:%.+]]> = trunc vp<[[CAN_IV]]> to i32
-; DBG-NEXT:     vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[TRUNC_IV]]>, vp<[[CAST]]>
+; DBG-NEXT:     vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[TRUNC_IV]]>, ir<1>
 ; DBG-NEXT:     EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%for>, vp<[[SCALAR_STEPS]]>
 ; DBG-NEXT:     CLONE store vp<[[SPLICE]]>, ir<%dst>
 ; DBG-NEXT:     EMIT vp<[[IV_INC:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>



More information about the llvm-commits mailing list