[llvm] [LV] Move isPredicatedInst into LVLegality (NFC) (PR #134261)

Fri Jun 20 08:54:23 PDT 2025

https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/134261

>From 57d2b8cb78ab73bc87c8b92e7fbaf64b69ca2add Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Thu, 3 Apr 2025 14:49:08 +0100
Subject: [PATCH 1/2] [LV] Fold isPredicatedInst into isMaskRequired

Fold LoopVectorizationCostModel::isPredicatedInst into
LoopVectorizationLegality::isMaskRequired, fixing a pending TODO item.
Note that we still need to pass whether or not we're tail-folding by
masking from the cost-model into isMaskRequired.
---
 .../Vectorize/LoopVectorizationLegality.h     | 12 ++--
 .../Transforms/Vectorize/LoopVectorize.cpp    | 68 ++++++++-----------
 2 files changed, 37 insertions(+), 43 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index d654ac3ec9273..a101e7d4807d9 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -418,11 +418,13 @@ class LoopVectorizationLegality {
     return LAI->getDepChecker().getStoreLoadForwardSafeDistanceInBits();
   }
 
-  /// Returns true if vector representation of the instruction \p I
-  /// requires mask.
-  bool isMaskRequired(const Instruction *I) const {
-    return MaskedOp.contains(I);
-  }
+  /// Returns true if MaskedOp contains \p I.
+  bool isMasked(Instruction *I) const { return MaskedOp.contains(I); }
+
+  /// Returns true if \p I is an instruction that needs to be predicated
+  /// at runtime.  The result is independent of the predication mechanism.
+  /// Superset of instructions that return true for isScalarWithPredication.
+  bool isMaskRequired(Instruction *I, bool FoldTailByMasking) const;
 
   /// Returns true if there is at least one function call in the loop which
   /// has a vectorized variant available.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 88b2ffba1b79f..f05782b51f953 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1282,11 +1282,6 @@ class LoopVectorizationCostModel {
   /// \p VF is the vectorization factor that will be used to vectorize \p I.
   bool isScalarWithPredication(Instruction *I, ElementCount VF) const;
 
-  /// Returns true if \p I is an instruction that needs to be predicated
-  /// at runtime.  The result is independent of the predication mechanism.
-  /// Superset of instructions that return true for isScalarWithPredication.
-  bool isPredicatedInst(Instruction *I) const;
-
   /// Return the costs for our two available strategies for lowering a
   /// div/rem operation which requires speculating at least one lane.
   /// First result is for scalarization (will be invalid for scalable
@@ -3023,7 +3018,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
 
 bool LoopVectorizationCostModel::isScalarWithPredication(
     Instruction *I, ElementCount VF) const {
-  if (!isPredicatedInst(I))
+  if (!Legal->isMaskRequired(I, foldTailByMasking()))
     return false;
 
   // Do we have a non-scalar lowering for this predicated
@@ -3062,22 +3057,22 @@ bool LoopVectorizationCostModel::isScalarWithPredication(
   }
 }
 
-// TODO: Fold into LoopVectorizationLegality::isMaskRequired.
-bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
+bool LoopVectorizationLegality::isMaskRequired(Instruction *I,
+                                               bool FoldTailByMasking) const {
   // TODO: We can use the loop-preheader as context point here and get
   // context sensitive reasoning for isSafeToSpeculativelyExecute.
   if (isSafeToSpeculativelyExecute(I) ||
-      (isa<LoadInst, StoreInst, CallInst>(I) && !Legal->isMaskRequired(I)) ||
+      (isa<LoadInst, StoreInst, CallInst>(I) && !isMasked(I)) ||
       isa<BranchInst, SwitchInst, PHINode, AllocaInst>(I))
     return false;
 
   // If the instruction was executed conditionally in the original scalar loop,
   // predication is needed with a mask whose lanes are all possibly inactive.
-  if (Legal->blockNeedsPredication(I->getParent()))
+  if (blockNeedsPredication(I->getParent()))
     return true;
 
   // If we're not folding the tail by masking, predication is unnecessary.
-  if (!foldTailByMasking())
+  if (!FoldTailByMasking)
     return false;
 
   // All that remain are instructions with side-effects originally executed in
@@ -3085,33 +3080,33 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
   // having at least one active lane (the first). If the side-effects of the
   // instruction are invariant, executing it w/o (the tail-folding) mask is safe
   // - it will cause the same side-effects as when masked.
-  switch(I->getOpcode()) {
+  switch (I->getOpcode()) {
   default:
     llvm_unreachable(
         "instruction should have been considered by earlier checks");
   case Instruction::Call:
     // Side-effects of a Call are assumed to be non-invariant, needing a
     // (fold-tail) mask.
-    assert(Legal->isMaskRequired(I) &&
+    assert(isMasked(I) &&
            "should have returned earlier for calls not needing a mask");
     return true;
   case Instruction::Load:
     // If the address is loop invariant no predication is needed.
-    return !Legal->isInvariant(getLoadStorePointerOperand(I));
+    return !isInvariant(getLoadStorePointerOperand(I));
   case Instruction::Store: {
     // For stores, we need to prove both speculation safety (which follows from
     // the same argument as loads), but also must prove the value being stored
     // is correct.  The easiest form of the later is to require that all values
     // stored are the same.
-    return !(Legal->isInvariant(getLoadStorePointerOperand(I)) &&
-             Legal->isInvariant(cast<StoreInst>(I)->getValueOperand()));
+    return !(isInvariant(getLoadStorePointerOperand(I)) &&
+             isInvariant(cast<StoreInst>(I)->getValueOperand()));
   }
   case Instruction::UDiv:
   case Instruction::SDiv:
   case Instruction::SRem:
   case Instruction::URem:
     // If the divisor is loop-invariant no predication is needed.
-    return !Legal->isInvariant(I->getOperand(1));
+    return !isInvariant(I->getOperand(1));
   }
 }
 
@@ -3226,8 +3221,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
   // (either a gap at the end of a load-access that may result in a speculative
   // load, or any gaps in a store-access).
   bool PredicatedAccessRequiresMasking =
-      blockNeedsPredicationForAnyReason(I->getParent()) &&
-      Legal->isMaskRequired(I);
+      blockNeedsPredicationForAnyReason(I->getParent()) && Legal->isMasked(I);
   bool LoadAccessWithGapsRequiresEpilogMasking =
       isa<LoadInst>(I) && Group->requiresScalarEpilogue() &&
       !isScalarEpilogueAllowed();
@@ -3317,7 +3311,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
                         << *I << "\n");
       return;
     }
-    if (isPredicatedInst(I)) {
+    if (Legal->isMaskRequired(I, foldTailByMasking())) {
       LLVM_DEBUG(
           dbgs() << "LV: Found not uniform due to requiring predication: " << *I
                  << "\n");
@@ -4997,7 +4991,7 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I,
   // from moving "masked load/store" check from legality to cost model.
   // Masked Load/Gather emulation was previously never allowed.
   // Limited number of Masked Store/Scatter emulation was allowed.
-  assert((isPredicatedInst(I)) &&
+  assert((Legal->isMaskRequired(I, foldTailByMasking())) &&
          "Expecting a scalar emulated instruction");
   return isa<LoadInst>(I) ||
          (isa<StoreInst>(I) &&
@@ -5299,7 +5293,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
   // If we have a predicated load/store, it will need extra i1 extracts and
   // conditional branches, but may not be executed for each vector lane. Scale
   // the cost by the probability of executing the predicated block.
-  if (isPredicatedInst(I)) {
+  if (Legal->isMaskRequired(I, foldTailByMasking())) {
     Cost /= getPredBlockCostDivisor(CostKind);
 
     // Add the cost of an i1 extract and a branch
@@ -5332,7 +5326,7 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
          "Stride should be 1 or -1 for consecutive memory access");
   const Align Alignment = getLoadStoreAlignment(I);
   InstructionCost Cost = 0;
-  if (Legal->isMaskRequired(I)) {
+  if (Legal->isMasked(I)) {
     Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
                                       CostKind);
   } else {
@@ -5390,8 +5384,7 @@ LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
 
   return TTI.getAddressComputationCost(VectorTy) +
          TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
-                                    Legal->isMaskRequired(I), Alignment,
-                                    CostKind, I);
+                                    Legal->isMasked(I), Alignment, CostKind, I);
 }
 
 InstructionCost
@@ -5420,12 +5413,11 @@ LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
       (isa<StoreInst>(I) && (Group->getNumMembers() < Group->getFactor()));
   InstructionCost Cost = TTI.getInterleavedMemoryOpCost(
       InsertPos->getOpcode(), WideVecTy, Group->getFactor(), Indices,
-      Group->getAlign(), AS, CostKind, Legal->isMaskRequired(I),
-      UseMaskForGaps);
+      Group->getAlign(), AS, CostKind, Legal->isMasked(I), UseMaskForGaps);
 
   if (Group->isReverse()) {
     // TODO: Add support for reversed masked interleaved access.
-    assert(!Legal->isMaskRequired(I) &&
+    assert(!Legal->isMasked(I) &&
            "Reverse masked interleaved access not supported.");
     Cost += Group->getNumMembers() *
             TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, {},
@@ -5918,7 +5910,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
         continue;
       }
 
-      bool MaskRequired = Legal->isMaskRequired(CI);
+      bool MaskRequired = Legal->isMasked(CI);
       // Compute corresponding vector type for return value and arguments.
       Type *RetTy = toVectorizedTy(ScalarRetTy, VF);
       for (Type *ScalarTy : ScalarTys)
@@ -6038,7 +6030,7 @@ bool LoopVectorizationCostModel::shouldConsiderInvariant(Value *Op) {
   // instruction in the loop. In that case, it is not trivially hoistable.
   auto *OpI = dyn_cast<Instruction>(Op);
   return !OpI || !TheLoop->contains(OpI) ||
-         (!isPredicatedInst(OpI) &&
+         (!Legal->isMaskRequired(OpI, foldTailByMasking()) &&
           (!isa<PHINode>(OpI) || OpI->getParent() != TheLoop->getHeader()) &&
           all_of(OpI->operands(),
                  [this](Value *Op) { return shouldConsiderInvariant(Op); }));
@@ -6221,7 +6213,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
   case Instruction::SDiv:
   case Instruction::URem:
   case Instruction::SRem:
-    if (VF.isVector() && isPredicatedInst(I)) {
+    if (VF.isVector() && Legal->isMaskRequired(I, foldTailByMasking())) {
       const auto [ScalarCost, SafeDivisorCost] = getDivRemSpeculationCost(I, VF);
       return isDivRemScalarWithPredication(ScalarCost, SafeDivisorCost) ?
         ScalarCost : SafeDivisorCost;
@@ -6405,8 +6397,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
         return TTI::CastContextHint::Interleave;
       case LoopVectorizationCostModel::CM_Scalarize:
       case LoopVectorizationCostModel::CM_Widen:
-        return Legal->isMaskRequired(I) ? TTI::CastContextHint::Masked
-                                        : TTI::CastContextHint::Normal;
+        return Legal->isMasked(I) ? TTI::CastContextHint::Masked
+                                  : TTI::CastContextHint::Normal;
       case LoopVectorizationCostModel::CM_Widen_Reverse:
         return TTI::CastContextHint::Reversed;
       case LoopVectorizationCostModel::CM_Unknown:
@@ -7744,7 +7736,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
     return nullptr;
 
   VPValue *Mask = nullptr;
-  if (Legal->isMaskRequired(I))
+  if (Legal->isMasked(I))
     Mask = getBlockInMask(Builder.getInsertBlock());
 
   // Determine if the pointer operand of the access is either consecutive or
@@ -7939,7 +7931,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
       //      vector variant at this VF requires a mask, so we synthesize an
       //      all-true mask.
       VPValue *Mask = nullptr;
-      if (Legal->isMaskRequired(CI))
+      if (Legal->isMasked(CI))
         Mask = getBlockInMask(Builder.getInsertBlock());
       else
         Mask = Plan.getOrAddLiveIn(
@@ -7980,7 +7972,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
   case Instruction::URem: {
     // If not provably safe, use a select to form a safe divisor before widening the
     // div/rem operation itself.  Otherwise fall through to general handling below.
-    if (CM.isPredicatedInst(I)) {
+    if (Legal->isMaskRequired(I, CM.foldTailByMasking())) {
       SmallVector<VPValue *> Ops(Operands);
       VPValue *Mask = getBlockInMask(Builder.getInsertBlock());
       VPValue *One =
@@ -8063,7 +8055,7 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,
 
   // In case of predicated execution (due to tail-folding, or conditional
   // execution, or both), pass the relevant mask.
-  if (Legal->isMaskRequired(HI->Store))
+  if (Legal->isMasked(HI->Store))
     HGramOps.push_back(getBlockInMask(Builder.getInsertBlock()));
 
   return new VPHistogramRecipe(Opcode, HGramOps, HI->Store->getDebugLoc());
@@ -8076,7 +8068,7 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
       [&](ElementCount VF) { return CM.isUniformAfterVectorization(I, VF); },
       Range);
 
-  bool IsPredicated = CM.isPredicatedInst(I);
+  bool IsPredicated = Legal->isMaskRequired(I, CM.foldTailByMasking());
 
   // Even if the instruction is not marked as uniform, there are certain
   // intrinsic calls that can be effectively treated as such, so we check for

>From c4e44baad0c17172ff78af7ea074a7d728fc3bba Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Tue, 13 May 2025 10:35:24 +0100
Subject: [PATCH 2/2] [LV] Move isMaskRequired to LVLegality

---
 .../Vectorize/LoopVectorizationLegality.cpp   | 53 +++++++++++++++++++
 .../Transforms/Vectorize/LoopVectorize.cpp    | 53 -------------------
 2 files changed, 53 insertions(+), 53 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 969d225c6ef2e..4719e0bf4f404 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1406,6 +1406,59 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
   return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
 }
 
+bool LoopVectorizationLegality::isMaskRequired(Instruction *I,
+                                               bool FoldTailByMasking) const {
+  // TODO: We can use the loop-preheader as context point here and get
+  // context sensitive reasoning for isSafeToSpeculativelyExecute.
+  if (isSafeToSpeculativelyExecute(I) ||
+      (isa<LoadInst, StoreInst, CallInst>(I) && !isMasked(I)) ||
+      isa<BranchInst, SwitchInst, PHINode, AllocaInst>(I))
+    return false;
+
+  // If the instruction was executed conditionally in the original scalar loop,
+  // predication is needed with a mask whose lanes are all possibly inactive.
+  if (blockNeedsPredication(I->getParent()))
+    return true;
+
+  // If we're not folding the tail by masking, predication is unnecessary.
+  if (!FoldTailByMasking)
+    return false;
+
+  // All that remain are instructions with side-effects originally executed in
+  // the loop unconditionally, but now execute under a tail-fold mask (only)
+  // having at least one active lane (the first). If the side-effects of the
+  // instruction are invariant, executing it w/o (the tail-folding) mask is safe
+  // - it will cause the same side-effects as when masked.
+  switch (I->getOpcode()) {
+  default:
+    llvm_unreachable(
+        "instruction should have been considered by earlier checks");
+  case Instruction::Call:
+    // Side-effects of a Call are assumed to be non-invariant, needing a
+    // (fold-tail) mask.
+    assert(isMasked(I) &&
+           "should have returned earlier for calls not needing a mask");
+    return true;
+  case Instruction::Load:
+    // If the address is loop invariant no predication is needed.
+    return !isInvariant(getLoadStorePointerOperand(I));
+  case Instruction::Store: {
+    // For stores, we need to prove both speculation safety (which follows from
+    // the same argument as loads), but also must prove the value being stored
+    // is correct.  The easiest form of the later is to require that all values
+    // stored are the same.
+    return !(isInvariant(getLoadStorePointerOperand(I)) &&
+             isInvariant(cast<StoreInst>(I)->getValueOperand()));
+  }
+  case Instruction::UDiv:
+  case Instruction::SDiv:
+  case Instruction::SRem:
+  case Instruction::URem:
+    // If the divisor is loop-invariant no predication is needed.
+    return !isInvariant(I->getOperand(1));
+  }
+}
+
 bool LoopVectorizationLegality::blockCanBePredicated(
     BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
     SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f05782b51f953..ebf1ad2604af7 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3057,59 +3057,6 @@ bool LoopVectorizationCostModel::isScalarWithPredication(
   }
 }
 
-bool LoopVectorizationLegality::isMaskRequired(Instruction *I,
-                                               bool FoldTailByMasking) const {
-  // TODO: We can use the loop-preheader as context point here and get
-  // context sensitive reasoning for isSafeToSpeculativelyExecute.
-  if (isSafeToSpeculativelyExecute(I) ||
-      (isa<LoadInst, StoreInst, CallInst>(I) && !isMasked(I)) ||
-      isa<BranchInst, SwitchInst, PHINode, AllocaInst>(I))
-    return false;
-
-  // If the instruction was executed conditionally in the original scalar loop,
-  // predication is needed with a mask whose lanes are all possibly inactive.
-  if (blockNeedsPredication(I->getParent()))
-    return true;
-
-  // If we're not folding the tail by masking, predication is unnecessary.
-  if (!FoldTailByMasking)
-    return false;
-
-  // All that remain are instructions with side-effects originally executed in
-  // the loop unconditionally, but now execute under a tail-fold mask (only)
-  // having at least one active lane (the first). If the side-effects of the
-  // instruction are invariant, executing it w/o (the tail-folding) mask is safe
-  // - it will cause the same side-effects as when masked.
-  switch (I->getOpcode()) {
-  default:
-    llvm_unreachable(
-        "instruction should have been considered by earlier checks");
-  case Instruction::Call:
-    // Side-effects of a Call are assumed to be non-invariant, needing a
-    // (fold-tail) mask.
-    assert(isMasked(I) &&
-           "should have returned earlier for calls not needing a mask");
-    return true;
-  case Instruction::Load:
-    // If the address is loop invariant no predication is needed.
-    return !isInvariant(getLoadStorePointerOperand(I));
-  case Instruction::Store: {
-    // For stores, we need to prove both speculation safety (which follows from
-    // the same argument as loads), but also must prove the value being stored
-    // is correct.  The easiest form of the later is to require that all values
-    // stored are the same.
-    return !(isInvariant(getLoadStorePointerOperand(I)) &&
-             isInvariant(cast<StoreInst>(I)->getValueOperand()));
-  }
-  case Instruction::UDiv:
-  case Instruction::SDiv:
-  case Instruction::SRem:
-  case Instruction::URem:
-    // If the divisor is loop-invariant no predication is needed.
-    return !isInvariant(I->getOperand(1));
-  }
-}
-
 std::pair<InstructionCost, InstructionCost>
 LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
                                                     ElementCount VF) const {