[llvm] e48b1e8 - [LV] Split off invariance check from isUniform (NFCI).

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 1 11:09:43 PDT 2023


Author: Florian Hahn
Date: 2023-06-01T19:09:11+01:00
New Revision: e48b1e87a319e2e6645d8bb4b08432a7fd08e0b9

URL: https://github.com/llvm/llvm-project/commit/e48b1e87a319e2e6645d8bb4b08432a7fd08e0b9
DIFF: https://github.com/llvm/llvm-project/commit/e48b1e87a319e2e6645d8bb4b08432a7fd08e0b9.diff

LOG: [LV] Split off invariance check from isUniform (NFCI).

After 572cfa3fde5433, isUniform now checks VF based uniformity instead of
just invariance as before.

As follow-up cleanup suggested in D148841, separate the invariance check
out and update callers that currently check only for invariance.

This also moves the implementation of isUniform from LoopAccessAnalysis
to LoopVectorizationLegality, as LoopAccessAnalysis doesn't use the more
general isUniform.

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/LoopAccessAnalysis.h
    llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
    llvm/lib/Analysis/LoopAccessAnalysis.cpp
    llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 9fe4a35284866..f8b0a31a527ff 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -588,9 +588,8 @@ class LoopAccessInfo {
   static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
                                     DominatorTree *DT);
 
-  /// Returns true if value \p V is uniform across \p VF lanes, when \p VF is
-  /// provided, and otherwise if \p V is invariant across all loop iterations.
-  bool isUniform(Value *V, std::optional<ElementCount> VF = std::nullopt) const;
+  /// Returns true if value \p V is loop invariant.
+  bool isInvariant(Value *V) const;
 
   uint64_t getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; }
   unsigned getNumStores() const { return NumStores; }

diff  --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index c666267456213..f97029ce7e50f 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -349,13 +349,16 @@ class LoopVectorizationLegality {
 
   /// Returns true if value V is uniform across \p VF lanes, when \p VF is
   /// provided, and otherwise if \p V is invariant across all loop iterations.
-  bool isUniform(Value *V, std::optional<ElementCount> VF = std::nullopt) const;
+  bool isInvariant(Value *V) const;
+
+  /// Returns true if value V is uniform across \p VF lanes, when \p VF is
+  /// provided, and otherwise if \p V is invariant across all loop iterations.
+  bool isUniform(Value *V, ElementCount VF) const;
 
   /// A uniform memory op is a load or store which accesses the same memory
   /// location on all \p VF lanes, if \p VF is provided and otherwise if the
   /// memory location is invariant.
-  bool isUniformMemOp(Instruction &I,
-                      std::optional<ElementCount> VF = std::nullopt) const;
+  bool isUniformMemOp(Instruction &I, ElementCount VF) const;
 
   /// Returns the information that we collected about runtime memory check.
   const RuntimePointerChecking *getRuntimePointerChecking() const {

diff  --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 7a78906eec2d2..30033f1e72eff 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2290,7 +2290,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
   for (StoreInst *ST : Stores) {
     Value *Ptr = ST->getPointerOperand();
 
-    if (isUniform(Ptr)) {
+    if (isInvariant(Ptr)) {
       // Record store instructions to loop invariant addresses
       StoresToInvariantAddresses.push_back(ST);
       HasDependenceInvolvingLoopInvariantAddress |=
@@ -2532,128 +2532,14 @@ OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
   return *Report;
 }
 
-namespace {
-/// A rewriter to build the SCEVs for each of the VF lanes in the expected
-/// vectorized loop, which can then be compared to detect their uniformity. This
-/// is done by replacing the AddRec SCEVs of the original scalar loop (TheLoop)
-/// with new AddRecs where the step is multiplied by StepMultiplier and Offset *
-/// Step is added. Also checks if all sub-expressions are analyzable w.r.t.
-/// uniformity.
-class SCEVAddRecForUniformityRewriter
-    : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
-  /// Multiplier to be applied to the step of AddRecs in TheLoop.
-  unsigned StepMultiplier;
-
-  /// Offset to be added to the AddRecs in TheLoop.
-  unsigned Offset;
-
-  /// Loop for which to rewrite AddRecsFor.
-  Loop *TheLoop;
-
-  /// Is any sub-expressions not analyzable w.r.t. uniformity?
-  bool CannotAnalyze = false;
-
-  bool canAnalyze() const { return !CannotAnalyze; }
-
-public:
-  SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
-                                  unsigned Offset, Loop *TheLoop)
-      : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
-        TheLoop(TheLoop) {}
-
-  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
-    assert(Expr->getLoop() == TheLoop &&
-           "addrec outside of TheLoop must be invariant and should have been "
-           "handled earlier");
-    // Build a new AddRec by multiplying the step by StepMultiplier and
-    // incrementing the start by Offset * step.
-    Type *Ty = Expr->getType();
-    auto *Step = Expr->getStepRecurrence(SE);
-    if (!SE.isLoopInvariant(Step, TheLoop)) {
-      CannotAnalyze = true;
-      return Expr;
-    }
-    auto *NewStep = SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
-    auto *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
-    auto *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);
-    return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
-  }
-
-  const SCEV *visit(const SCEV *S) {
-    if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
-      return S;
-    return SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter>::visit(S);
-  }
-
-  const SCEV *visitUnknown(const SCEVUnknown *S) {
-    if (SE.isLoopInvariant(S, TheLoop))
-      return S;
-    // The value could vary across iterations.
-    CannotAnalyze = true;
-    return S;
-  }
-
-  const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
-    // Could not analyze the expression.
-    CannotAnalyze = true;
-    return S;
-  }
-
-  static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
-                             unsigned StepMultiplier, unsigned Offset,
-                             Loop *TheLoop) {
-    /// Bail out if the expression does not contain an UDiv expression.
-    /// Uniform values which are not loop invariant require operations to strip
-    /// out the lowest bits. For now just look for UDivs and use it to avoid
-    /// re-writing UDIV-free expressions for other lanes to limit compile time.
-    if (!SCEVExprContains(S,
-                          [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
-      return SE.getCouldNotCompute();
-
-    SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
-                                             TheLoop);
-    const SCEV *Result = Rewriter.visit(S);
-
-    if (Rewriter.canAnalyze())
-      return Result;
-    return SE.getCouldNotCompute();
-  }
-};
-
-} // namespace
-
-bool LoopAccessInfo::isUniform(Value *V, std::optional<ElementCount> VF) const {
+bool LoopAccessInfo::isInvariant(Value *V) const {
   auto *SE = PSE->getSE();
-  // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
-  // never considered uniform.
   // TODO: Is this really what we want? Even without FP SCEV, we may want some
-  // trivially loop-invariant FP values to be considered uniform.
+  // trivially loop-invariant FP values to be considered invariant.
   if (!SE->isSCEVable(V->getType()))
     return false;
   const SCEV *S = SE->getSCEV(V);
-  if (SE->isLoopInvariant(S, TheLoop))
-    return true;
-  if (!VF || VF->isScalable())
-    return false;
-  if (VF->isScalar())
-    return true;
-
-  // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
-  // lane 0 matches the expressions for all other lanes.
-  unsigned FixedVF = VF->getKnownMinValue();
-  const SCEV *FirstLaneExpr =
-      SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
-  if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
-    return false;
-
-  // Make sure the expressions for lanes FixedVF-1..1 match the expression for
-  // lane 0. We check lanes in reverse order for compile-time, as frequently
-  // checking the last lane is sufficient to rule out uniformity.
-  return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
-    const SCEV *IthLaneExpr =
-        SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
-    return FirstLaneExpr == IthLaneExpr;
-  });
+  return SE->isLoopInvariant(S, TheLoop);
 }
 
 /// Find the operand of the GEP that should be checked for consecutive

diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 2fa54b3011dd6..f923f0be66219 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -471,13 +471,135 @@ int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
   return 0;
 }
 
-bool LoopVectorizationLegality::isUniform(
-    Value *V, std::optional<ElementCount> VF) const {
-  return LAI->isUniform(V, VF);
+bool LoopVectorizationLegality::isInvariant(Value *V) const {
+  return LAI->isInvariant(V);
 }
 
-bool LoopVectorizationLegality::isUniformMemOp(
-    Instruction &I, std::optional<ElementCount> VF) const {
+namespace {
+/// A rewriter to build the SCEVs for each of the VF lanes in the expected
+/// vectorized loop, which can then be compared to detect their uniformity. This
+/// is done by replacing the AddRec SCEVs of the original scalar loop (TheLoop)
+/// with new AddRecs where the step is multiplied by StepMultiplier and Offset *
+/// Step is added. Also checks if all sub-expressions are analyzable w.r.t.
+/// uniformity.
+class SCEVAddRecForUniformityRewriter
+    : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
+  /// Multiplier to be applied to the step of AddRecs in TheLoop.
+  unsigned StepMultiplier;
+
+  /// Offset to be added to the AddRecs in TheLoop.
+  unsigned Offset;
+
+  /// Loop for which to rewrite AddRecsFor.
+  Loop *TheLoop;
+
+  /// Is any sub-expressions not analyzable w.r.t. uniformity?
+  bool CannotAnalyze = false;
+
+  bool canAnalyze() const { return !CannotAnalyze; }
+
+public:
+  SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
+                                  unsigned Offset, Loop *TheLoop)
+      : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
+        TheLoop(TheLoop) {}
+
+  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+    assert(Expr->getLoop() == TheLoop &&
+           "addrec outside of TheLoop must be invariant and should have been "
+           "handled earlier");
+    // Build a new AddRec by multiplying the step by StepMultiplier and
+    // incrementing the start by Offset * step.
+    Type *Ty = Expr->getType();
+    auto *Step = Expr->getStepRecurrence(SE);
+    if (!SE.isLoopInvariant(Step, TheLoop)) {
+      CannotAnalyze = true;
+      return Expr;
+    }
+    auto *NewStep = SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
+    auto *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
+    auto *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);
+    return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
+  }
+
+  const SCEV *visit(const SCEV *S) {
+    if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
+      return S;
+    return SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter>::visit(S);
+  }
+
+  const SCEV *visitUnknown(const SCEVUnknown *S) {
+    if (SE.isLoopInvariant(S, TheLoop))
+      return S;
+    // The value could vary across iterations.
+    CannotAnalyze = true;
+    return S;
+  }
+
+  const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
+    // Could not analyze the expression.
+    CannotAnalyze = true;
+    return S;
+  }
+
+  static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
+                             unsigned StepMultiplier, unsigned Offset,
+                             Loop *TheLoop) {
+    /// Bail out if the expression does not contain an UDiv expression.
+    /// Uniform values which are not loop invariant require operations to strip
+    /// out the lowest bits. For now just look for UDivs and use it to avoid
+    /// re-writing UDIV-free expressions for other lanes to limit compile time.
+    if (!SCEVExprContains(S,
+                          [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
+      return SE.getCouldNotCompute();
+
+    SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
+                                             TheLoop);
+    const SCEV *Result = Rewriter.visit(S);
+
+    if (Rewriter.canAnalyze())
+      return Result;
+    return SE.getCouldNotCompute();
+  }
+};
+
+} // namespace
+
+bool LoopVectorizationLegality::isUniform(Value *V, ElementCount VF) const {
+  if (isInvariant(V))
+    return true;
+  if (VF.isScalable())
+    return false;
+  if (VF.isScalar())
+    return true;
+
+  // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
+  // never considered uniform.
+  auto *SE = PSE.getSE();
+  if (!SE->isSCEVable(V->getType()))
+    return false;
+  const SCEV *S = SE->getSCEV(V);
+
+  // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
+  // lane 0 matches the expressions for all other lanes.
+  unsigned FixedVF = VF.getKnownMinValue();
+  const SCEV *FirstLaneExpr =
+      SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
+  if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
+    return false;
+
+  // Make sure the expressions for lanes FixedVF-1..1 match the expression for
+  // lane 0. We check lanes in reverse order for compile-time, as frequently
+  // checking the last lane is sufficient to rule out uniformity.
+  return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
+    const SCEV *IthLaneExpr =
+        SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
+    return FirstLaneExpr == IthLaneExpr;
+  });
+}
+
+bool LoopVectorizationLegality::isUniformMemOp(Instruction &I,
+                                               ElementCount VF) const {
   Value *Ptr = getLoadStorePointerOperand(&I);
   if (!Ptr)
     return false;

diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e102efca72649..a4cd4394697f3 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4436,10 +4436,10 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
     // both speculation safety (which follows from the same argument as loads),
     // but also must prove the value being stored is correct.  The easiest
     // form of the later is to require that all values stored are the same.
-    if (Legal->isUniformMemOp(*I) &&
-      (isa<LoadInst>(I) ||
-       (isa<StoreInst>(I) &&
-        TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()))) &&
+    if (Legal->isInvariant(getLoadStorePointerOperand(I)) &&
+        (isa<LoadInst>(I) ||
+         (isa<StoreInst>(I) &&
+          TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()))) &&
         !Legal->blockNeedsPredication(I->getParent()))
       return false;
     return true;
@@ -4510,7 +4510,8 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
   // second vector operand. One example of this are shifts on x86.
   Value *Op2 = I->getOperand(1);
   auto Op2Info = TTI.getOperandInfo(Op2);
-  if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue && Legal->isUniform(Op2))
+  if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue &&
+      Legal->isInvariant(Op2))
     Op2Info.Kind = TargetTransformInfo::OK_UniformValue;
 
   SmallVector<const Value *, 4> Operands(I->operand_values());
@@ -4704,7 +4705,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
     if (isa<StoreInst>(I) && I->getOperand(0) == Ptr)
       return false;
     return getLoadStorePointerOperand(I) == Ptr &&
-           (isUniformDecision(I, VF) || Legal->isUniform(Ptr));
+           (isUniformDecision(I, VF) || Legal->isInvariant(Ptr));
   };
 
   // Holds a list of values which are known to have at least one uniform use.
@@ -6511,7 +6512,7 @@ LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
   }
   StoreInst *SI = cast<StoreInst>(I);
 
-  bool isLoopInvariantStoreValue = Legal->isUniform(SI->getValueOperand());
+  bool isLoopInvariantStoreValue = Legal->isInvariant(SI->getValueOperand());
   return TTI.getAddressComputationCost(ValTy) +
          TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS,
                              CostKind) +
@@ -7186,7 +7187,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
     // second vector operand. One example of this are shifts on x86.
     Value *Op2 = I->getOperand(1);
     auto Op2Info = TTI.getOperandInfo(Op2);
-    if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue && Legal->isUniform(Op2))
+    if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue &&
+        Legal->isInvariant(Op2))
       Op2Info.Kind = TargetTransformInfo::OK_UniformValue;
 
     SmallVector<const Value *, 4> Operands(I->operand_values());


        


More information about the llvm-commits mailing list