[llvm] 7fbfcc6 - [LV/LAA] Use PSE to identify stride multiplies which simplify [mostly nfc]

Thu May 11 11:16:11 PDT 2023

Author: Philip Reames
Date: 2023-05-11T11:16:04-07:00
New Revision: 7fbfcc653f372aed37f81ec0ca4bb2b0ee90a9f4

URL: https://github.com/llvm/llvm-project/commit/7fbfcc653f372aed37f81ec0ca4bb2b0ee90a9f4
DIFF: https://github.com/llvm/llvm-project/commit/7fbfcc653f372aed37f81ec0ca4bb2b0ee90a9f4.diff

LOG: [LV/LAA] Use PSE to identify stride multiplies which simplify [mostly nfc]

LV/LAA will speculate that (some) strided access patterns have unit stride, and insert runtime checks if required.

LV cost models a multiply by such a stride as free.  We did this by keeping around the StrideSet structure, just to check whether one of the operands was one of the strides we speculated on.

We can instead just ask PredicatedScalarEvolution whether either of the operands is one (after predicates are applied).  We get mostly the same result - PSE can prove it in more cases in theory - and simpler code.

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/LoopAccessAnalysis.h
    llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
    llvm/lib/Analysis/LoopAccessAnalysis.cpp
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 11b4d621d7640..0258285746d92 100644

--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -616,9 +616,6 @@ class LoopAccessInfo {
     return SymbolicStrides;
   }
 
-  /// Pointer has a symbolic stride.
-  bool hasStride(Value *V) const { return StrideSet.count(V); }
-
   /// Print the information about the memory accesses in the loop.
   void print(raw_ostream &OS, unsigned Depth = 0) const;
 
@@ -702,9 +699,6 @@ class LoopAccessInfo {
   /// If an access has a symbolic strides, this maps the pointer value to
   /// the stride symbol.
   DenseMap<Value *, const SCEV *> SymbolicStrides;
-
-  /// Set of symbolic strides values.
-  SmallPtrSet<Value *, 8> StrideSet;
 };
 
 Value *stripIntegerCast(Value *V);

diff  --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 1863e2e65553f..ec71eb178af17 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -371,8 +371,6 @@ class LoopVectorizationLegality {
     return LAI->getDepChecker().getMaxSafeVectorWidthInBits();
   }
 
-  bool hasStride(Value *V) { return LAI->hasStride(V); }
-
   /// Returns true if vector representation of the instruction \p I
   /// requires mask.
   bool isMaskRequired(const Instruction *I) const {

diff  --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index df21679e14448..358f97f83d40c 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2758,7 +2758,6 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
   // SCEVUnknown as we expect.
   Value *StrideVal = stripIntegerCast(Stride);
   SymbolicStrides[Ptr] = cast<SCEVUnknown>(PSE->getSCEV(StrideVal));
-  StrideSet.insert(Stride);
 }
 
 LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,

diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 2c27e18e2301b..4727788ee090f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6447,11 +6447,6 @@ static const SCEV *getAddressAccessSCEV(
   return PSE.getSCEV(Ptr);
 }
 
-static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) {
-  return Legal->hasStride(I->getOperand(0)) ||
-         Legal->hasStride(I->getOperand(1));
-}
-
 InstructionCost
 LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
                                                         ElementCount VF) {
@@ -7219,8 +7214,12 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
   case Instruction::And:
   case Instruction::Or:
   case Instruction::Xor: {
-    // Since we will replace the stride by 1 the multiplication should go away.
-    if (I->getOpcode() == Instruction::Mul && isStrideMul(I, Legal))
+    // If we're speculating on the stride being 1, the multiplication may
+    // fold away.  We can generalize this for all operations using the notion
+    // of neutral elements.  (TODO)
+    if (I->getOpcode() == Instruction::Mul &&
+        (PSE.getSCEV(I->getOperand(0))->isOne() ||
+         PSE.getSCEV(I->getOperand(1))->isOne()))
       return 0;
 
     // Detect reduction patterns