[llvm] Decompose gep variable upstream (PR #69152)

Mon Oct 16 00:33:10 PDT 2023

llvmbot wrote:




@llvm/pr-subscribers-llvm-analysis

Author: Harvin Iriawan (harviniriawan)

<details>
<summary>Changes</summary>

Further work on top of #65759  (the first 2 commits of this pull request, still pending review).

    [BasicAA] Add Vscale GEP decomposition on variable index
    
      Enable BasicAA to be done on Scalable GEP & LocationSize
      Scalable GEP expression such as @llvm.vscale and GEP of scalable type
      are attached to the VariableGEPIndex, with Val representing Vscale.
    
      AA works if there's only one variable index (the vscale) and
      constant offsets in the GEP for now

---

Patch is 45.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/69152.diff


10 Files Affected:

- (modified) llvm/include/llvm/Analysis/MemoryLocation.h (+36-12) 
- (modified) llvm/lib/Analysis/BasicAliasAnalysis.cpp (+181-60) 
- (modified) llvm/lib/Analysis/MemoryDependenceAnalysis.cpp (+3-2) 
- (modified) llvm/lib/CodeGen/StackProtector.cpp (+2-2) 
- (modified) llvm/lib/Transforms/IPO/AttributorAttributes.cpp (+2-1) 
- (modified) llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp (+19-10) 
- (added) llvm/test/Analysis/AliasSet/memloc-vscale.ll (+53) 
- (modified) llvm/test/Analysis/BasicAA/vscale.ll (+69-42) 
- (added) llvm/test/Transforms/GVN/scalable-memloc.ll (+29) 
- (modified) llvm/test/Transforms/GVN/vscale.ll (+3-8) 


``````````diff

diff --git a/llvm/include/llvm/Analysis/MemoryLocation.h b/llvm/include/llvm/Analysis/MemoryLocation.h
index 85ca84e68a13971..ef87412572145fa 100644
--- a/llvm/include/llvm/Analysis/MemoryLocation.h
+++ b/llvm/include/llvm/Analysis/MemoryLocation.h
@@ -64,16 +64,19 @@ class Value;
 //
 // If asked to represent a pathologically large value, this will degrade to
 // std::nullopt.
+// Store Scalable information in bit 62 of Value. Scalable information is
+// required to do Alias Analysis on Scalable quantities
 class LocationSize {
   enum : uint64_t {
     BeforeOrAfterPointer = ~uint64_t(0),
-    AfterPointer = BeforeOrAfterPointer - 1,
-    MapEmpty = BeforeOrAfterPointer - 2,
-    MapTombstone = BeforeOrAfterPointer - 3,
+    ScalableBit = uint64_t(1) << 62,
+    AfterPointer = (BeforeOrAfterPointer - 1) & ~ScalableBit,
+    MapEmpty = (BeforeOrAfterPointer - 2) & ~ScalableBit,
+    MapTombstone = (BeforeOrAfterPointer - 3) & ~ScalableBit,
     ImpreciseBit = uint64_t(1) << 63,
 
     // The maximum value we can represent without falling back to 'unknown'.
-    MaxValue = (MapTombstone - 1) & ~ImpreciseBit,
+    MaxValue = (MapTombstone - 1) & ~(ImpreciseBit | ScalableBit),
   };
 
   uint64_t Value;
@@ -88,6 +91,7 @@ class LocationSize {
                 "AfterPointer is imprecise by definition.");
   static_assert(BeforeOrAfterPointer & ImpreciseBit,
                 "BeforeOrAfterPointer is imprecise by definition.");
+  static_assert(~(MaxValue & ScalableBit), "Max value don't have bit 62 set");
 
 public:
   // FIXME: Migrate all users to construct via either `precise` or `upperBound`,
@@ -98,12 +102,17 @@ class LocationSize {
   // this assumes the provided value is precise.
   constexpr LocationSize(uint64_t Raw)
       : Value(Raw > MaxValue ? AfterPointer : Raw) {}
-
-  static LocationSize precise(uint64_t Value) { return LocationSize(Value); }
+  constexpr LocationSize(uint64_t Raw, bool Scalable)
+      : Value(Raw > MaxValue ? AfterPointer
+                             : Raw | (Scalable ? ScalableBit : uint64_t(0))) {}
+
+  // Make construction of LocationSize that takes in uint64_t to set Scalable
+  // information as false
+  static LocationSize precise(uint64_t Value) {
+    return LocationSize(Value, false /*Scalable*/);
+  }
   static LocationSize precise(TypeSize Value) {
-    if (Value.isScalable())
-      return afterPointer();
-    return precise(Value.getFixedValue());
+    return LocationSize(Value.getKnownMinValue(), Value.isScalable());
   }
 
   static LocationSize upperBound(uint64_t Value) {
@@ -150,6 +159,8 @@ class LocationSize {
       return beforeOrAfterPointer();
     if (Value == AfterPointer || Other.Value == AfterPointer)
       return afterPointer();
+    if (isScalable() || Other.isScalable())
+      return afterPointer();
 
     return upperBound(std::max(getValue(), Other.getValue()));
   }
@@ -157,9 +168,20 @@ class LocationSize {
   bool hasValue() const {
     return Value != AfterPointer && Value != BeforeOrAfterPointer;
   }
-  uint64_t getValue() const {
+  bool isScalable() const { return (Value & ScalableBit); }
+
+  TypeSize getValue() const {
     assert(hasValue() && "Getting value from an unknown LocationSize!");
-    return Value & ~ImpreciseBit;
+    assert((Value & ~(ImpreciseBit | ScalableBit)) < MaxValue &&
+           "Scalable bit of value should be masked");
+    return {Value & ~(ImpreciseBit | ScalableBit), isScalable()};
+  }
+
+  uint64_t getUintValue() const {
+    assert(hasValue() && "Getting value from an unknown LocationSize!");
+    assert((Value & ~(ImpreciseBit | ScalableBit)) < MaxValue &&
+           "Scalable bit of value should be masked");
+    return Value & ~(ImpreciseBit | ScalableBit);
   }
 
   // Returns whether or not this value is precise. Note that if a value is
@@ -169,7 +191,9 @@ class LocationSize {
   }
 
   // Convenience method to check if this LocationSize's value is 0.
-  bool isZero() const { return hasValue() && getValue() == 0; }
+  bool isZero() const {
+    return hasValue() && getValue().getKnownMinValue() == 0;
+  }
 
   /// Whether accesses before the base pointer are possible.
   bool mayBeBeforePointer() const { return Value == BeforeOrAfterPointer; }
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index c162b8f6edc1905..951716cfe99131a 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -44,6 +44,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
@@ -63,6 +64,7 @@
 #define DEBUG_TYPE "basicaa"
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 /// Enable analysis of recursive PHI nodes.
 static cl::opt<bool> EnableRecPhiAnalysis("basic-aa-recphi", cl::Hidden,
@@ -101,22 +103,23 @@ bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA,
 //===----------------------------------------------------------------------===//
 
 /// Returns the size of the object specified by V or UnknownSize if unknown.
-static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
-                              const TargetLibraryInfo &TLI,
-                              bool NullIsValidLoc,
-                              bool RoundToAlign = false) {
+/// getObjectSize does not support scalable Value
+static LocationSize getObjectSize(const Value *V, const DataLayout &DL,
+                                  const TargetLibraryInfo &TLI,
+                                  bool NullIsValidLoc,
+                                  bool RoundToAlign = false) {
   uint64_t Size;
   ObjectSizeOpts Opts;
   Opts.RoundToAlign = RoundToAlign;
   Opts.NullIsUnknownSize = NullIsValidLoc;
   if (getObjectSize(V, Size, DL, &TLI, Opts))
-    return Size;
-  return MemoryLocation::UnknownSize;
+    return LocationSize(Size);
+  return LocationSize(MemoryLocation::UnknownSize);
 }
 
 /// Returns true if we can prove that the object specified by V is smaller than
 /// Size.
-static bool isObjectSmallerThan(const Value *V, uint64_t Size,
+static bool isObjectSmallerThan(const Value *V, LocationSize Size,
                                 const DataLayout &DL,
                                 const TargetLibraryInfo &TLI,
                                 bool NullIsValidLoc) {
@@ -151,19 +154,21 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
 
   // This function needs to use the aligned object size because we allow
   // reads a bit past the end given sufficient alignment.
-  uint64_t ObjectSize = getObjectSize(V, DL, TLI, NullIsValidLoc,
-                                      /*RoundToAlign*/ true);
+  LocationSize ObjectSize = getObjectSize(V, DL, TLI, NullIsValidLoc,
+                                          /*RoundToAlign*/ true);
 
-  return ObjectSize != MemoryLocation::UnknownSize && ObjectSize < Size;
+  // Bail on comparing V and Size if Size is scalable
+  return ObjectSize != MemoryLocation::UnknownSize && !Size.isScalable() &&
+         ObjectSize.getValue() < Size.getValue();
 }
 
 /// Return the minimal extent from \p V to the end of the underlying object,
 /// assuming the result is used in an aliasing query. E.g., we do use the query
 /// location size and the fact that null pointers cannot alias here.
-static uint64_t getMinimalExtentFrom(const Value &V,
-                                     const LocationSize &LocSize,
-                                     const DataLayout &DL,
-                                     bool NullIsValidLoc) {
+static LocationSize getMinimalExtentFrom(const Value &V,
+                                         const LocationSize &LocSize,
+                                         const DataLayout &DL,
+                                         bool NullIsValidLoc) {
   // If we have dereferenceability information we know a lower bound for the
   // extent as accesses for a lower offset would be valid. We need to exclude
   // the "or null" part if null is a valid pointer. We can ignore frees, as an
@@ -175,15 +180,16 @@ static uint64_t getMinimalExtentFrom(const Value &V,
   // If queried with a precise location size, we assume that location size to be
   // accessed, thus valid.
   if (LocSize.isPrecise())
-    DerefBytes = std::max(DerefBytes, LocSize.getValue());
-  return DerefBytes;
+    DerefBytes = std::max(DerefBytes, LocSize.getValue().getKnownMinValue());
+  return LocationSize(DerefBytes, LocSize.isScalable());
 }
 
 /// Returns true if we can prove that the object specified by V has size Size.
-static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
+static bool isObjectSize(const Value *V, TypeSize Size, const DataLayout &DL,
                          const TargetLibraryInfo &TLI, bool NullIsValidLoc) {
-  uint64_t ObjectSize = getObjectSize(V, DL, TLI, NullIsValidLoc);
-  return ObjectSize != MemoryLocation::UnknownSize && ObjectSize == Size;
+  LocationSize ObjectSize = getObjectSize(V, DL, TLI, NullIsValidLoc);
+  return ObjectSize != MemoryLocation::UnknownSize &&
+         ObjectSize.getValue() == Size;
 }
 
 //===----------------------------------------------------------------------===//
@@ -342,13 +348,20 @@ struct LinearExpression {
 
 /// Analyzes the specified value as a linear expression: "A*V + B", where A and
 /// B are constant integers.
-static LinearExpression GetLinearExpression(
-    const CastedValue &Val,  const DataLayout &DL, unsigned Depth,
-    AssumptionCache *AC, DominatorTree *DT) {
+static LinearExpression GetLinearExpression(const CastedValue &Val,
+                                            const DataLayout &DL,
+                                            unsigned Depth, AssumptionCache *AC,
+                                            DominatorTree *DT) {
   // Limit our recursion depth.
   if (Depth == 6)
     return Val;
 
+  // If llvm.vscale is matched, set linear expression with scale 1 and offset 0
+  if (match(Val.V, m_VScale())) {
+    return LinearExpression(Val, APInt(Val.getBitWidth(), 1),
+                            APInt(Val.getBitWidth(), 0), true);
+  }
+
   if (const ConstantInt *Const = dyn_cast<ConstantInt>(Val.V))
     return LinearExpression(Val, APInt(Val.getBitWidth(), 0),
                             Val.evaluateWith(Const->getValue()), true);
@@ -455,6 +468,12 @@ struct VariableGEPIndex {
   CastedValue Val;
   APInt Scale;
 
+  // A value representing vscale quantity in a GEP expression
+  bool IsVscale;
+  // A flag indicating that the IsVscale variable GEP holds more than one Val
+  // (e.g. Vscale^2 or Vscale * X)
+  bool InvalidVarVscale;
+
   // Context instruction to use when querying information about this index.
   const Instruction *CxtI;
 
@@ -477,13 +496,10 @@ struct VariableGEPIndex {
     dbgs() << "\n";
   }
   void print(raw_ostream &OS) const {
-    OS << "(V=" << Val.V->getName()
-       << ", zextbits=" << Val.ZExtBits
-       << ", sextbits=" << Val.SExtBits
-       << ", truncbits=" << Val.TruncBits
-       << ", scale=" << Scale
-       << ", nsw=" << IsNSW
-       << ", negated=" << IsNegated << ")";
+    OS << "(V=" << Val.V->getName() << "  IsVscale=" << IsVscale
+       << ", zextbits=" << Val.ZExtBits << ", sextbits=" << Val.SExtBits
+       << ", truncbits=" << Val.TruncBits << ", scale=" << Scale
+       << ", nsw=" << IsNSW << ", negated=" << IsNegated << ")";
   }
 };
 }
@@ -604,6 +620,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
     for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end();
          I != E; ++I, ++GTI) {
       const Value *Index = *I;
+      const bool ScalableGEP = isa<ScalableVectorType>(GTI.getIndexedType());
       // Compute the (potentially symbolic) offset in bytes for this index.
       if (StructType *STy = GTI.getStructTypeOrNull()) {
         // For a struct, add the member offset.
@@ -615,27 +632,17 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
         continue;
       }
 
+      TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
       // For an array/pointer, add the element offset, explicitly scaled.
+      // Skip adding to constant offset if GEP index is marked as scalable
       if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
         if (CIdx->isZero())
           continue;
-
-        // Don't attempt to analyze GEPs if the scalable index is not zero.
-        TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
-        if (AllocTypeSize.isScalable()) {
-          Decomposed.Base = V;
-          return Decomposed;
+        if (!ScalableGEP) {
+          Decomposed.Offset += AllocTypeSize.getFixedValue() *
+                               CIdx->getValue().sextOrTrunc(MaxIndexSize);
+          continue;
         }
-
-        Decomposed.Offset += AllocTypeSize.getFixedValue() *
-                             CIdx->getValue().sextOrTrunc(MaxIndexSize);
-        continue;
-      }
-
-      TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
-      if (AllocTypeSize.isScalable()) {
-        Decomposed.Base = V;
-        return Decomposed;
       }
 
       GepHasConstantOffset = false;
@@ -645,22 +652,50 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
       unsigned Width = Index->getType()->getIntegerBitWidth();
       unsigned SExtBits = IndexSize > Width ? IndexSize - Width : 0;
       unsigned TruncBits = IndexSize < Width ? Width - IndexSize : 0;
-      LinearExpression LE = GetLinearExpression(
-          CastedValue(Index, 0, SExtBits, TruncBits), DL, 0, AC, DT);
+      // Scalable GEP decomposition
+      // Allow Scalable GEP to be decomposed in the case of
+      //    1. getelementptr <4 x vscale x i32> with 1st index as a constant
+      //    2. Index which have a leaf of @llvm.vscale
+      // In both cases, essentially CastedValue of VariableGEPIndex is Vscale,
+      // however in the 1st case, CastedValue is of type constant, hence another
+      // flag in VariableGEPIndex is created in this case, IsVscale If GEP is
+      // Scalable type, e.g. <4 x vscale x i32>, the first index will have
+      // vscale as a variable index, create a LE in this case
+      LinearExpression LE(CastedValue(Index, 0, SExtBits, TruncBits));
+      if (ScalableGEP) {
+        if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
+          LE = LinearExpression(
+              CastedValue(Index, 0, SExtBits, TruncBits),
+              CastedValue(Index, 0, SExtBits, TruncBits)
+                  .evaluateWith(CIdx->getValue()),
+              APInt(CastedValue(Index, 0, SExtBits, TruncBits).getBitWidth(),
+                    0),
+              true);
+          assert(LE.Offset.isZero() &&
+                 "Scalable GEP, the offset of LE should be 0");
+        }
+      } else
+        LE = GetLinearExpression(CastedValue(Index, 0, SExtBits, TruncBits), DL,
+                                 0, AC, DT);
 
       // Scale by the type size.
-      unsigned TypeSize = AllocTypeSize.getFixedValue();
+      unsigned TypeSize = AllocTypeSize.getKnownMinValue();
       LE = LE.mul(APInt(IndexSize, TypeSize), GEPOp->isInBounds());
       Decomposed.Offset += LE.Offset.sext(MaxIndexSize);
       APInt Scale = LE.Scale.sext(MaxIndexSize);
+      bool LEhasVscale = match(LE.Val.V, m_VScale());
 
       // If we already had an occurrence of this index variable, merge this
       // scale into it.  For example, we want to handle:
       //   A[x][x] -> x*16 + x*4 -> x*20
       // This also ensures that 'x' only appears in the index list once.
+      // Only add to IsVscale VariableGEPIndex if it's @llvm.vscale or gep
+      // vscale index
       for (unsigned i = 0, e = Decomposed.VarIndices.size(); i != e; ++i) {
-        if (Decomposed.VarIndices[i].Val.V == LE.Val.V &&
-            Decomposed.VarIndices[i].Val.hasSameCastsAs(LE.Val)) {
+        if (Decomposed.VarIndices[i].Val.hasSameCastsAs(LE.Val) &&
+            ((Decomposed.VarIndices[i].IsVscale &&
+              (ScalableGEP || LEhasVscale)) ||
+             Decomposed.VarIndices[i].Val.V == LE.Val.V)) {
           Scale += Decomposed.VarIndices[i].Scale;
           Decomposed.VarIndices.erase(Decomposed.VarIndices.begin() + i);
           break;
@@ -669,10 +704,19 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
 
       // Make sure that we have a scale that makes sense for this target's
       // index size.
+      // Only allow variableGEP decomposition for constants, in the case of
+      // vscale
       Scale = adjustToIndexSize(Scale, IndexSize);
+      bool InvalidVarVscale = (ScalableGEP && LEhasVscale) ||
+                              (ScalableGEP && !isa<ConstantInt>(LE.Val.V));
 
       if (!!Scale) {
-        VariableGEPIndex Entry = {LE.Val, Scale, CxtI, LE.IsNSW,
+        VariableGEPIndex Entry = {LE.Val,
+                                  Scale,
+                                  ScalableGEP || LEhasVscale,
+                                  InvalidVarVscale,
+                                  CxtI,
+                                  LE.IsNSW,
                                   /* IsNegated */ false};
         Decomposed.VarIndices.push_back(Entry);
       }
@@ -1049,6 +1093,19 @@ AliasResult BasicAAResult::aliasGEP(
   if (DecompGEP1.Base == GEP1 && DecompGEP2.Base == V2)
     return AliasResult::MayAlias;
 
+  // If Vscale Variable index has another variable in V, return mayAlias
+  for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) {
+    if (DecompGEP1.VarIndices[i].IsVscale &&
+        DecompGEP1.VarIndices[i].InvalidVarVscale)
+      return AliasResult::MayAlias;
+  }
+
+  for (unsigned i = 0, e = DecompGEP2.VarIndices.size(); i != e; ++i) {
+    if (DecompGEP2.VarIndices[i].IsVscale &&
+        DecompGEP2.VarIndices[i].InvalidVarVscale)
+      return AliasResult::MayAlias;
+  }
+
   // Subtract the GEP2 pointer from the GEP1 pointer to find out their
   // symbolic difference.
   subtractDecomposedGEPs(DecompGEP1, DecompGEP2, AAQI);
@@ -1056,14 +1113,14 @@ AliasResult BasicAAResult::aliasGEP(
   // If an inbounds GEP would have to start from an out of bounds address
   // for the two to alias, then we can assume noalias.
   if (*DecompGEP1.InBounds && DecompGEP1.VarIndices.empty() &&
-      V2Size.hasValue() && DecompGEP1.Offset.sge(V2Size.getValue()) &&
+      V2Size.hasValue() && DecompGEP1.Offset.sge(V2Size.getUintValue()) &&
       isBaseOfObject(DecompGEP2.Base))
     return AliasResult::NoAlias;
 
   if (isa<GEPOperator>(V2)) {
     // Symmetric case to above.
     if (*DecompGEP2.InBounds && DecompGEP1.VarIndices.empty() &&
-        V1Size.hasValue() && DecompGEP1.Offset.sle(-V1Size.getValue()) &&
+        V1Size.hasValue() && DecompGEP1.Offset.sle(-V1Size.getUintValue()) &&
         isBaseOfObject(DecompGEP1.Base))
       return AliasResult::NoAlias;
   }
@@ -1116,13 +1173,13 @@ AliasResult BasicAAResult::aliasGEP(
     if (!VLeftSize.hasValue())
       return AliasResult::MayAlias;
 
-    const uint64_t LSize = VLeftSize.getValue();
-    if (Off.ult(LSize)) {
+    const uint64_t LSize = VLeftSize.getUintValue();
+    if (Off.ult(LSize) && !VLeftSize.isScalable()) {
       // Conservatively drop processing if a phi was visited and/or offset is
       // too big.
       AliasResult AR = AliasResult::PartialAlias;
       if (VRightSize.hasValue() && Off.ule(INT32_MAX) &&
-          (Off + VRightSize.getValue()).ule(LSize)) {
+          (Off + VRightSize.getUintValue()).ule(LSize)) {
         // Memory referenced by right pointer is nested. Save the offset in
         // cache. Note that originally offset estimated as GEP1-V2, but
         // AliasResult contains the shift that represents GEP1+Offset=V2.
@@ -1131,7 +1188,8 @@ AliasResult BasicAAResult::aliasGEP(
       }
       return AR;
     }
-    return AliasResult::NoAlias;
+    if (!VLeftSize.isScalable())
+      return AliasResult::NoAlias;
   }
 
   // We need to know both acess sizes for all the following heuristics.
@@ -1144,6 +1202,11 @@ AliasResult BasicAAResult::aliasGEP(
     const VariableGEPIndex &Index = DecompGEP1.VarIndices[i];
     const APInt &Scale = Index.Scale;
     APInt ScaleForGCD = Scale;
+    assert(
+        Index.I...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/69152