[llvm] Decompose gep variable upstream (PR #69152)

Harvin Iriawan via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 16 00:32:03 PDT 2023


https://github.com/harviniriawan created https://github.com/llvm/llvm-project/pull/69152

Further work on top of #65759 (the first two commits of this pull request, which are still pending review).

>From 15658008e993be4f6dd561537693ee9ed2631ac4 Mon Sep 17 00:00:00 2001
From: Harvin Iriawan <harvin.iriawan at arm.com>
Date: Fri, 8 Sep 2023 10:33:34 +0100
Subject: [PATCH 1/3] [Analysis] Add Scalable field in MemoryLocation.h

  This is the first of a series of patches to improve alias analysis on
  scalable quantities.
  Keep the Scalable information from TypeSize, which will be used in
  alias analysis.
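
  As a small illustrative sketch (the function name is arbitrary): with
  this change the scalable store below is modelled with a precise scalable
  location, LocationSize::precise(vscale x 16), instead of degrading to an
  unknown "after pointer" size, as checked by the new memloc-vscale.ll
  test.

    define void @example(ptr %p) {
      ; precise scalable location: LocationSize::precise(vscale x 16)
      store <vscale x 2 x i64> zeroinitializer, ptr %p, align 16
      ret void
    }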
---
 llvm/include/llvm/Analysis/MemoryLocation.h   | 41 +++++++++-----
 llvm/lib/Analysis/BasicAliasAnalysis.cpp      | 54 +++++++++++--------
 .../lib/Analysis/MemoryDependenceAnalysis.cpp |  3 +-
 llvm/lib/CodeGen/StackProtector.cpp           |  4 +-
 .../Transforms/IPO/AttributorAttributes.cpp   |  3 +-
 .../Scalar/DeadStoreElimination.cpp           | 36 ++++++++-----
 llvm/test/Analysis/AliasSet/memloc-vscale.ll  | 52 ++++++++++++++++++
 llvm/test/Transforms/GVN/scalable-memloc.ll   | 29 ++++++++++
 8 files changed, 173 insertions(+), 49 deletions(-)
 create mode 100644 llvm/test/Analysis/AliasSet/memloc-vscale.ll
 create mode 100644 llvm/test/Transforms/GVN/scalable-memloc.ll

diff --git a/llvm/include/llvm/Analysis/MemoryLocation.h b/llvm/include/llvm/Analysis/MemoryLocation.h
index 85ca84e68a13971..ff45d0a96c4cfbf 100644
--- a/llvm/include/llvm/Analysis/MemoryLocation.h
+++ b/llvm/include/llvm/Analysis/MemoryLocation.h
@@ -64,16 +64,19 @@ class Value;
 //
 // If asked to represent a pathologically large value, this will degrade to
 // std::nullopt.
+// The Scalable information is stored in bit 62 of Value; it is needed to do
+// alias analysis on scalable quantities.
 class LocationSize {
   enum : uint64_t {
     BeforeOrAfterPointer = ~uint64_t(0),
-    AfterPointer = BeforeOrAfterPointer - 1,
-    MapEmpty = BeforeOrAfterPointer - 2,
-    MapTombstone = BeforeOrAfterPointer - 3,
+    ScalableBit = uint64_t(1) << 62,
+    AfterPointer = (BeforeOrAfterPointer - 1) & ~ScalableBit,
+    MapEmpty = (BeforeOrAfterPointer - 2) & ~ScalableBit,
+    MapTombstone = (BeforeOrAfterPointer - 3) & ~ScalableBit,
     ImpreciseBit = uint64_t(1) << 63,
 
     // The maximum value we can represent without falling back to 'unknown'.
-    MaxValue = (MapTombstone - 1) & ~ImpreciseBit,
+    MaxValue = (MapTombstone - 1) & ~(ImpreciseBit | ScalableBit),
   };
 
   uint64_t Value;
@@ -88,6 +91,7 @@ class LocationSize {
                 "AfterPointer is imprecise by definition.");
   static_assert(BeforeOrAfterPointer & ImpreciseBit,
                 "BeforeOrAfterPointer is imprecise by definition.");
+  static_assert(!(MaxValue & ScalableBit), "MaxValue must not have bit 62 set");
 
 public:
   // FIXME: Migrate all users to construct via either `precise` or `upperBound`,
@@ -98,12 +102,17 @@ class LocationSize {
   // this assumes the provided value is precise.
   constexpr LocationSize(uint64_t Raw)
       : Value(Raw > MaxValue ? AfterPointer : Raw) {}
-
-  static LocationSize precise(uint64_t Value) { return LocationSize(Value); }
+  constexpr LocationSize(uint64_t Raw, bool Scalable)
+      : Value(Raw > MaxValue ? AfterPointer
+                             : Raw | (Scalable ? ScalableBit : uint64_t(0))) {}
+
+  // Construct a LocationSize from a plain uint64_t with the Scalable
+  // information set to false.
+  static LocationSize precise(uint64_t Value) {
+    return LocationSize(Value, false /*Scalable*/);
+  }
   static LocationSize precise(TypeSize Value) {
-    if (Value.isScalable())
-      return afterPointer();
-    return precise(Value.getFixedValue());
+    return LocationSize(Value.getKnownMinValue(), Value.isScalable());
   }
 
   static LocationSize upperBound(uint64_t Value) {
@@ -150,6 +159,8 @@ class LocationSize {
       return beforeOrAfterPointer();
     if (Value == AfterPointer || Other.Value == AfterPointer)
       return afterPointer();
+    if (isScalable() || Other.isScalable())
+      return afterPointer();
 
     return upperBound(std::max(getValue(), Other.getValue()));
   }
@@ -157,9 +168,13 @@ class LocationSize {
   bool hasValue() const {
     return Value != AfterPointer && Value != BeforeOrAfterPointer;
   }
-  uint64_t getValue() const {
+  bool isScalable() const { return (Value & ScalableBit); }
+
+  TypeSize getValue() const {
     assert(hasValue() && "Getting value from an unknown LocationSize!");
-    return Value & ~ImpreciseBit;
+    assert((Value & ~(ImpreciseBit | ScalableBit)) < MaxValue &&
+           "Scalable bit of value should be masked");
+    return {Value & ~(ImpreciseBit | ScalableBit), isScalable()};
   }
 
   // Returns whether or not this value is precise. Note that if a value is
@@ -169,7 +184,9 @@ class LocationSize {
   }
 
   // Convenience method to check if this LocationSize's value is 0.
-  bool isZero() const { return hasValue() && getValue() == 0; }
+  bool isZero() const {
+    return hasValue() && getValue().getKnownMinValue() == 0;
+  }
 
   /// Whether accesses before the base pointer are possible.
   bool mayBeBeforePointer() const { return Value == BeforeOrAfterPointer; }
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index c162b8f6edc1905..113343b881c478b 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -101,22 +101,23 @@ bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA,
 //===----------------------------------------------------------------------===//
 
 /// Returns the size of the object specified by V or UnknownSize if unknown.
-static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
-                              const TargetLibraryInfo &TLI,
-                              bool NullIsValidLoc,
-                              bool RoundToAlign = false) {
+/// getObjectSize does not support scalable values.
+static LocationSize getObjectSize(const Value *V, const DataLayout &DL,
+                                  const TargetLibraryInfo &TLI,
+                                  bool NullIsValidLoc,
+                                  bool RoundToAlign = false) {
   uint64_t Size;
   ObjectSizeOpts Opts;
   Opts.RoundToAlign = RoundToAlign;
   Opts.NullIsUnknownSize = NullIsValidLoc;
   if (getObjectSize(V, Size, DL, &TLI, Opts))
-    return Size;
-  return MemoryLocation::UnknownSize;
+    return LocationSize(Size);
+  return LocationSize(MemoryLocation::UnknownSize);
 }
 
 /// Returns true if we can prove that the object specified by V is smaller than
 /// Size.
-static bool isObjectSmallerThan(const Value *V, uint64_t Size,
+static bool isObjectSmallerThan(const Value *V, LocationSize Size,
                                 const DataLayout &DL,
                                 const TargetLibraryInfo &TLI,
                                 bool NullIsValidLoc) {
@@ -151,19 +152,21 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
 
   // This function needs to use the aligned object size because we allow
   // reads a bit past the end given sufficient alignment.
-  uint64_t ObjectSize = getObjectSize(V, DL, TLI, NullIsValidLoc,
-                                      /*RoundToAlign*/ true);
+  LocationSize ObjectSize = getObjectSize(V, DL, TLI, NullIsValidLoc,
+                                          /*RoundToAlign*/ true);
 
-  return ObjectSize != MemoryLocation::UnknownSize && ObjectSize < Size;
+  // Bail on comparing V and Size if Size is scalable
+  return ObjectSize != MemoryLocation::UnknownSize && !Size.isScalable() &&
+         ObjectSize.getValue() < Size.getValue();
 }
 
 /// Return the minimal extent from \p V to the end of the underlying object,
 /// assuming the result is used in an aliasing query. E.g., we do use the query
 /// location size and the fact that null pointers cannot alias here.
-static uint64_t getMinimalExtentFrom(const Value &V,
-                                     const LocationSize &LocSize,
-                                     const DataLayout &DL,
-                                     bool NullIsValidLoc) {
+static LocationSize getMinimalExtentFrom(const Value &V,
+                                         const LocationSize &LocSize,
+                                         const DataLayout &DL,
+                                         bool NullIsValidLoc) {
   // If we have dereferenceability information we know a lower bound for the
   // extent as accesses for a lower offset would be valid. We need to exclude
   // the "or null" part if null is a valid pointer. We can ignore frees, as an
@@ -175,15 +178,16 @@ static uint64_t getMinimalExtentFrom(const Value &V,
   // If queried with a precise location size, we assume that location size to be
   // accessed, thus valid.
   if (LocSize.isPrecise())
-    DerefBytes = std::max(DerefBytes, LocSize.getValue());
-  return DerefBytes;
+    DerefBytes = std::max(DerefBytes, LocSize.getValue().getKnownMinValue());
+  return LocationSize(DerefBytes, LocSize.isScalable());
 }
 
 /// Returns true if we can prove that the object specified by V has size Size.
-static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
+static bool isObjectSize(const Value *V, TypeSize Size, const DataLayout &DL,
                          const TargetLibraryInfo &TLI, bool NullIsValidLoc) {
-  uint64_t ObjectSize = getObjectSize(V, DL, TLI, NullIsValidLoc);
-  return ObjectSize != MemoryLocation::UnknownSize && ObjectSize == Size;
+  LocationSize ObjectSize = getObjectSize(V, DL, TLI, NullIsValidLoc);
+  return ObjectSize != MemoryLocation::UnknownSize &&
+         ObjectSize.getValue() == Size;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1055,15 +1059,19 @@ AliasResult BasicAAResult::aliasGEP(
 
   // If an inbounds GEP would have to start from an out of bounds address
   // for the two to alias, then we can assume noalias.
+  // TODO: Remove !isScalable() once BasicAA fully supports scalable location
+  // sizes.
   if (*DecompGEP1.InBounds && DecompGEP1.VarIndices.empty() &&
-      V2Size.hasValue() && DecompGEP1.Offset.sge(V2Size.getValue()) &&
+      V2Size.hasValue() && !V2Size.isScalable() &&
+      DecompGEP1.Offset.sge(V2Size.getValue()) &&
       isBaseOfObject(DecompGEP2.Base))
     return AliasResult::NoAlias;
 
   if (isa<GEPOperator>(V2)) {
     // Symmetric case to above.
     if (*DecompGEP2.InBounds && DecompGEP1.VarIndices.empty() &&
-        V1Size.hasValue() && DecompGEP1.Offset.sle(-V1Size.getValue()) &&
+        V1Size.hasValue() && !V1Size.isScalable() &&
+        DecompGEP1.Offset.sle(-V1Size.getValue()) &&
         isBaseOfObject(DecompGEP1.Base))
       return AliasResult::NoAlias;
   }
@@ -1087,6 +1095,10 @@ AliasResult BasicAAResult::aliasGEP(
     return BaseAlias;
   }
 
+  // Bail on analysing scalable LocationSize
+  if (V1Size.isScalable() || V2Size.isScalable())
+    return AliasResult::MayAlias;
+
   // If there is a constant difference between the pointers, but the difference
   // is less than the size of the associated memory object, then we know
   // that the objects are partially overlapping.  If the difference is
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 49a31a52cf0e850..91effb73c5d3deb 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -1103,7 +1103,8 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
         // be conservative.
         ThrowOutEverything =
             CacheInfo->Size.isPrecise() != Loc.Size.isPrecise() ||
-            CacheInfo->Size.getValue() < Loc.Size.getValue();
+            TypeSize::isKnownLT(CacheInfo->Size.getValue(),
+                                Loc.Size.getValue());
       } else {
         // For our purposes, unknown size > all others.
         ThrowOutEverything = !Loc.Size.hasValue();
diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp
index 387b653f8815367..b24a9dcb88f3283 100644
--- a/llvm/lib/CodeGen/StackProtector.cpp
+++ b/llvm/lib/CodeGen/StackProtector.cpp
@@ -176,10 +176,10 @@ static bool HasAddressTaken(const Instruction *AI, TypeSize AllocSize,
     const auto *I = cast<Instruction>(U);
     // If this instruction accesses memory make sure it doesn't access beyond
     // the bounds of the allocated object.
+    // TODO: TypeSize::getFixed should be modified to adapt to scalable vectors
     std::optional<MemoryLocation> MemLoc = MemoryLocation::getOrNone(I);
     if (MemLoc && MemLoc->Size.hasValue() &&
-        !TypeSize::isKnownGE(AllocSize,
-                             TypeSize::getFixed(MemLoc->Size.getValue())))
+        !TypeSize::isKnownGE(AllocSize, MemLoc->Size.getValue()))
       return true;
     switch (I->getOpcode()) {
     case Instruction::Store:
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 3808eef7ab4e27a..94759beaadda091 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -2531,7 +2531,8 @@ static int64_t getKnownNonNullAndDerefBytesForUse(
   }
 
   std::optional<MemoryLocation> Loc = MemoryLocation::getOrNone(I);
-  if (!Loc || Loc->Ptr != UseV || !Loc->Size.isPrecise() || I->isVolatile())
+  if (!Loc || Loc->Ptr != UseV || !Loc->Size.isPrecise() ||
+      Loc->Size.isScalable() || I->isVolatile())
     return 0;
 
   int64_t Offset;
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 0586ff191d94ed1..7d879b9ad6f60fb 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -205,16 +205,16 @@ static bool isShortenableAtTheBeginning(Instruction *I) {
   return isa<AnyMemSetInst>(I);
 }
 
-static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
-                               const TargetLibraryInfo &TLI,
-                               const Function *F) {
+static LocationSize getPointerSize(const Value *V, const DataLayout &DL,
+                                   const TargetLibraryInfo &TLI,
+                                   const Function *F) {
   uint64_t Size;
   ObjectSizeOpts Opts;
   Opts.NullIsUnknownSize = NullPointerIsDefined(F);
 
   if (getObjectSize(V, Size, DL, &TLI, Opts))
-    return Size;
-  return MemoryLocation::UnknownSize;
+    return LocationSize(Size);
+  return LocationSize(MemoryLocation::UnknownSize);
 }
 
 namespace {
@@ -951,9 +951,10 @@ struct DSEState {
     // case the size/offset of the dead store does not matter.
     if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
         isIdentifiedObject(KillingUndObj)) {
-      uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F);
-      if (KillingUndObjSize != MemoryLocation::UnknownSize &&
-          KillingUndObjSize == KillingLocSize.getValue())
+      LocationSize KillingUndObjSize =
+          getPointerSize(KillingUndObj, DL, TLI, &F);
+      if (KillingUndObjSize.hasValue() &&
+          KillingUndObjSize.getValue() == KillingLocSize.getValue())
         return OW_Complete;
     }
 
@@ -976,22 +977,30 @@ struct DSEState {
       return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
     }
 
-    const uint64_t KillingSize = KillingLocSize.getValue();
-    const uint64_t DeadSize = DeadLoc.Size.getValue();
+    const TypeSize KillingSize = KillingLocSize.getValue();
+    const TypeSize DeadSize = DeadLoc.Size.getValue();
+    // Bail out of the size comparison below, which depends on AA, for now.
+    // TODO: Remove AnyScalable once alias analysis deals with scalable vectors.
+    const bool AnyScalable =
+        DeadSize.isScalable() || KillingLocSize.isScalable();
 
+    // TODO: Remove the AnyScalable constraint once alias analysis fully
+    // supports scalable quantities.
+    if (AnyScalable)
+      return OW_Unknown;
     // Query the alias information
     AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
 
     // If the start pointers are the same, we just have to compare sizes to see if
     // the killing store was larger than the dead store.
-    if (AAR == AliasResult::MustAlias) {
+    if (AAR == AliasResult::MustAlias && !AnyScalable) {
       // Make sure that the KillingSize size is >= the DeadSize size.
       if (KillingSize >= DeadSize)
         return OW_Complete;
     }
 
     // If we hit a partial alias we may have a full overwrite
-    if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
+    if (AAR == AliasResult::PartialAlias && AAR.hasOffset() && !AnyScalable) {
       int32_t Off = AAR.getOffset();
       if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
         return OW_Complete;
@@ -1039,6 +1048,9 @@ struct DSEState {
     //
     // We have to be careful here as *Off is signed while *.Size is unsigned.
 
+    if (AnyScalable)
+      return OW_Unknown;
+
     // Check if the dead access starts "not before" the killing one.
     if (DeadOff >= KillingOff) {
       // If the dead access ends "not after" the killing access then the
diff --git a/llvm/test/Analysis/AliasSet/memloc-vscale.ll b/llvm/test/Analysis/AliasSet/memloc-vscale.ll
new file mode 100644
index 000000000000000..8a83645ddaf9a87
--- /dev/null
+++ b/llvm/test/Analysis/AliasSet/memloc-vscale.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S < %s -passes=print-alias-sets 2>&1 | FileCheck %s
+
+; CHECK-LABEL: Alias sets for function 'sn'
+; CHECK: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (ptr %p, unknown after)
+define void @sn(ptr %p) {
+  store <vscale x 2 x i64> zeroinitializer, ptr %p, align 2
+  store i64 0, ptr %p, align 2
+  ret void
+}
+
+; CHECK-LABEL: Alias sets for function 'ns'
+; CHECK: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (ptr %p, unknown after)
+define void @ns(ptr %p) {
+  store i64 0, ptr %p, align 2
+  store <vscale x 2 x i64> zeroinitializer, ptr %p, align 2
+  ret void
+}
+
+; CHECK-LABEL: Alias sets for function 'ss':
+; CHECK: AliasSet[{{.*}}, 1] must alias, Mod       Pointers: (ptr %p, LocationSize::precise(vscale x 16))
+define void @ss(ptr %p) {
+  store <vscale x 2 x i64> zeroinitializer, ptr %p, align 2
+  store <vscale x 2 x i64> zeroinitializer, ptr %p, align 2
+  ret void
+}
+
+; CHECK-LABEL: Alias sets for function 'ss2':
+; CHECK: AliasSet[{{.*}}, 1] must alias, Mod       Pointers: (ptr %p, unknown after)
+define void @ss2(ptr %p) {
+  store <vscale x 2 x i64> zeroinitializer, ptr %p, align 2
+  store <vscale x 2 x i64> zeroinitializer, ptr %p, align 2
+  store <vscale x 4 x i64> zeroinitializer, ptr %p, align 2
+  ret void
+}
+; CHECK-LABEL: Alias sets for function 'son':
+; CHECK: AliasSet[{{.*}}, 2] may alias, Mod       Pointers: (ptr %g, LocationSize::precise(vscale x 16)), (ptr %p, LocationSize::precise(8))
+define void @son(ptr %p) {
+  %g = getelementptr i8, ptr %p, i64 8
+  store <vscale x 2 x i64> zeroinitializer, ptr %g, align 2
+  store i64 0, ptr %p, align 2
+  ret void
+}
+
+; CHECK-LABEL: Alias sets for function 'sno':
+; CHECK: AliasSet[{{.*}}, 2] may alias, Mod       Pointers: (ptr %p, LocationSize::precise(vscale x 16)), (ptr %g, LocationSize::precise(8))
+define void @sno(ptr %p) {
+  %g = getelementptr i8, ptr %p, i64 8
+  store <vscale x 2 x i64> zeroinitializer, ptr %p, align 2
+  store i64 0, ptr %g, align 2
+  ret void
+}
diff --git a/llvm/test/Transforms/GVN/scalable-memloc.ll b/llvm/test/Transforms/GVN/scalable-memloc.ll
new file mode 100644
index 000000000000000..23b4c19f280ca5d
--- /dev/null
+++ b/llvm/test/Transforms/GVN/scalable-memloc.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S < %s -passes=gvn | FileCheck %s
+
+define void @test(i1 %cmp19, ptr %p) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[CMP19:%.*]], label [[WHILE_BODY_LR_PH:%.*]], label [[FOR_COND_PREHEADER:%.*]]
+; CHECK:       while.body.lr.ph:
+; CHECK-NEXT:    [[DOTPRE1:%.*]] = load <vscale x 2 x double>, ptr [[P:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <vscale x 2 x double> [[DOTPRE1]], i64 0
+; CHECK-NEXT:    ret void
+; CHECK:       for.cond.preheader:
+; CHECK-NEXT:    [[DOTPRE:%.*]] = load double, ptr [[P]], align 8
+; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[DOTPRE]], 0.000000e+00
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %cmp19, label %while.body.lr.ph, label %for.cond.preheader
+
+while.body.lr.ph:                                 ; preds = %entry
+  %.pre1 = load <vscale x 2 x double>, ptr %p, align 16
+  %0 = extractelement <vscale x 2 x double> %.pre1, i64 0
+  ret void
+
+for.cond.preheader:                               ; preds = %entry
+  %.pre = load double, ptr %p, align 8
+  %add = fadd double %.pre, 0.000000e+00
+  ret void
+}

>From 7114dc27876fce29e43a69903d06f6bfaf4caa5d Mon Sep 17 00:00:00 2001
From: Harvin Iriawan <harvin.iriawan at arm.com>
Date: Mon, 2 Oct 2023 17:04:11 +0100
Subject: [PATCH 2/3] Address comment

---
 llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 7d879b9ad6f60fb..197656a97f27057 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -993,14 +993,14 @@ struct DSEState {
 
     // If the start pointers are the same, we just have to compare sizes to see if
     // the killing store was larger than the dead store.
-    if (AAR == AliasResult::MustAlias && !AnyScalable) {
+    if (AAR == AliasResult::MustAlias) {
       // Make sure that the KillingSize size is >= the DeadSize size.
       if (KillingSize >= DeadSize)
         return OW_Complete;
     }
 
     // If we hit a partial alias we may have a full overwrite
-    if (AAR == AliasResult::PartialAlias && AAR.hasOffset() && !AnyScalable) {
+    if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
       int32_t Off = AAR.getOffset();
       if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
         return OW_Complete;
@@ -1048,9 +1048,6 @@ struct DSEState {
     //
     // We have to be careful here as *Off is signed while *.Size is unsigned.
 
-    if (AnyScalable)
-      return OW_Unknown;
-
     // Check if the dead access starts "not before" the killing one.
     if (DeadOff >= KillingOff) {
       // If the dead access ends "not after" the killing access then the

>From b649f5912c74d6584d704744f3da571e90d19d8a Mon Sep 17 00:00:00 2001
From: Harvin Iriawan <harvin.iriawan at arm.com>
Date: Mon, 9 Oct 2023 16:07:20 +0100
Subject: [PATCH 3/3] [BasicAA] Add Vscale GEP decomposition on variable index

  Enable BasicAA to be performed on scalable GEPs and LocationSizes.
  Scalable GEP expressions, such as @llvm.vscale and GEPs of scalable
  types, are attached to the VariableGEPIndex, with Val representing
  vscale.

  For now, AA only handles a single variable index (the vscale) plus
  constant offsets in the GEP.
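
  As a small illustrative sketch (the function name is arbitrary),
  mirroring the updated vscale.ll checks: both GEPs below decompose to
  %p plus a constant multiple of vscale, so BasicAA can now prove the two
  vscale x 16 byte accesses do not overlap (NoAlias) instead of returning
  MayAlias.

    define void @example(ptr %p) {
      %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1
      %gep2 = getelementptr <vscale x 4 x i32>, ptr %p, i64 2
      ; offsets 16*vscale and 32*vscale, each access is vscale x 16 bytes
      %l1 = load <vscale x 4 x i32>, ptr %gep1
      %l2 = load <vscale x 4 x i32>, ptr %gep2
      ret void
    }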
---
 llvm/include/llvm/Analysis/MemoryLocation.h   |   7 +
 llvm/lib/Analysis/BasicAliasAnalysis.cpp      | 207 +++++++++++++-----
 .../lib/Analysis/MemoryDependenceAnalysis.cpp |   2 +-
 llvm/test/Analysis/AliasSet/memloc-vscale.ll  |   3 +-
 llvm/test/Analysis/BasicAA/vscale.ll          | 111 ++++++----
 llvm/test/Transforms/GVN/vscale.ll            |  11 +-
 6 files changed, 240 insertions(+), 101 deletions(-)

diff --git a/llvm/include/llvm/Analysis/MemoryLocation.h b/llvm/include/llvm/Analysis/MemoryLocation.h
index ff45d0a96c4cfbf..ef87412572145fa 100644
--- a/llvm/include/llvm/Analysis/MemoryLocation.h
+++ b/llvm/include/llvm/Analysis/MemoryLocation.h
@@ -177,6 +177,13 @@ class LocationSize {
     return {Value & ~(ImpreciseBit | ScalableBit), isScalable()};
   }
 
+  uint64_t getUintValue() const {
+    assert(hasValue() && "Getting value from an unknown LocationSize!");
+    assert((Value & ~(ImpreciseBit | ScalableBit)) < MaxValue &&
+           "Scalable bit of value should be masked");
+    return Value & ~(ImpreciseBit | ScalableBit);
+  }
+
   // Returns whether or not this value is precise. Note that if a value is
   // precise, it's guaranteed to not be unknown.
   bool isPrecise() const {
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 113343b881c478b..951716cfe99131a 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -44,6 +44,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
@@ -63,6 +64,7 @@
 #define DEBUG_TYPE "basicaa"
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 /// Enable analysis of recursive PHI nodes.
 static cl::opt<bool> EnableRecPhiAnalysis("basic-aa-recphi", cl::Hidden,
@@ -346,13 +348,20 @@ struct LinearExpression {
 
 /// Analyzes the specified value as a linear expression: "A*V + B", where A and
 /// B are constant integers.
-static LinearExpression GetLinearExpression(
-    const CastedValue &Val,  const DataLayout &DL, unsigned Depth,
-    AssumptionCache *AC, DominatorTree *DT) {
+static LinearExpression GetLinearExpression(const CastedValue &Val,
+                                            const DataLayout &DL,
+                                            unsigned Depth, AssumptionCache *AC,
+                                            DominatorTree *DT) {
   // Limit our recursion depth.
   if (Depth == 6)
     return Val;
 
+  // If llvm.vscale is matched, return a linear expression of scale 1, offset 0.
+  if (match(Val.V, m_VScale())) {
+    return LinearExpression(Val, APInt(Val.getBitWidth(), 1),
+                            APInt(Val.getBitWidth(), 0), true);
+  }
+
   if (const ConstantInt *Const = dyn_cast<ConstantInt>(Val.V))
     return LinearExpression(Val, APInt(Val.getBitWidth(), 0),
                             Val.evaluateWith(Const->getValue()), true);
@@ -459,6 +468,12 @@ struct VariableGEPIndex {
   CastedValue Val;
   APInt Scale;
 
+  // True if this index represents the vscale quantity in a GEP expression.
+  bool IsVscale;
+  // A flag indicating that this vscale index is combined with another value
+  // (e.g. vscale^2 or vscale * x) and therefore cannot be analyzed.
+  bool InvalidVarVscale;
+
   // Context instruction to use when querying information about this index.
   const Instruction *CxtI;
 
@@ -481,13 +496,10 @@ struct VariableGEPIndex {
     dbgs() << "\n";
   }
   void print(raw_ostream &OS) const {
-    OS << "(V=" << Val.V->getName()
-       << ", zextbits=" << Val.ZExtBits
-       << ", sextbits=" << Val.SExtBits
-       << ", truncbits=" << Val.TruncBits
-       << ", scale=" << Scale
-       << ", nsw=" << IsNSW
-       << ", negated=" << IsNegated << ")";
+    OS << "(V=" << Val.V->getName() << "  IsVscale=" << IsVscale
+       << ", zextbits=" << Val.ZExtBits << ", sextbits=" << Val.SExtBits
+       << ", truncbits=" << Val.TruncBits << ", scale=" << Scale
+       << ", nsw=" << IsNSW << ", negated=" << IsNegated << ")";
   }
 };
 }
@@ -608,6 +620,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
     for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end();
          I != E; ++I, ++GTI) {
       const Value *Index = *I;
+      const bool ScalableGEP = isa<ScalableVectorType>(GTI.getIndexedType());
       // Compute the (potentially symbolic) offset in bytes for this index.
       if (StructType *STy = GTI.getStructTypeOrNull()) {
         // For a struct, add the member offset.
@@ -619,27 +632,17 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
         continue;
       }
 
+      TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
       // For an array/pointer, add the element offset, explicitly scaled.
+      // Skip adding to constant offset if GEP index is marked as scalable
       if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
         if (CIdx->isZero())
           continue;
-
-        // Don't attempt to analyze GEPs if the scalable index is not zero.
-        TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
-        if (AllocTypeSize.isScalable()) {
-          Decomposed.Base = V;
-          return Decomposed;
+        if (!ScalableGEP) {
+          Decomposed.Offset += AllocTypeSize.getFixedValue() *
+                               CIdx->getValue().sextOrTrunc(MaxIndexSize);
+          continue;
         }
-
-        Decomposed.Offset += AllocTypeSize.getFixedValue() *
-                             CIdx->getValue().sextOrTrunc(MaxIndexSize);
-        continue;
-      }
-
-      TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
-      if (AllocTypeSize.isScalable()) {
-        Decomposed.Base = V;
-        return Decomposed;
       }
 
       GepHasConstantOffset = false;
@@ -649,22 +652,50 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
       unsigned Width = Index->getType()->getIntegerBitWidth();
       unsigned SExtBits = IndexSize > Width ? IndexSize - Width : 0;
       unsigned TruncBits = IndexSize < Width ? Width - IndexSize : 0;
-      LinearExpression LE = GetLinearExpression(
-          CastedValue(Index, 0, SExtBits, TruncBits), DL, 0, AC, DT);
+      // Scalable GEP decomposition.
+      // Allow a scalable GEP to be decomposed in two cases:
+      //    1. getelementptr <vscale x 4 x i32> with a constant first index
+      //    2. an index whose linear expression has @llvm.vscale as a leaf
+      // In both cases the CastedValue of the VariableGEPIndex effectively
+      // represents vscale. In the first case, however, the CastedValue is a
+      // constant, so a separate flag, IsVscale, marks such entries.
+      // If the GEP operates on a scalable type, e.g. <vscale x 4 x i32>, the
+      // first index is implicitly scaled by vscale, so build a
+      // LinearExpression for it here.
+      if (ScalableGEP) {
+        if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
+          LE = LinearExpression(
+              CastedValue(Index, 0, SExtBits, TruncBits),
+              CastedValue(Index, 0, SExtBits, TruncBits)
+                  .evaluateWith(CIdx->getValue()),
+              APInt(CastedValue(Index, 0, SExtBits, TruncBits).getBitWidth(),
+                    0),
+              true);
+          assert(LE.Offset.isZero() &&
+                 "Scalable GEP, the offset of LE should be 0");
+        }
+      } else
+        LE = GetLinearExpression(CastedValue(Index, 0, SExtBits, TruncBits), DL,
+                                 0, AC, DT);
 
       // Scale by the type size.
-      unsigned TypeSize = AllocTypeSize.getFixedValue();
+      unsigned TypeSize = AllocTypeSize.getKnownMinValue();
       LE = LE.mul(APInt(IndexSize, TypeSize), GEPOp->isInBounds());
       Decomposed.Offset += LE.Offset.sext(MaxIndexSize);
       APInt Scale = LE.Scale.sext(MaxIndexSize);
+      bool LEhasVscale = match(LE.Val.V, m_VScale());
 
       // If we already had an occurrence of this index variable, merge this
       // scale into it.  For example, we want to handle:
       //   A[x][x] -> x*16 + x*4 -> x*20
       // This also ensures that 'x' only appears in the index list once.
+      // Only merge into an IsVscale VariableGEPIndex if this index is
+      // @llvm.vscale or a scalable-type GEP index.
       for (unsigned i = 0, e = Decomposed.VarIndices.size(); i != e; ++i) {
-        if (Decomposed.VarIndices[i].Val.V == LE.Val.V &&
-            Decomposed.VarIndices[i].Val.hasSameCastsAs(LE.Val)) {
+        if (Decomposed.VarIndices[i].Val.hasSameCastsAs(LE.Val) &&
+            ((Decomposed.VarIndices[i].IsVscale &&
+              (ScalableGEP || LEhasVscale)) ||
+             Decomposed.VarIndices[i].Val.V == LE.Val.V)) {
           Scale += Decomposed.VarIndices[i].Scale;
           Decomposed.VarIndices.erase(Decomposed.VarIndices.begin() + i);
           break;
@@ -673,10 +704,19 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
 
       // Make sure that we have a scale that makes sense for this target's
       // index size.
+      // In the vscale case, only allow VariableGEPIndex decomposition when
+      // the index is a constant.
       Scale = adjustToIndexSize(Scale, IndexSize);
+      bool InvalidVarVscale = (ScalableGEP && LEhasVscale) ||
+                              (ScalableGEP && !isa<ConstantInt>(LE.Val.V));
 
       if (!!Scale) {
-        VariableGEPIndex Entry = {LE.Val, Scale, CxtI, LE.IsNSW,
+        VariableGEPIndex Entry = {LE.Val,
+                                  Scale,
+                                  ScalableGEP || LEhasVscale,
+                                  InvalidVarVscale,
+                                  CxtI,
+                                  LE.IsNSW,
                                   /* IsNegated */ false};
         Decomposed.VarIndices.push_back(Entry);
       }
@@ -1053,25 +1093,34 @@ AliasResult BasicAAResult::aliasGEP(
   if (DecompGEP1.Base == GEP1 && DecompGEP2.Base == V2)
     return AliasResult::MayAlias;
 
+  // If a vscale variable index is combined with another variable, return MayAlias.
+  for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) {
+    if (DecompGEP1.VarIndices[i].IsVscale &&
+        DecompGEP1.VarIndices[i].InvalidVarVscale)
+      return AliasResult::MayAlias;
+  }
+
+  for (unsigned i = 0, e = DecompGEP2.VarIndices.size(); i != e; ++i) {
+    if (DecompGEP2.VarIndices[i].IsVscale &&
+        DecompGEP2.VarIndices[i].InvalidVarVscale)
+      return AliasResult::MayAlias;
+  }
+
   // Subtract the GEP2 pointer from the GEP1 pointer to find out their
   // symbolic difference.
   subtractDecomposedGEPs(DecompGEP1, DecompGEP2, AAQI);
 
   // If an inbounds GEP would have to start from an out of bounds address
   // for the two to alias, then we can assume noalias.
-  // TODO: Remove !isScalable() once BasicAA fully supports scalable location
-  // sizes.
   if (*DecompGEP1.InBounds && DecompGEP1.VarIndices.empty() &&
-      V2Size.hasValue() && !V2Size.isScalable() &&
-      DecompGEP1.Offset.sge(V2Size.getValue()) &&
+      V2Size.hasValue() && DecompGEP1.Offset.sge(V2Size.getUintValue()) &&
       isBaseOfObject(DecompGEP2.Base))
     return AliasResult::NoAlias;
 
   if (isa<GEPOperator>(V2)) {
     // Symmetric case to above.
     if (*DecompGEP2.InBounds && DecompGEP1.VarIndices.empty() &&
-        V1Size.hasValue() && !V1Size.isScalable() &&
-        DecompGEP1.Offset.sle(-V1Size.getValue()) &&
+        V1Size.hasValue() && DecompGEP1.Offset.sle(-V1Size.getUintValue()) &&
         isBaseOfObject(DecompGEP1.Base))
       return AliasResult::NoAlias;
   }
@@ -1095,10 +1144,6 @@ AliasResult BasicAAResult::aliasGEP(
     return BaseAlias;
   }
 
-  // Bail on analysing scalable LocationSize
-  if (V1Size.isScalable() || V2Size.isScalable())
-    return AliasResult::MayAlias;
-
   // If there is a constant difference between the pointers, but the difference
   // is less than the size of the associated memory object, then we know
   // that the objects are partially overlapping.  If the difference is
@@ -1128,13 +1173,13 @@ AliasResult BasicAAResult::aliasGEP(
     if (!VLeftSize.hasValue())
       return AliasResult::MayAlias;
 
-    const uint64_t LSize = VLeftSize.getValue();
-    if (Off.ult(LSize)) {
+    const uint64_t LSize = VLeftSize.getUintValue();
+    if (Off.ult(LSize) && !VLeftSize.isScalable()) {
       // Conservatively drop processing if a phi was visited and/or offset is
       // too big.
       AliasResult AR = AliasResult::PartialAlias;
       if (VRightSize.hasValue() && Off.ule(INT32_MAX) &&
-          (Off + VRightSize.getValue()).ule(LSize)) {
+          (Off + VRightSize.getUintValue()).ule(LSize)) {
         // Memory referenced by right pointer is nested. Save the offset in
         // cache. Note that originally offset estimated as GEP1-V2, but
         // AliasResult contains the shift that represents GEP1+Offset=V2.
@@ -1143,7 +1188,8 @@ AliasResult BasicAAResult::aliasGEP(
       }
       return AR;
     }
-    return AliasResult::NoAlias;
+    if (!VLeftSize.isScalable())
+      return AliasResult::NoAlias;
   }
 
   // We need to know both acess sizes for all the following heuristics.
@@ -1156,6 +1202,11 @@ AliasResult BasicAAResult::aliasGEP(
     const VariableGEPIndex &Index = DecompGEP1.VarIndices[i];
     const APInt &Scale = Index.Scale;
     APInt ScaleForGCD = Scale;
+    assert((!Index.IsVscale ||
+            match(Index.Val.V, m_VScale()) ||
+            isa<ConstantInt>(Index.Val.V)) &&
+           "Only @llvm.vscale or constant values are allowed "
+           "when IsVscale is set");
     if (!Index.IsNSW)
       ScaleForGCD =
           APInt::getOneBitSet(Scale.getBitWidth(), Scale.countr_zero());
@@ -1187,6 +1238,56 @@ AliasResult BasicAAResult::aliasGEP(
       OffsetRange = OffsetRange.add(CR);
   }
 
+  // VScale alias analysis.
+  // GEPs with vscale decompose to the expression A*vscale + B.
+  // Since vscale is strictly positive, the larger of the two GEPs can be known.
+  if (DecompGEP1.VarIndices.size() == 1 &&
+      (V1Size.isScalable() || V2Size.isScalable())) {
+    const VariableGEPIndex &ScalableVar = DecompGEP1.VarIndices[0];
+    bool StrictlyPos = false, StrictlyNeg = false;
+    APInt &Off = DecompGEP1.Offset;
+    if (!ScalableVar.IsNegated) {
+      if (Off.isNegative())
+        StrictlyPos = ScalableVar.Scale.ugt(Off.abs());
+      else
+        StrictlyPos = true;
+    } else
+      StrictlyPos = Off.isNonNegative();
+
+    if (ScalableVar.IsNegated) {
+      if (Off.isNonNegative())
+        StrictlyNeg = Off.ult(ScalableVar.Scale.abs());
+      else
+        StrictlyNeg = true;
+    } else
+      StrictlyNeg = Off.isNegative();
+
+    if (StrictlyPos || StrictlyNeg) {
+      LocationSize VLeftSize = V2Size;
+      LocationSize VRightSize = V1Size;
+      const bool Swapped = StrictlyNeg;
+
+      if (Swapped) {
+        std::swap(VLeftSize, VRightSize);
+        Off = -Off;
+      }
+
+      const uint64_t LSize = VLeftSize.getUintValue();
+      if (VLeftSize.isScalable() && ScalableVar.Scale.ult(LSize) &&
+          (ScalableVar.Scale + DecompGEP1.Offset).ult(LSize))
+        return AliasResult::PartialAlias;
+
+      if ((ScalableVar.Scale.uge(LSize) && VLeftSize.isScalable()) ||
+          ((ScalableVar.Scale + DecompGEP1.Offset).uge(LSize) &&
+           !VLeftSize.isScalable()))
+        return AliasResult::NoAlias;
+    }
+  }
+
+  // Bail out on scalable location sizes from this point onwards.
+  if (V1Size.isScalable() || V2Size.isScalable())
+    return AliasResult::MayAlias;
+
   // We now have accesses at two offsets from the same base:
   //  1. (...)*GCD + DecompGEP1.Offset with size V1Size
   //  2. 0 with size V2Size
@@ -1728,8 +1829,11 @@ void BasicAAResult::subtractDecomposedGEPs(DecomposedGEP &DestGEP,
     bool Found = false;
     for (auto I : enumerate(DestGEP.VarIndices)) {
       VariableGEPIndex &Dest = I.value();
-      if (!isValueEqualInPotentialCycles(Dest.Val.V, Src.Val.V, AAQI) ||
-          !Dest.Val.hasSameCastsAs(Src.Val))
+      const bool SrcDestAreVscale = Dest.IsVscale && Src.IsVscale;
+      // Skip the base value checks if both Src and Dest represent vscale.
+      if ((!isValueEqualInPotentialCycles(Dest.Val.V, Src.Val.V, AAQI) &&
+           !SrcDestAreVscale) ||
+          (!Dest.Val.hasSameCastsAs(Src.Val) && !SrcDestAreVscale))
         continue;
 
       // Normalize IsNegated if we're going to lose the NSW flag anyway.
@@ -1753,7 +1857,12 @@ void BasicAAResult::subtractDecomposedGEPs(DecomposedGEP &DestGEP,
 
     // If we didn't consume this entry, add it to the end of the Dest list.
     if (!Found) {
-      VariableGEPIndex Entry = {Src.Val, Src.Scale, Src.CxtI, Src.IsNSW,
+      VariableGEPIndex Entry = {Src.Val,
+                                Src.Scale,
+                                Src.IsVscale,
+                                /* InvalidVarVscale*/ false,
+                                Src.CxtI,
+                                Src.IsNSW,
                                 /* IsNegated */ true};
       DestGEP.VarIndices.push_back(Entry);
     }
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 91effb73c5d3deb..5e00761b6b23695 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -373,7 +373,7 @@ static bool canSkipClobberingStore(const StoreInst *SI,
     return false;
   if (MemoryLocation::get(SI).Size != MemLoc.Size)
     return false;
-  if (std::min(MemLocAlign, SI->getAlign()).value() < MemLoc.Size.getValue())
+  if (std::min(MemLocAlign, SI->getAlign()).value() < MemLoc.Size.getUintValue())
     return false;
 
   auto *LI = dyn_cast<LoadInst>(SI->getValueOperand());
diff --git a/llvm/test/Analysis/AliasSet/memloc-vscale.ll b/llvm/test/Analysis/AliasSet/memloc-vscale.ll
index 8a83645ddaf9a87..ee67f7c15fb41b5 100644
--- a/llvm/test/Analysis/AliasSet/memloc-vscale.ll
+++ b/llvm/test/Analysis/AliasSet/memloc-vscale.ll
@@ -34,7 +34,8 @@ define void @ss2(ptr %p) {
   ret void
 }
 ; CHECK-LABEL: Alias sets for function 'son':
-; CHECK: AliasSet[{{.*}}, 2] may alias, Mod       Pointers: (ptr %g, LocationSize::precise(vscale x 16)), (ptr %p, LocationSize::precise(8))
+; CHECK: AliasSet[{{.*}}, 1] must alias, Mod       Pointers: (ptr %g, LocationSize::precise(vscale x 16))
+; CHECK: AliasSet[{{.*}}, 1] must alias, Mod       Pointers: (ptr %p, LocationSize::precise(8))
 define void @son(ptr %p) {
   %g = getelementptr i8, ptr %p, i64 8
   store <vscale x 2 x i64> zeroinitializer, ptr %g, align 2
diff --git a/llvm/test/Analysis/BasicAA/vscale.ll b/llvm/test/Analysis/BasicAA/vscale.ll
index 0d6d8fea392bbfc..2d277eacdcb8aeb 100644
--- a/llvm/test/Analysis/BasicAA/vscale.ll
+++ b/llvm/test/Analysis/BasicAA/vscale.ll
@@ -4,8 +4,8 @@
 
 ; CHECK-LABEL: gep_alloca_const_offset_1
 ; CHECK-DAG:  MustAlias:    <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep1
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep2
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep2
+; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
 define void @gep_alloca_const_offset_1() {
   %alloc = alloca <vscale x 4 x i32>
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %alloc, i64 0
@@ -17,10 +17,9 @@ define void @gep_alloca_const_offset_1() {
 }
 
 ; CHECK-LABEL: gep_alloca_const_offset_2
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep1
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep2
-; TODO: AliasResult for gep1,gep2 can be improved as MustAlias
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep1
+; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep2
+; CHECK-DAG:  MustAlias:   <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
 define void @gep_alloca_const_offset_2() {
   %alloc = alloca <vscale x 4 x i32>
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %alloc, i64 1
@@ -33,8 +32,8 @@ define void @gep_alloca_const_offset_2() {
 
 ; CHECK-LABEL: gep_alloca_const_offset_3
 ; CHECK-DAG:  MustAlias:    <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep1
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %alloc, i32* %gep2
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %gep1, i32* %gep2
+; CHECK-DAG:  MayAlias: <vscale x 4 x i32>* %alloc, i32* %gep2
+; CHECK-DAG:  MayAlias: <vscale x 4 x i32>* %gep1, i32* %gep2
 define void @gep_alloca_const_offset_3() {
   %alloc = alloca <vscale x 4 x i32>
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %alloc, i64 0
@@ -74,10 +73,9 @@ define void @gep_alloca_symbolic_offset(i64 %idx1, i64 %idx2) {
 }
 
 ; CHECK-LABEL: gep_same_base_const_offset
-; CHECK-DAG:  MayAlias:     i32* %gep1, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep2, <vscale x 4 x i32>* %p
-; TODO: AliasResult for gep1,gep2 can be improved as NoAlias
-; CHECK-DAG:  MayAlias:     i32* %gep1, i32* %gep2
+; CHECK-DAG:  NoAlias:     i32* %gep1, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:     i32* %gep2, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:     i32* %gep1, i32* %gep2
 define void @gep_same_base_const_offset(ptr %p) {
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
   %gep2 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 1
@@ -101,8 +99,8 @@ define void @gep_same_base_symbolic_offset(ptr %p, i64 %idx1, i64 %idx2) {
 }
 
 ; CHECK-LABEL: gep_different_base_const_offset
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %p1
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %p2
+; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %p1
+; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %p2
 ; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %p1, <vscale x 4 x i32>* %p2
 ; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %p2
 ; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %p1
@@ -117,12 +115,41 @@ define void @gep_different_base_const_offset(ptr noalias %p1, ptr noalias %p2) {
   ret void
 }
 
+; getelementptr @llvm.vscale tests
+; CHECK-LABEL: gep_llvm_vscale_no_alias
+; CHECK-DAG: NoAlias:      <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+; CHECK-DAG: MustAlias:    <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep3
+; CHECK-DAG: NoAlias:      <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %gep3
+
+declare i64 @llvm.vscale.i64()
+define void @gep_llvm_vscale_no_alias(ptr %p) {
+  %t1 = tail call i64 @llvm.vscale.i64()
+  %t2 = shl nuw nsw i64 %t1, 3
+  %gep1 = getelementptr i32, ptr %p, i64 %t2
+  %gep2 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1
+  %gep3 = getelementptr <vscale x 4 x i32>, ptr %p, i64 2
+  load <vscale x 4 x i32>, ptr %gep1
+  load <vscale x 4 x i32>, ptr %gep2
+  load <vscale x 4 x i32>, ptr %gep3
+  ret void
+}
+
+; CHECK-LABEL: gep_llvm_vscale_squared_may_alias
+; CHECK-DAG: MayAlias:      <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+define void @gep_llvm_vscale_squared_may_alias(ptr %p) {
+  %t1 = tail call i64 @llvm.vscale.i64()
+  %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 %t1
+  %gep2 = getelementptr i32, ptr %p, i64 1
+  load <vscale x 4 x i32>, ptr %gep1
+  load <vscale x 4 x i32>, ptr %gep2
+  ret void
+}
 ; getelementptr + bitcast
 
 ; CHECK-LABEL: gep_bitcast_1
 ; CHECK-DAG:   MustAlias:    i32* %p, <vscale x 4 x i32>* %p
-; CHECK-DAG:   MayAlias:     i32* %gep1, <vscale x 4 x i32>* %p
-; CHECK-DAG:   MayAlias:     i32* %gep1, i32* %p
+; CHECK-DAG:   NoAlias:      i32* %gep1, <vscale x 4 x i32>* %p
+; CHECK-DAG:   NoAlias:      i32* %gep1, i32* %p
 ; CHECK-DAG:   MayAlias:     i32* %gep2, <vscale x 4 x i32>* %p
 ; CHECK-DAG:   MayAlias:     i32* %gep1, i32* %gep2
 ; CHECK-DAG:   NoAlias:      i32* %gep2, i32* %p
@@ -138,11 +165,11 @@ define void @gep_bitcast_1(ptr %p) {
 
 ; CHECK-LABEL: gep_bitcast_2
 ; CHECK-DAG:  MustAlias:    <vscale x 4 x float>* %p, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep1, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep1, <vscale x 4 x float>* %p
-; CHECK-DAG:  MayAlias:     float* %gep2, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep1, float* %gep2
-; CHECK-DAG:  MayAlias:     float* %gep2, <vscale x 4 x float>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep1, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep1, <vscale x 4 x float>* %p
+; CHECK-DAG:  NoAlias:      float* %gep2, <vscale x 4 x i32>* %p
+; CHECK-DAG:  MustAlias:    i32* %gep1, float* %gep2
+; CHECK-DAG:  NoAlias:      float* %gep2, <vscale x 4 x float>* %p
 define void @gep_bitcast_2(ptr %p) {
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
   %gep2 = getelementptr <vscale x 4 x float>, ptr %p, i64 1, i64 0
@@ -159,8 +186,8 @@ define void @gep_bitcast_2(ptr %p) {
 ; CHECK-DAG:  MayAlias:     i32* %a, <vscale x 4 x i32>* %p
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep_rec_1
-; CHECK-DAG:  MayAlias:     i32* %gep, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep_rec_1, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep_rec_1, <vscale x 4 x i32>* %p
 ; CHECK-DAG:  NoAlias:      i32* %gep, i32* %gep_rec_1
 define void @gep_recursion_level_1(ptr %a, ptr %p) {
   %gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 2
@@ -174,10 +201,10 @@ define void @gep_recursion_level_1(ptr %a, ptr %p) {
 
 ; CHECK-LABEL: gep_recursion_level_1_bitcast
 ; CHECK-DAG:  MustAlias:    i32* %a, <vscale x 4 x i32>* %a
-; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep
-; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep_rec_1
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %a, i32* %gep
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %a, i32* %gep_rec_1
+; CHECK-DAG:  NoAlias:      i32* %a, i32* %gep
+; CHECK-DAG:  NoAlias:      i32* %a, i32* %gep_rec_1
+; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %a, i32* %gep
+; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %a, i32* %gep_rec_1
 ; CHECK-DAG:  NoAlias:      i32* %gep, i32* %gep_rec_1
 define void @gep_recursion_level_1_bitcast(ptr %a) {
   %gep = getelementptr <vscale x 4 x i32>, ptr %a, i64 1, i64 2
@@ -194,9 +221,9 @@ define void @gep_recursion_level_1_bitcast(ptr %a) {
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep_rec_1
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep_rec_2
-; CHECK-DAG:  MayAlias:     i32* %gep, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep_rec_1, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep_rec_2, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep_rec_1, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep_rec_2, <vscale x 4 x i32>* %p
 ; CHECK-DAG:  NoAlias:      i32* %gep, i32* %gep_rec_1
 ; CHECK-DAG:  NoAlias:      i32* %gep, i32* %gep_rec_2
 ; CHECK-DAG:  NoAlias:      i32* %gep_rec_1, i32* %gep_rec_2
@@ -221,34 +248,34 @@ define void @gep_recursion_level_2(ptr %a, ptr %p) {
 ; CHECK-DAG: MayAlias:     i32* %a, i32* %gep_rec_4
 ; CHECK-DAG: MayAlias:     i32* %a, i32* %gep_rec_5
 ; CHECK-DAG: MayAlias:     i32* %a, i32* %gep_rec_6
-; CHECK-DAG: MayAlias:     i32* %gep, <vscale x 4 x i32>* %p
-; CHECK-DAG: MayAlias:     i32* %gep_rec_1, <vscale x 4 x i32>* %p
-; CHECK-DAG: MayAlias:     i32* %gep_rec_2, <vscale x 4 x i32>* %p
-; CHECK-DAG: MayAlias:     i32* %gep_rec_3, <vscale x 4 x i32>* %p
-; CHECK-DAG: MayAlias:     i32* %gep_rec_4, <vscale x 4 x i32>* %p
-; CHECK-DAG: MayAlias:     i32* %gep_rec_5, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias:      i32* %gep, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias:      i32* %gep_rec_1, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias:      i32* %gep_rec_2, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias:      i32* %gep_rec_3, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias:      i32* %gep_rec_4, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias:      i32* %gep_rec_5, <vscale x 4 x i32>* %p
 ; CHECK-DAG: MayAlias:     i32* %gep_rec_6, <vscale x 4 x i32>* %p
 ; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_1
 ; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_2
 ; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_3
 ; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_4
 ; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_5
-; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_6
+; CHECK-DAG: MayAlias:     i32* %gep, i32* %gep_rec_6
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_1, i32* %gep_rec_2
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_1, i32* %gep_rec_3
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_1, i32* %gep_rec_4
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_1, i32* %gep_rec_5
-; CHECK-DAG: NoAlias:      i32* %gep_rec_1, i32* %gep_rec_6
+; CHECK-DAG: MayAlias:     i32* %gep_rec_1, i32* %gep_rec_6
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_2, i32* %gep_rec_3
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_2, i32* %gep_rec_4
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_2, i32* %gep_rec_5
-; CHECK-DAG: NoAlias:      i32* %gep_rec_2, i32* %gep_rec_6
+; CHECK-DAG: MayAlias:     i32* %gep_rec_2, i32* %gep_rec_6
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_3, i32* %gep_rec_4
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_3, i32* %gep_rec_5
-; CHECK-DAG: NoAlias:      i32* %gep_rec_3, i32* %gep_rec_6
+; CHECK-DAG: MayAlias:     i32* %gep_rec_3, i32* %gep_rec_6
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_4, i32* %gep_rec_5
-; CHECK-DAG: NoAlias:      i32* %gep_rec_4, i32* %gep_rec_6
-; CHECK-DAG: NoAlias:      i32* %gep_rec_5, i32* %gep_rec_6
+; CHECK-DAG: MayAlias:     i32* %gep_rec_4, i32* %gep_rec_6
+; CHECK-DAG: MayAlias:     i32* %gep_rec_5, i32* %gep_rec_6
 ; GEP max lookup depth was set to 6.
 define void @gep_recursion_max_lookup_depth_reached(ptr %a, ptr %p) {
   %gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 2
diff --git a/llvm/test/Transforms/GVN/vscale.ll b/llvm/test/Transforms/GVN/vscale.ll
index 71adaed8e5722bd..3ecae9f54fddc7e 100644
--- a/llvm/test/Transforms/GVN/vscale.ll
+++ b/llvm/test/Transforms/GVN/vscale.ll
@@ -84,10 +84,7 @@ define i32 @load_clobber_load_gep3(ptr %p) {
 ; CHECK-LABEL: @load_clobber_load_gep3(
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 1, i64 0
 ; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[GEP1]], align 4
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P]], i64 1, i64 0
-; CHECK-NEXT:    [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
-; CHECK-NEXT:    [[CAST:%.*]] = bitcast float [[LOAD2]] to i32
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD1]], [[CAST]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD1]]
 ; CHECK-NEXT:    ret i32 [[ADD]]
 ;
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
@@ -277,8 +274,7 @@ define void @redundant_load_elimination_2(i1 %c, ptr %p, ptr %q) {
 ; CHECK-NEXT:    store i32 1, ptr [[GEP2]], align 4
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[T:%.*]] = load i32, ptr [[GEP1]], align 4
-; CHECK-NEXT:    store i32 [[T]], ptr [[Q:%.*]], align 4
+; CHECK-NEXT:    store i32 0, ptr [[Q:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.else:
 ; CHECK-NEXT:    ret void
@@ -367,8 +363,7 @@ define void @missing_load_elimination(i1 %c, ptr %p, ptr %q, <vscale x 4 x i32>
 ; CHECK-NEXT:    store <vscale x 4 x i32> [[V:%.*]], ptr [[P1]], align 16
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[T:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
-; CHECK-NEXT:    store <vscale x 4 x i32> [[T]], ptr [[Q:%.*]], align 16
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, ptr [[Q:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.else:
 ; CHECK-NEXT:    ret void


