[llvm] [BasicAA] Add Vscale GEP decomposition on variable index (PR #69152)

Tue Jan 9 02:35:24 PST 2024

https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/69152

>From 5e70335ba098e1da59cb11484ae7c298eb1c30de Mon Sep 17 00:00:00 2001
From: Harvin Iriawan <harvin.iriawan at arm.com>
Date: Mon, 9 Oct 2023 16:07:20 +0100
Subject: [PATCH 1/2] [BasicAA] Add Vscale GEP decomposition on variable index

  Enable BasicAA to analyse scalable GEPs and scalable LocationSizes.
  Scalable GEP expressions, such as those using @llvm.vscale or GEPs of a
  scalable type, are attached to the VariableGEPIndex, with Val
  representing vscale.

  For now, vscale AA only works when the GEP has a single variable index
  (the vscale) plus constant offsets.
---
 llvm/lib/Analysis/BasicAliasAnalysis.cpp     | 202 ++++++++++++++-----
 llvm/test/Analysis/AliasSet/memloc-vscale.ll |   3 +-
 llvm/test/Analysis/BasicAA/vscale.ll         | 111 ++++++----
 llvm/test/Transforms/GVN/vscale.ll           |  11 +-
 4 files changed, 228 insertions(+), 99 deletions(-)

diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index a4a0846df7af15..8f4adc3ad84bc8 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -44,6 +44,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
@@ -63,6 +64,7 @@
 #define DEBUG_TYPE "basicaa"
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 /// Enable analysis of recursive PHI nodes.
 static cl::opt<bool> EnableRecPhiAnalysis("basic-aa-recphi", cl::Hidden,
@@ -357,13 +359,20 @@ struct LinearExpression {
 
 /// Analyzes the specified value as a linear expression: "A*V + B", where A and
 /// B are constant integers.
-static LinearExpression GetLinearExpression(
-    const CastedValue &Val,  const DataLayout &DL, unsigned Depth,
-    AssumptionCache *AC, DominatorTree *DT) {
+static LinearExpression GetLinearExpression(const CastedValue &Val,
+                                            const DataLayout &DL,
+                                            unsigned Depth, AssumptionCache *AC,
+                                            DominatorTree *DT) {
   // Limit our recursion depth.
   if (Depth == 6)
     return Val;
 
+  // If llvm.vscale is matched, set linear expression with scale 1 and offset 0
+  if (match(Val.V, m_VScale())) {
+    return LinearExpression(Val, APInt(Val.getBitWidth(), 1),
+                            APInt(Val.getBitWidth(), 0), true);
+  }
+
   if (const ConstantInt *Const = dyn_cast<ConstantInt>(Val.V))
     return LinearExpression(Val, APInt(Val.getBitWidth(), 0),
                             Val.evaluateWith(Const->getValue()), true);
@@ -473,6 +482,9 @@ struct VariableGEPIndex {
   CastedValue Val;
   APInt Scale;
 
+  // A value representing vscale quantity in a GEP expression
+  bool IsVScale;
+
   // Context instruction to use when querying information about this index.
   const Instruction *CxtI;
 
@@ -495,13 +507,10 @@ struct VariableGEPIndex {
     dbgs() << "\n";
   }
   void print(raw_ostream &OS) const {
-    OS << "(V=" << Val.V->getName()
-       << ", zextbits=" << Val.ZExtBits
-       << ", sextbits=" << Val.SExtBits
-       << ", truncbits=" << Val.TruncBits
-       << ", scale=" << Scale
-       << ", nsw=" << IsNSW
-       << ", negated=" << IsNegated << ")";
+    OS << "(V=" << Val.V->getName() << "  IsVScale=" << IsVScale
+       << ", zextbits=" << Val.ZExtBits << ", sextbits=" << Val.SExtBits
+       << ", truncbits=" << Val.TruncBits << ", scale=" << Scale
+       << ", nsw=" << IsNSW << ", negated=" << IsNegated << ")";
   }
 };
 }
@@ -622,6 +631,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
     for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end();
          I != E; ++I, ++GTI) {
       const Value *Index = *I;
+      const bool ScalableGEP = isa<ScalableVectorType>(GTI.getIndexedType());
       // Compute the (potentially symbolic) offset in bytes for this index.
       if (StructType *STy = GTI.getStructTypeOrNull()) {
         // For a struct, add the member offset.
@@ -633,27 +643,18 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
         continue;
       }
 
+      TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL);
       // For an array/pointer, add the element offset, explicitly scaled.
+      // Skip adding to the constant offset if the GEP index is marked as
+      // scalable; such indices are handled below as variable offsets.
       if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
         if (CIdx->isZero())
           continue;
-
-        // Don't attempt to analyze GEPs if the scalable index is not zero.
-        TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL);
-        if (AllocTypeSize.isScalable()) {
-          Decomposed.Base = V;
-          return Decomposed;
+        if (!ScalableGEP) {
+          Decomposed.Offset += AllocTypeSize.getFixedValue() *
+                               CIdx->getValue().sextOrTrunc(MaxIndexSize);
+          continue;
         }
-
-        Decomposed.Offset += AllocTypeSize.getFixedValue() *
-                             CIdx->getValue().sextOrTrunc(MaxIndexSize);
-        continue;
-      }
-
-      TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL);
-      if (AllocTypeSize.isScalable()) {
-        Decomposed.Base = V;
-        return Decomposed;
       }
 
       GepHasConstantOffset = false;
@@ -663,22 +664,55 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
       unsigned Width = Index->getType()->getIntegerBitWidth();
       unsigned SExtBits = IndexSize > Width ? IndexSize - Width : 0;
       unsigned TruncBits = IndexSize < Width ? Width - IndexSize : 0;
-      LinearExpression LE = GetLinearExpression(
-          CastedValue(Index, 0, SExtBits, TruncBits), DL, 0, AC, DT);
+      // Scalable GEP decomposition
+      // Allow Scalable GEP to be decomposed in the case of
+      //    1. getelementptr <4 x vscale x i32> with 1st index as a constant
+      //    2. Index which have a leaf of @llvm.vscale
+      // In both cases, essentially CastedValue of VariableGEPIndex is Vscale,
+      // however in the 1st case, CastedValue is of type constant, hence another
+      // flag in VariableGEPIndex is created in this case, IsVScale If GEP is
+      // Scalable type, e.g. <4 x vscale x i32>, the first index will have
+      // vscale as a variable index, create a LE in this case
+      LinearExpression LE(CastedValue(Index, 0, SExtBits, TruncBits));
+      if (ScalableGEP) {
+        if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
+          LE = LinearExpression(
+              CastedValue(Index, 0, SExtBits, TruncBits),
+              CastedValue(Index, 0, SExtBits, TruncBits)
+                  .evaluateWith(CIdx->getValue()),
+              APInt(CastedValue(Index, 0, SExtBits, TruncBits).getBitWidth(),
+                    0),
+              true);
+          assert(LE.Offset.isZero() && "For Scalable GEP constant first index, "
+                                       "the offset of LE should be 0");
+        } else {
+          // if first index is not a constant, a single variable gep will
+          // contain 2 variables, bail in this case
+          Decomposed.Base = V;
+          return Decomposed;
+        }
+      } else
+        LE = GetLinearExpression(CastedValue(Index, 0, SExtBits, TruncBits), DL,
+                                 0, AC, DT);
 
       // Scale by the type size.
-      unsigned TypeSize = AllocTypeSize.getFixedValue();
+      unsigned TypeSize = AllocTypeSize.getKnownMinValue();
       LE = LE.mul(APInt(IndexSize, TypeSize), GEPOp->isInBounds());
       Decomposed.Offset += LE.Offset.sext(MaxIndexSize);
       APInt Scale = LE.Scale.sext(MaxIndexSize);
+      bool LEhasVscale = match(LE.Val.V, m_VScale());
 
       // If we already had an occurrence of this index variable, merge this
       // scale into it.  For example, we want to handle:
       //   A[x][x] -> x*16 + x*4 -> x*20
       // This also ensures that 'x' only appears in the index list once.
+      // Only add to IsVScale VariableGEPIndex if it's @llvm.vscale or gep
+      // vscale index
       for (unsigned i = 0, e = Decomposed.VarIndices.size(); i != e; ++i) {
-        if (Decomposed.VarIndices[i].Val.V == LE.Val.V &&
-            Decomposed.VarIndices[i].Val.hasSameCastsAs(LE.Val)) {
+        if (Decomposed.VarIndices[i].Val.hasSameCastsAs(LE.Val) &&
+            ((Decomposed.VarIndices[i].IsVScale &&
+              (ScalableGEP || LEhasVscale)) ||
+             Decomposed.VarIndices[i].Val.V == LE.Val.V)) {
           Scale += Decomposed.VarIndices[i].Scale;
           LE.IsNSW = false; // We cannot guarantee nsw for the merge.
           Decomposed.VarIndices.erase(Decomposed.VarIndices.begin() + i);
@@ -687,11 +721,21 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
       }
 
       // Make sure that we have a scale that makes sense for this target's
-      // index size.
+      // index size.Only allow variableGEP decomposition for constants, in the
+      // case of vscale
       adjustToIndexSize(Scale, IndexSize);
+      bool InvalidVarVScale = (ScalableGEP && LEhasVscale) ||
+                              (ScalableGEP && !isa<ConstantInt>(LE.Val.V));
+
+      assert(!InvalidVarVScale &&
+             "Variable GEP index contains VScale and another variable");
 
       if (!!Scale) {
-        VariableGEPIndex Entry = {LE.Val, Scale, CxtI, LE.IsNSW,
+        VariableGEPIndex Entry = {LE.Val,
+                                  Scale,
+                                  ScalableGEP || LEhasVscale,
+                                  CxtI,
+                                  LE.IsNSW,
                                   /* IsNegated */ false};
         Decomposed.VarIndices.push_back(Entry);
       }
@@ -1074,19 +1118,17 @@ AliasResult BasicAAResult::aliasGEP(
 
   // If an inbounds GEP would have to start from an out of bounds address
   // for the two to alias, then we can assume noalias.
-  // TODO: Remove !isScalable() once BasicAA fully support scalable location
-  // size
   if (*DecompGEP1.InBounds && DecompGEP1.VarIndices.empty() &&
-      V2Size.hasValue() && !V2Size.isScalable() &&
-      DecompGEP1.Offset.sge(V2Size.getValue()) &&
+      V2Size.hasValue() &&
+      DecompGEP1.Offset.sge(V2Size.getValue().getKnownMinValue()) &&
       isBaseOfObject(DecompGEP2.Base))
     return AliasResult::NoAlias;
 
   if (isa<GEPOperator>(V2)) {
     // Symmetric case to above.
     if (*DecompGEP2.InBounds && DecompGEP1.VarIndices.empty() &&
-        V1Size.hasValue() && !V1Size.isScalable() &&
-        DecompGEP1.Offset.sle(-V1Size.getValue()) &&
+        V1Size.hasValue() &&
+        DecompGEP1.Offset.sle(-V1Size.getValue().getKnownMinValue()) &&
         isBaseOfObject(DecompGEP1.Base))
       return AliasResult::NoAlias;
   }
@@ -1110,10 +1152,6 @@ AliasResult BasicAAResult::aliasGEP(
     return BaseAlias;
   }
 
-  // Bail on analysing scalable LocationSize
-  if (V1Size.isScalable() || V2Size.isScalable())
-    return AliasResult::MayAlias;
-
   // If there is a constant difference between the pointers, but the difference
   // is less than the size of the associated memory object, then we know
   // that the objects are partially overlapping.  If the difference is
@@ -1140,16 +1178,16 @@ AliasResult BasicAAResult::aliasGEP(
       Off = -Off;
     }
 
-    if (!VLeftSize.hasValue())
+    if (!VLeftSize.hasValue() || VLeftSize.isScalable())
       return AliasResult::MayAlias;
 
-    const uint64_t LSize = VLeftSize.getValue();
+    const uint64_t LSize = VLeftSize.getValue().getKnownMinValue();
     if (Off.ult(LSize)) {
       // Conservatively drop processing if a phi was visited and/or offset is
       // too big.
       AliasResult AR = AliasResult::PartialAlias;
       if (VRightSize.hasValue() && Off.ule(INT32_MAX) &&
-          (Off + VRightSize.getValue()).ule(LSize)) {
+          (Off + VRightSize.getValue().getKnownMinValue()).ule(LSize)) {
         // Memory referenced by right pointer is nested. Save the offset in
         // cache. Note that originally offset estimated as GEP1-V2, but
         // AliasResult contains the shift that represents GEP1+Offset=V2.
@@ -1165,12 +1203,71 @@ AliasResult BasicAAResult::aliasGEP(
   if (!V1Size.hasValue() || !V2Size.hasValue())
     return AliasResult::MayAlias;
 
+  // VScale Alias Analysis
+  // GEPs with Vscale will have the expression A*Vscale + B (1 variable index
+  // and constant offset) The difference between two GEPs and Scalable
+  // LocationSize can then be analysed as they have the form of
+  //     LSize                SubtractDecomposedGEP output
+  //   A * Vscale                   B * Vscale + C
+  // Since VScale is strictly a positive number (Vscale >= 1), the larger GEP
+  // can be known
+  // TODO: Use knowledge of vscale_range to make the analysis more accurate
+  if (DecompGEP1.VarIndices.size() == 1 && DecompGEP1.VarIndices[0].IsVScale &&
+      (V1Size.isScalable() || V2Size.isScalable())) {
+    const VariableGEPIndex &ScalableVar = DecompGEP1.VarIndices[0];
+    bool StrictlyPos = false, StrictlyNeg = false;
+    APInt &Off = DecompGEP1.Offset;
+    if (!ScalableVar.IsNegated) {
+      if (Off.isNegative())
+        StrictlyPos = ScalableVar.Scale.ugt(Off.abs());
+      else
+        StrictlyPos = true;
+    } else
+      StrictlyPos = Off.isNonNegative();
+
+    if (ScalableVar.IsNegated) {
+      if (Off.isNonNegative())
+        StrictlyNeg = Off.ult(ScalableVar.Scale.abs());
+      else
+        StrictlyNeg = true;
+    } else
+      StrictlyNeg = Off.isNegative();
+
+    if (StrictlyPos || StrictlyNeg) {
+      LocationSize VLeftSize = V2Size;
+      LocationSize VRightSize = V1Size;
+      const bool Swapped = StrictlyNeg;
+
+      if (Swapped) {
+        std::swap(VLeftSize, VRightSize);
+        Off = -Off;
+      }
+
+      const uint64_t LSize = VLeftSize.getValue().getKnownMinValue();
+      if (VLeftSize.isScalable() && ScalableVar.Scale.ult(LSize) &&
+          (ScalableVar.Scale + DecompGEP1.Offset).ult(LSize))
+        return AliasResult::PartialAlias;
+
+      if ((ScalableVar.Scale.uge(LSize) && VLeftSize.isScalable()) ||
+          ((ScalableVar.Scale + DecompGEP1.Offset).uge(LSize) &&
+           !VLeftSize.isScalable()))
+        return AliasResult::NoAlias;
+    }
+  }
+
+  // Bail on Scalable location size from now onwards
+  if (V1Size.isScalable() || V2Size.isScalable())
+    return AliasResult::MayAlias;
+
   APInt GCD;
   ConstantRange OffsetRange = ConstantRange(DecompGEP1.Offset);
   for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) {
     const VariableGEPIndex &Index = DecompGEP1.VarIndices[i];
     const APInt &Scale = Index.Scale;
     APInt ScaleForGCD = Scale;
+    assert((!Index.IsVScale || match(Index.Val.V, m_VScale()) ||
+            isa<ConstantInt>(Index.Val.V)) &&
+           "Not allowed to have non-constant values if IsVScale is set");
     if (!Index.IsNSW)
       ScaleForGCD =
           APInt::getOneBitSet(Scale.getBitWidth(), Scale.countr_zero());
@@ -1752,7 +1849,12 @@ void BasicAAResult::subtractDecomposedGEPs(DecomposedGEP &DestGEP,
     bool Found = false;
     for (auto I : enumerate(DestGEP.VarIndices)) {
       VariableGEPIndex &Dest = I.value();
-      if (!isValueEqualInPotentialCycles(Dest.Val.V, Src.Val.V, AAQI) ||
+      if (Dest.IsVScale != Src.IsVScale)
+        continue;
+      const bool SrcDestAreVScale = Dest.IsVScale && Src.IsVScale;
+      // Suppress base value checks if Src and Dst are of constant VScale
+      if ((!SrcDestAreVScale &&
+           !isValueEqualInPotentialCycles(Dest.Val.V, Src.Val.V, AAQI)) ||
           !Dest.Val.hasSameCastsAs(Src.Val))
         continue;
 
@@ -1777,7 +1879,11 @@ void BasicAAResult::subtractDecomposedGEPs(DecomposedGEP &DestGEP,
 
     // If we didn't consume this entry, add it to the end of the Dest list.
     if (!Found) {
-      VariableGEPIndex Entry = {Src.Val, Src.Scale, Src.CxtI, Src.IsNSW,
+      VariableGEPIndex Entry = {Src.Val,
+                                Src.Scale,
+                                Src.IsVScale,
+                                Src.CxtI,
+                                Src.IsNSW,
                                 /* IsNegated */ true};
       DestGEP.VarIndices.push_back(Entry);
     }
diff --git a/llvm/test/Analysis/AliasSet/memloc-vscale.ll b/llvm/test/Analysis/AliasSet/memloc-vscale.ll
index 8a83645ddaf9a8..ee67f7c15fb41b 100644
--- a/llvm/test/Analysis/AliasSet/memloc-vscale.ll
+++ b/llvm/test/Analysis/AliasSet/memloc-vscale.ll
@@ -34,7 +34,8 @@ define void @ss2(ptr %p) {
   ret void
 }
 ; CHECK-LABEL: Alias sets for function 'son':
-; CHECK: AliasSet[{{.*}}, 2] may alias, Mod       Pointers: (ptr %g, LocationSize::precise(vscale x 16)), (ptr %p, LocationSize::precise(8))
+; CHECK: AliasSet[{{.*}}, 1] must alias, Mod       Pointers: (ptr %g, LocationSize::precise(vscale x 16))
+; CHECK: AliasSet[{{.*}}, 1] must alias, Mod       Pointers: (ptr %p, LocationSize::precise(8))
 define void @son(ptr %p) {
   %g = getelementptr i8, ptr %p, i64 8
   store <vscale x 2 x i64> zeroinitializer, ptr %g, align 2
diff --git a/llvm/test/Analysis/BasicAA/vscale.ll b/llvm/test/Analysis/BasicAA/vscale.ll
index 0d6d8fea392bbf..2d277eacdcb8ae 100644
--- a/llvm/test/Analysis/BasicAA/vscale.ll
+++ b/llvm/test/Analysis/BasicAA/vscale.ll
@@ -4,8 +4,8 @@
 
 ; CHECK-LABEL: gep_alloca_const_offset_1
 ; CHECK-DAG:  MustAlias:    <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep1
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep2
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep2
+; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
 define void @gep_alloca_const_offset_1() {
   %alloc = alloca <vscale x 4 x i32>
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %alloc, i64 0
@@ -17,10 +17,9 @@ define void @gep_alloca_const_offset_1() {
 }
 
 ; CHECK-LABEL: gep_alloca_const_offset_2
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep1
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep2
-; TODO: AliasResult for gep1,gep2 can be improved as MustAlias
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep1
+; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep2
+; CHECK-DAG:  MustAlias:   <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
 define void @gep_alloca_const_offset_2() {
   %alloc = alloca <vscale x 4 x i32>
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %alloc, i64 1
@@ -33,8 +32,8 @@ define void @gep_alloca_const_offset_2() {
 
 ; CHECK-LABEL: gep_alloca_const_offset_3
 ; CHECK-DAG:  MustAlias:    <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep1
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %alloc, i32* %gep2
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %gep1, i32* %gep2
+; CHECK-DAG:  MayAlias: <vscale x 4 x i32>* %alloc, i32* %gep2
+; CHECK-DAG:  MayAlias: <vscale x 4 x i32>* %gep1, i32* %gep2
 define void @gep_alloca_const_offset_3() {
   %alloc = alloca <vscale x 4 x i32>
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %alloc, i64 0
@@ -74,10 +73,9 @@ define void @gep_alloca_symbolic_offset(i64 %idx1, i64 %idx2) {
 }
 
 ; CHECK-LABEL: gep_same_base_const_offset
-; CHECK-DAG:  MayAlias:     i32* %gep1, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep2, <vscale x 4 x i32>* %p
-; TODO: AliasResult for gep1,gep2 can be improved as NoAlias
-; CHECK-DAG:  MayAlias:     i32* %gep1, i32* %gep2
+; CHECK-DAG:  NoAlias:     i32* %gep1, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:     i32* %gep2, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:     i32* %gep1, i32* %gep2
 define void @gep_same_base_const_offset(ptr %p) {
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
   %gep2 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 1
@@ -101,8 +99,8 @@ define void @gep_same_base_symbolic_offset(ptr %p, i64 %idx1, i64 %idx2) {
 }
 
 ; CHECK-LABEL: gep_different_base_const_offset
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %p1
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %p2
+; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %p1
+; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %p2
 ; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %p1, <vscale x 4 x i32>* %p2
 ; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %p2
 ; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %p1
@@ -117,12 +115,41 @@ define void @gep_different_base_const_offset(ptr noalias %p1, ptr noalias %p2) {
   ret void
 }
 
+; getelementptr @llvm.vscale tests
+; CHECK-LABEL: gep_llvm_vscale_no_alias
+; CHECK-DAG: NoAlias:      <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+; CHECK-DAG: MustAlias:    <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep3
+; CHECK-DAG: NoAlias:      <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %gep3
+
+declare i64 @llvm.vscale.i64()
+define void @gep_llvm_vscale_no_alias(ptr %p) {
+  %t1 = tail call i64 @llvm.vscale.i64()
+  %t2 = shl nuw nsw i64 %t1, 3
+  %gep1 = getelementptr i32, ptr %p, i64 %t2
+  %gep2 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1
+  %gep3 = getelementptr <vscale x 4 x i32>, ptr %p, i64 2
+  load <vscale x 4 x i32>, ptr %gep1
+  load <vscale x 4 x i32>, ptr %gep2
+  load <vscale x 4 x i32>, ptr %gep3
+  ret void
+}
+
+; CHECK-LABEL: gep_llvm_vscale_squared_may_alias
+; CHECK-DAG: MayAlias:      <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+define void @gep_llvm_vscale_squared_may_alias(ptr %p) {
+  %t1 = tail call i64 @llvm.vscale.i64()
+  %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 %t1
+  %gep2 = getelementptr i32, ptr %p, i64 1
+  load <vscale x 4 x i32>, ptr %gep1
+  load <vscale x 4 x i32>, ptr %gep2
+  ret void
+}
 ; getelementptr + bitcast
 
 ; CHECK-LABEL: gep_bitcast_1
 ; CHECK-DAG:   MustAlias:    i32* %p, <vscale x 4 x i32>* %p
-; CHECK-DAG:   MayAlias:     i32* %gep1, <vscale x 4 x i32>* %p
-; CHECK-DAG:   MayAlias:     i32* %gep1, i32* %p
+; CHECK-DAG:   NoAlias:      i32* %gep1, <vscale x 4 x i32>* %p
+; CHECK-DAG:   NoAlias:      i32* %gep1, i32* %p
 ; CHECK-DAG:   MayAlias:     i32* %gep2, <vscale x 4 x i32>* %p
 ; CHECK-DAG:   MayAlias:     i32* %gep1, i32* %gep2
 ; CHECK-DAG:   NoAlias:      i32* %gep2, i32* %p
@@ -138,11 +165,11 @@ define void @gep_bitcast_1(ptr %p) {
 
 ; CHECK-LABEL: gep_bitcast_2
 ; CHECK-DAG:  MustAlias:    <vscale x 4 x float>* %p, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep1, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep1, <vscale x 4 x float>* %p
-; CHECK-DAG:  MayAlias:     float* %gep2, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep1, float* %gep2
-; CHECK-DAG:  MayAlias:     float* %gep2, <vscale x 4 x float>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep1, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep1, <vscale x 4 x float>* %p
+; CHECK-DAG:  NoAlias:      float* %gep2, <vscale x 4 x i32>* %p
+; CHECK-DAG:  MustAlias:    i32* %gep1, float* %gep2
+; CHECK-DAG:  NoAlias:      float* %gep2, <vscale x 4 x float>* %p
 define void @gep_bitcast_2(ptr %p) {
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
   %gep2 = getelementptr <vscale x 4 x float>, ptr %p, i64 1, i64 0
@@ -159,8 +186,8 @@ define void @gep_bitcast_2(ptr %p) {
 ; CHECK-DAG:  MayAlias:     i32* %a, <vscale x 4 x i32>* %p
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep_rec_1
-; CHECK-DAG:  MayAlias:     i32* %gep, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep_rec_1, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep_rec_1, <vscale x 4 x i32>* %p
 ; CHECK-DAG:  NoAlias:      i32* %gep, i32* %gep_rec_1
 define void @gep_recursion_level_1(ptr %a, ptr %p) {
   %gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 2
@@ -174,10 +201,10 @@ define void @gep_recursion_level_1(ptr %a, ptr %p) {
 
 ; CHECK-LABEL: gep_recursion_level_1_bitcast
 ; CHECK-DAG:  MustAlias:    i32* %a, <vscale x 4 x i32>* %a
-; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep
-; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep_rec_1
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %a, i32* %gep
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %a, i32* %gep_rec_1
+; CHECK-DAG:  NoAlias:      i32* %a, i32* %gep
+; CHECK-DAG:  NoAlias:      i32* %a, i32* %gep_rec_1
+; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %a, i32* %gep
+; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %a, i32* %gep_rec_1
 ; CHECK-DAG:  NoAlias:      i32* %gep, i32* %gep_rec_1
 define void @gep_recursion_level_1_bitcast(ptr %a) {
   %gep = getelementptr <vscale x 4 x i32>, ptr %a, i64 1, i64 2
@@ -194,9 +221,9 @@ define void @gep_recursion_level_1_bitcast(ptr %a) {
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep_rec_1
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep_rec_2
-; CHECK-DAG:  MayAlias:     i32* %gep, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep_rec_1, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep_rec_2, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep_rec_1, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep_rec_2, <vscale x 4 x i32>* %p
 ; CHECK-DAG:  NoAlias:      i32* %gep, i32* %gep_rec_1
 ; CHECK-DAG:  NoAlias:      i32* %gep, i32* %gep_rec_2
 ; CHECK-DAG:  NoAlias:      i32* %gep_rec_1, i32* %gep_rec_2
@@ -221,34 +248,34 @@ define void @gep_recursion_level_2(ptr %a, ptr %p) {
 ; CHECK-DAG: MayAlias:     i32* %a, i32* %gep_rec_4
 ; CHECK-DAG: MayAlias:     i32* %a, i32* %gep_rec_5
 ; CHECK-DAG: MayAlias:     i32* %a, i32* %gep_rec_6
-; CHECK-DAG: MayAlias:     i32* %gep, <vscale x 4 x i32>* %p
-; CHECK-DAG: MayAlias:     i32* %gep_rec_1, <vscale x 4 x i32>* %p
-; CHECK-DAG: MayAlias:     i32* %gep_rec_2, <vscale x 4 x i32>* %p
-; CHECK-DAG: MayAlias:     i32* %gep_rec_3, <vscale x 4 x i32>* %p
-; CHECK-DAG: MayAlias:     i32* %gep_rec_4, <vscale x 4 x i32>* %p
-; CHECK-DAG: MayAlias:     i32* %gep_rec_5, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias:      i32* %gep, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias:      i32* %gep_rec_1, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias:      i32* %gep_rec_2, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias:      i32* %gep_rec_3, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias:      i32* %gep_rec_4, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias:      i32* %gep_rec_5, <vscale x 4 x i32>* %p
 ; CHECK-DAG: MayAlias:     i32* %gep_rec_6, <vscale x 4 x i32>* %p
 ; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_1
 ; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_2
 ; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_3
 ; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_4
 ; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_5
-; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_6
+; CHECK-DAG: MayAlias:     i32* %gep, i32* %gep_rec_6
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_1, i32* %gep_rec_2
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_1, i32* %gep_rec_3
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_1, i32* %gep_rec_4
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_1, i32* %gep_rec_5
-; CHECK-DAG: NoAlias:      i32* %gep_rec_1, i32* %gep_rec_6
+; CHECK-DAG: MayAlias:     i32* %gep_rec_1, i32* %gep_rec_6
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_2, i32* %gep_rec_3
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_2, i32* %gep_rec_4
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_2, i32* %gep_rec_5
-; CHECK-DAG: NoAlias:      i32* %gep_rec_2, i32* %gep_rec_6
+; CHECK-DAG: MayAlias:     i32* %gep_rec_2, i32* %gep_rec_6
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_3, i32* %gep_rec_4
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_3, i32* %gep_rec_5
-; CHECK-DAG: NoAlias:      i32* %gep_rec_3, i32* %gep_rec_6
+; CHECK-DAG: MayAlias:     i32* %gep_rec_3, i32* %gep_rec_6
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_4, i32* %gep_rec_5
-; CHECK-DAG: NoAlias:      i32* %gep_rec_4, i32* %gep_rec_6
-; CHECK-DAG: NoAlias:      i32* %gep_rec_5, i32* %gep_rec_6
+; CHECK-DAG: MayAlias:     i32* %gep_rec_4, i32* %gep_rec_6
+; CHECK-DAG: MayAlias:     i32* %gep_rec_5, i32* %gep_rec_6
 ; GEP max lookup depth was set to 6.
 define void @gep_recursion_max_lookup_depth_reached(ptr %a, ptr %p) {
   %gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 2
diff --git a/llvm/test/Transforms/GVN/vscale.ll b/llvm/test/Transforms/GVN/vscale.ll
index 71adaed8e5722b..3ecae9f54fddc7 100644
--- a/llvm/test/Transforms/GVN/vscale.ll
+++ b/llvm/test/Transforms/GVN/vscale.ll
@@ -84,10 +84,7 @@ define i32 @load_clobber_load_gep3(ptr %p) {
 ; CHECK-LABEL: @load_clobber_load_gep3(
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 1, i64 0
 ; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[GEP1]], align 4
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P]], i64 1, i64 0
-; CHECK-NEXT:    [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
-; CHECK-NEXT:    [[CAST:%.*]] = bitcast float [[LOAD2]] to i32
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD1]], [[CAST]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD1]]
 ; CHECK-NEXT:    ret i32 [[ADD]]
 ;
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
@@ -277,8 +274,7 @@ define void @redundant_load_elimination_2(i1 %c, ptr %p, ptr %q) {
 ; CHECK-NEXT:    store i32 1, ptr [[GEP2]], align 4
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[T:%.*]] = load i32, ptr [[GEP1]], align 4
-; CHECK-NEXT:    store i32 [[T]], ptr [[Q:%.*]], align 4
+; CHECK-NEXT:    store i32 0, ptr [[Q:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.else:
 ; CHECK-NEXT:    ret void
@@ -367,8 +363,7 @@ define void @missing_load_elimination(i1 %c, ptr %p, ptr %q, <vscale x 4 x i32>
 ; CHECK-NEXT:    store <vscale x 4 x i32> [[V:%.*]], ptr [[P1]], align 16
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[T:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
-; CHECK-NEXT:    store <vscale x 4 x i32> [[T]], ptr [[Q:%.*]], align 16
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, ptr [[Q:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.else:
 ; CHECK-NEXT:    ret void

>From 8af8a42e0749d42c8953c704f0d337e10b138ed5 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Tue, 19 Dec 2023 06:41:47 +0000
Subject: [PATCH 2/2] Updates and extra testing

---
 llvm/lib/Analysis/BasicAliasAnalysis.cpp | 104 ++++++++----------
 llvm/test/Analysis/BasicAA/vscale.ll     | 131 ++++++++++++++++++++++-
 2 files changed, 175 insertions(+), 60 deletions(-)

diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 8f4adc3ad84bc8..a8a425101fb4b7 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -672,7 +672,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
       // however in the 1st case, CastedValue is of type constant, hence another
       // flag in VariableGEPIndex is created in this case, IsVScale If GEP is
       // Scalable type, e.g. <4 x vscale x i32>, the first index will have
-      // vscale as a variable index, create a LE in this case
+      // vscale as a variable index, create a LE in this case.
       LinearExpression LE(CastedValue(Index, 0, SExtBits, TruncBits));
       if (ScalableGEP) {
         if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
@@ -691,9 +691,10 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
           Decomposed.Base = V;
           return Decomposed;
         }
-      } else
+      } else {
         LE = GetLinearExpression(CastedValue(Index, 0, SExtBits, TruncBits), DL,
                                  0, AC, DT);
+      }
 
       // Scale by the type size.
       unsigned TypeSize = AllocTypeSize.getKnownMinValue();
@@ -707,7 +708,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
       //   A[x][x] -> x*16 + x*4 -> x*20
       // This also ensures that 'x' only appears in the index list once.
       // Only add to IsVScale VariableGEPIndex if it's @llvm.vscale or gep
-      // vscale index
+      // vscale index.
       for (unsigned i = 0, e = Decomposed.VarIndices.size(); i != e; ++i) {
         if (Decomposed.VarIndices[i].Val.hasSameCastsAs(LE.Val) &&
             ((Decomposed.VarIndices[i].IsVScale &&
@@ -721,7 +722,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
       }
 
       // Make sure that we have a scale that makes sense for this target's
-      // index size.Only allow variableGEP decomposition for constants, in the
+      // index size. Only allow variableGEP decomposition for constants, in the
       // case of vscale
       adjustToIndexSize(Scale, IndexSize);
       bool InvalidVarVScale = (ScalableGEP && LEhasVscale) ||
@@ -1119,7 +1120,7 @@ AliasResult BasicAAResult::aliasGEP(
   // If an inbounds GEP would have to start from an out of bounds address
   // for the two to alias, then we can assume noalias.
   if (*DecompGEP1.InBounds && DecompGEP1.VarIndices.empty() &&
-      V2Size.hasValue() &&
+      V2Size.hasValue() && !V2Size.isScalable() &&
       DecompGEP1.Offset.sge(V2Size.getValue().getKnownMinValue()) &&
       isBaseOfObject(DecompGEP2.Base))
     return AliasResult::NoAlias;
@@ -1127,7 +1128,7 @@ AliasResult BasicAAResult::aliasGEP(
   if (isa<GEPOperator>(V2)) {
     // Symmetric case to above.
     if (*DecompGEP2.InBounds && DecompGEP1.VarIndices.empty() &&
-        V1Size.hasValue() &&
+        V1Size.hasValue() && !V1Size.isScalable() &&
         DecompGEP1.Offset.sle(-V1Size.getValue().getKnownMinValue()) &&
         isBaseOfObject(DecompGEP1.Base))
       return AliasResult::NoAlias;
@@ -1205,54 +1206,38 @@ AliasResult BasicAAResult::aliasGEP(
 
   // VScale Alias Analysis
   // GEPs with Vscale will have the expression A*Vscale + B (1 variable index
-  // and constant offset) The difference between two GEPs and Scalable
+  // and constant offset). The difference between two GEPs and Scalable
   // LocationSize can then be analysed as they have the form of
-  //     LSize                SubtractDecomposedGEP output
-  //   A * Vscale                   B * Vscale + C
+  //     Size                SubtractDecomposedGEP output
+  //   V1Size [* Vscale]         Scale * Vscale + Off
   // Since VScale is strictly a positive number (Vscale >= 1), the larger GEP
   // can be known
   // TODO: Use knowledge of vscale_range to make the analysis more accurate
-  if (DecompGEP1.VarIndices.size() == 1 && DecompGEP1.VarIndices[0].IsVScale &&
-      (V1Size.isScalable() || V2Size.isScalable())) {
+  if (DecompGEP1.VarIndices.size() == 1 && DecompGEP1.VarIndices[0].IsVScale) {
     const VariableGEPIndex &ScalableVar = DecompGEP1.VarIndices[0];
-    bool StrictlyPos = false, StrictlyNeg = false;
-    APInt &Off = DecompGEP1.Offset;
-    if (!ScalableVar.IsNegated) {
-      if (Off.isNegative())
-        StrictlyPos = ScalableVar.Scale.ugt(Off.abs());
-      else
-        StrictlyPos = true;
-    } else
-      StrictlyPos = Off.isNonNegative();
-
-    if (ScalableVar.IsNegated) {
-      if (Off.isNonNegative())
-        StrictlyNeg = Off.ult(ScalableVar.Scale.abs());
-      else
-        StrictlyNeg = true;
-    } else
-      StrictlyNeg = Off.isNegative();
-
-    if (StrictlyPos || StrictlyNeg) {
-      LocationSize VLeftSize = V2Size;
-      LocationSize VRightSize = V1Size;
-      const bool Swapped = StrictlyNeg;
-
-      if (Swapped) {
-        std::swap(VLeftSize, VRightSize);
-        Off = -Off;
-      }
-
-      const uint64_t LSize = VLeftSize.getValue().getKnownMinValue();
-      if (VLeftSize.isScalable() && ScalableVar.Scale.ult(LSize) &&
-          (ScalableVar.Scale + DecompGEP1.Offset).ult(LSize))
-        return AliasResult::PartialAlias;
+    const APInt &Off = DecompGEP1.Offset;
+    APInt Scale =
+        ScalableVar.IsNegated ? -ScalableVar.Scale : ScalableVar.Scale;
+
+    // Lower limit
+    // V1Size not scalable => Scale*v <= -V1Size-Off providing Scale<=0
+    if (!V1Size.isScalable() && Scale.isNonPositive() &&
+        Scale.sle(-V1Size.getValue().getKnownMinValue() - Off))
+      return AliasResult::NoAlias;
+    // V1Size is scalable => Off <= (-V1Size-Scale)*v, given -V1Size-Scale>=0
+    APInt TS = (-V1Size.getValue().getKnownMinValue() - Scale);
+    if (V1Size.isScalable() && TS.isNonNegative() && Off.sle(TS))
+      return AliasResult::NoAlias;
 
-      if ((ScalableVar.Scale.uge(LSize) && VLeftSize.isScalable()) ||
-          ((ScalableVar.Scale + DecompGEP1.Offset).uge(LSize) &&
-           !VLeftSize.isScalable()))
-        return AliasResult::NoAlias;
-    }
+    // Upper limit
+    // V2Size not scalable => Scale*v >= V2Size-Off providing Scale>=0
+    if (!V2Size.isScalable() && Scale.isNonNegative() &&
+        Scale.sge(V2Size.getValue().getKnownMinValue() - Off))
+      return AliasResult::NoAlias;
+    // V2Size is scalable => Off >= (V2Size-Scale)*v, given V2Size-Scale<=0
+    TS = (V2Size.getValue().getKnownMinValue() - Scale);
+    if (V2Size.isScalable() && TS.isNonPositive() && Off.sge(TS))
+      return AliasResult::NoAlias;
   }
 
   // Bail on Scalable location size from now onwards
@@ -1265,9 +1250,6 @@ AliasResult BasicAAResult::aliasGEP(
     const VariableGEPIndex &Index = DecompGEP1.VarIndices[i];
     const APInt &Scale = Index.Scale;
     APInt ScaleForGCD = Scale;
-    assert((!Index.IsVScale || match(Index.Val.V, m_VScale()) ||
-            isa<ConstantInt>(Index.Val.V)) &&
-           "Not allowed to have non-constant values if IsVScale is set");
     if (!Index.IsNSW)
       ScaleForGCD =
           APInt::getOneBitSet(Scale.getBitWidth(), Scale.countr_zero());
@@ -1277,10 +1259,17 @@ AliasResult BasicAAResult::aliasGEP(
     else
       GCD = APIntOps::GreatestCommonDivisor(GCD, ScaleForGCD.abs());
 
-    ConstantRange CR = computeConstantRange(Index.Val.V, /* ForSigned */ false,
-                                            true, &AC, Index.CxtI);
-    KnownBits Known =
-        computeKnownBits(Index.Val.V, DL, 0, &AC, Index.CxtI, DT);
+    // FIXME: This could be expanded to use a more precise range for vscale.
+    ConstantRange CR =
+        Index.IsVScale
+            ? ConstantRange::getNonEmpty(
+                  APInt(OffsetRange.getBitWidth(), 1),
+                  APInt::getMaxValue(OffsetRange.getBitWidth()))
+            : computeConstantRange(Index.Val.V, /* ForSigned */ false, true,
+                                   &AC, Index.CxtI);
+    KnownBits Known = Index.IsVScale ? KnownBits(OffsetRange.getBitWidth())
+                                     : computeKnownBits(Index.Val.V, DL, 0, &AC,
+                                                        Index.CxtI, DT);
     CR = CR.intersectWith(
         ConstantRange::fromKnownBits(Known, /* Signed */ true),
         ConstantRange::Signed);
@@ -1313,7 +1302,7 @@ AliasResult BasicAAResult::aliasGEP(
     return AliasResult::NoAlias;
 
   // Compute ranges of potentially accessed bytes for both accesses. If the
-  // interseciton is empty, there can be no overlap.
+  // intersection is empty, there can be no overlap.
   unsigned BW = OffsetRange.getBitWidth();
   ConstantRange Range1 = OffsetRange.add(
       ConstantRange(APInt(BW, 0), APInt(BW, V1Size.getValue())));
@@ -1851,9 +1840,8 @@ void BasicAAResult::subtractDecomposedGEPs(DecomposedGEP &DestGEP,
       VariableGEPIndex &Dest = I.value();
       if (Dest.IsVScale != Src.IsVScale)
         continue;
-      const bool SrcDestAreVScale = Dest.IsVScale && Src.IsVScale;
       // Suppress base value checks if Src and Dst are of constant VScale
-      if ((!SrcDestAreVScale &&
+      if ((!Dest.IsVScale &&
            !isValueEqualInPotentialCycles(Dest.Val.V, Src.Val.V, AAQI)) ||
           !Dest.Val.hasSameCastsAs(Src.Val))
         continue;
diff --git a/llvm/test/Analysis/BasicAA/vscale.ll b/llvm/test/Analysis/BasicAA/vscale.ll
index 2d277eacdcb8ae..dd4c16e4e41b03 100644
--- a/llvm/test/Analysis/BasicAA/vscale.ll
+++ b/llvm/test/Analysis/BasicAA/vscale.ll
@@ -120,8 +120,6 @@ define void @gep_different_base_const_offset(ptr noalias %p1, ptr noalias %p2) {
 ; CHECK-DAG: NoAlias:      <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
 ; CHECK-DAG: MustAlias:    <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep3
 ; CHECK-DAG: NoAlias:      <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %gep3
-
-declare i64 @llvm.vscale.i64()
 define void @gep_llvm_vscale_no_alias(ptr %p) {
   %t1 = tail call i64 @llvm.vscale.i64()
   %t2 = shl nuw nsw i64 %t1, 3
@@ -134,6 +132,8 @@ define void @gep_llvm_vscale_no_alias(ptr %p) {
   ret void
 }
 
+declare i64 @llvm.vscale.i64()
+
 ; CHECK-LABEL: gep_llvm_vscale_squared_may_alias
 ; CHECK-DAG: MayAlias:      <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
 define void @gep_llvm_vscale_squared_may_alias(ptr %p) {
@@ -144,6 +144,7 @@ define void @gep_llvm_vscale_squared_may_alias(ptr %p) {
   load <vscale x 4 x i32>, ptr %gep2
   ret void
 }
+
 ; getelementptr + bitcast
 
 ; CHECK-LABEL: gep_bitcast_1
@@ -180,6 +181,132 @@ define void @gep_bitcast_2(ptr %p) {
   ret void
 }
 
+; negative and positive offset tests
+
+; CHECK-LABEL: gep_neg_notscalable
+; CHECK-DAG:   NoAlias:      <4 x i32>* %p, <4 x i32>* %vm16
+; CHECK-DAG:   NoAlias:      <4 x i32>* %m16, <4 x i32>* %p
+; CHECK-DAG:   MayAlias:     <4 x i32>* %m16, <4 x i32>* %vm16
+; CHECK-DAG:   NoAlias:      <4 x i32>* %p, <4 x i32>* %vm16m16
+; CHECK-DAG:   NoAlias:      <4 x i32>* %vm16, <4 x i32>* %vm16m16
+; CHECK-DAG:   NoAlias:      <4 x i32>* %m16, <4 x i32>* %vm16m16
+; CHECK-DAG:   MayAlias:     <4 x i32>* %m16pv16, <4 x i32>* %p
+; CHECK-DAG:   NoAlias:      <4 x i32>* %m16pv16, <4 x i32>* %vm16
+; CHECK-DAG:   NoAlias:      <4 x i32>* %m16, <4 x i32>* %m16pv16
+; CHECK-DAG:   NoAlias:      <4 x i32>* %m16pv16, <4 x i32>* %vm16m16
+define void @gep_neg_notscalable(ptr %p) {
+  %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 -1
+  %m16 = getelementptr <4 x i32>, ptr %p, i64 -1
+  %vm16m16 = getelementptr <4 x i32>, ptr %vm16, i64 -1
+  %m16pv16 = getelementptr <vscale x 4 x i32>, ptr %m16, i64 1
+  load <4 x i32>, ptr %p
+  load <4 x i32>, ptr %vm16
+  load <4 x i32>, ptr %m16
+  load <4 x i32>, ptr %vm16m16
+  load <4 x i32>, ptr %m16pv16
+  ret void
+}
+
+; CHECK-LABEL: gep_neg_scalable
+; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %p
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %vm16, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG:   MustAlias:    <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %p
+; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %m16pv16
+; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16m16
+define void @gep_neg_scalable(ptr %p) {
+  %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 -1
+  %m16 = getelementptr <4 x i32>, ptr %p, i64 -1
+  %vm16m16 = getelementptr <4 x i32>, ptr %vm16, i64 -1
+  %m16pv16 = getelementptr <vscale x 4 x i32>, ptr %vm16, i64 1
+  load <vscale x 4 x i32>, ptr %p
+  load <vscale x 4 x i32>, ptr %vm16
+  load <vscale x 4 x i32>, ptr %m16
+  load <vscale x 4 x i32>, ptr %vm16m16
+  load <vscale x 4 x i32>, ptr %m16pv16
+  ret void
+}
+
+; CHECK-LABEL: gep_pos_notscalable
+; CHECK-DAG:   NoAlias:      <4 x i32>* %p, <4 x i32>* %vm16
+; CHECK-DAG:   NoAlias:      <4 x i32>* %m16, <4 x i32>* %p
+; CHECK-DAG:   MayAlias:     <4 x i32>* %m16, <4 x i32>* %vm16
+; CHECK-DAG:   NoAlias:      <4 x i32>* %p, <4 x i32>* %vm16m16
+; CHECK-DAG:   NoAlias:      <4 x i32>* %vm16, <4 x i32>* %vm16m16
+; CHECK-DAG:   NoAlias:      <4 x i32>* %m16, <4 x i32>* %vm16m16
+; CHECK-DAG:   MustAlias:    <4 x i32>* %m16pv16, <4 x i32>* %p
+; CHECK-DAG:   NoAlias:      <4 x i32>* %m16pv16, <4 x i32>* %vm16
+; CHECK-DAG:   NoAlias:      <4 x i32>* %m16, <4 x i32>* %m16pv16
+; CHECK-DAG:   NoAlias:      <4 x i32>* %m16pv16, <4 x i32>* %vm16m16
+define void @gep_pos_notscalable(ptr %p) {
+  %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1
+  %m16 = getelementptr <4 x i32>, ptr %p, i64 1
+  %vm16m16 = getelementptr <4 x i32>, ptr %vm16, i64 1
+  %m16pv16 = getelementptr <vscale x 4 x i32>, ptr %vm16, i64 -1
+  load <4 x i32>, ptr %p
+  load <4 x i32>, ptr %vm16
+  load <4 x i32>, ptr %m16
+  load <4 x i32>, ptr %vm16m16
+  load <4 x i32>, ptr %m16pv16
+  ret void
+}
+
+; CHECK-LABEL: gep_pos_scalable
+; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %p
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %vm16, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG:   MustAlias:    <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %p
+; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %m16pv16
+; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16m16
+define void @gep_pos_scalable(ptr %p) {
+  %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1
+  %m16 = getelementptr <4 x i32>, ptr %p, i64 1
+  %vm16m16 = getelementptr <4 x i32>, ptr %vm16, i64 1
+  %m16pv16 = getelementptr <vscale x 4 x i32>, ptr %vm16, i64 -1
+  load <vscale x 4 x i32>, ptr %p
+  load <vscale x 4 x i32>, ptr %vm16
+  load <vscale x 4 x i32>, ptr %m16
+  load <vscale x 4 x i32>, ptr %vm16m16
+  load <vscale x 4 x i32>, ptr %m16pv16
+  ret void
+}
+
+; CHECK-LABEL: v1v2types
+; CHECK-DAG:  MustAlias:    <4 x i32>* %p, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:  NoAlias:      <4 x i32>* %p, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %p, <4 x i32>* %vm16
+; CHECK-DAG:  NoAlias:      <4 x i32>* %p, <4 x i32>* %vm16
+; CHECK-DAG:  MustAlias:    <4 x i32>* %vm16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %p
+; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %m16, <4 x i32>* %p
+; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %m16, <4 x i32>* %vm16
+; CHECK-DAG:  NoAlias:      <4 x i32>* %m16, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      <4 x i32>* %m16, <4 x i32>* %p
+; CHECK-DAG:  MayAlias:     <4 x i32>* %m16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:  MayAlias:     <4 x i32>* %m16, <4 x i32>* %vm16
+; CHECK-DAG:  MustAlias:    <4 x i32>* %m16, <vscale x 4 x i32>* %m16
+define void @v1v2types(ptr %p) {
+  %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 -1
+  %m16 = getelementptr <4 x i32>, ptr %p, i64 -1
+  load <vscale x 4 x i32>, ptr %p
+  load <4 x i32>, ptr %p
+  load <vscale x 4 x i32>, ptr %vm16
+  load <4 x i32>, ptr %vm16
+  load <vscale x 4 x i32>, ptr %m16
+  load <4 x i32>, ptr %m16
+  ret void
+}
+
 ; getelementptr recursion
 
 ; CHECK-LABEL: gep_recursion_level_1