[llvm] [BasicAA] Add Vscale GEP decomposition on variable index (PR #69152)

David Green via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 16 04:13:05 PST 2024


https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/69152

>From 5284840a459ffbc618448e676c0775ee40cacba4 Mon Sep 17 00:00:00 2001
From: Harvin Iriawan <harvin.iriawan at arm.com>
Date: Mon, 9 Oct 2023 16:07:20 +0100
Subject: [PATCH 1/3] [BasicAA] Add Vscale GEP decomposition on variable index

  Enable BasicAA to analyse scalable GEPs and scalable LocationSizes.
  Scalable GEP expressions, such as uses of @llvm.vscale and GEPs of
  scalable types, are attached to the VariableGEPIndex, with Val
  representing vscale.

  For now, vscale-aware alias analysis only handles GEPs with a single
  variable index (the vscale) plus constant offsets.
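
  As an example (taken from the new test gep_llvm_vscale_no_alias in
  llvm/test/Analysis/BasicAA/vscale.ll), both of the offsets below
  decompose to a multiple of vscale plus a constant, so the two
  <vscale x 4 x i32> accesses through %gep1 and %gep2 can now be proven
  NoAlias:

    %t1 = tail call i64 @llvm.vscale.i64()
    %t2 = shl nuw nsw i64 %t1, 3
    %gep1 = getelementptr i32, ptr %p, i64 %t2               ; %p + 32 * vscale
    %gep2 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1  ; %p + 16 * vscale
    load <vscale x 4 x i32>, ptr %gep1
    load <vscale x 4 x i32>, ptr %gep2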
---
 llvm/lib/Analysis/BasicAliasAnalysis.cpp     | 202 ++++++++++++++-----
 llvm/test/Analysis/AliasSet/memloc-vscale.ll |   3 +-
 llvm/test/Analysis/BasicAA/vscale.ll         | 111 ++++++----
 llvm/test/Transforms/GVN/vscale.ll           |  11 +-
 4 files changed, 228 insertions(+), 99 deletions(-)

diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index a4a0846df7af15..8f4adc3ad84bc8 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -44,6 +44,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
@@ -63,6 +64,7 @@
 #define DEBUG_TYPE "basicaa"
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 /// Enable analysis of recursive PHI nodes.
 static cl::opt<bool> EnableRecPhiAnalysis("basic-aa-recphi", cl::Hidden,
@@ -357,13 +359,20 @@ struct LinearExpression {
 
 /// Analyzes the specified value as a linear expression: "A*V + B", where A and
 /// B are constant integers.
-static LinearExpression GetLinearExpression(
-    const CastedValue &Val,  const DataLayout &DL, unsigned Depth,
-    AssumptionCache *AC, DominatorTree *DT) {
+static LinearExpression GetLinearExpression(const CastedValue &Val,
+                                            const DataLayout &DL,
+                                            unsigned Depth, AssumptionCache *AC,
+                                            DominatorTree *DT) {
   // Limit our recursion depth.
   if (Depth == 6)
     return Val;
 
+  // If llvm.vscale is matched, set linear expression with scale 1 and offset 0
+  if (match(Val.V, m_VScale())) {
+    return LinearExpression(Val, APInt(Val.getBitWidth(), 1),
+                            APInt(Val.getBitWidth(), 0), true);
+  }
+
   if (const ConstantInt *Const = dyn_cast<ConstantInt>(Val.V))
     return LinearExpression(Val, APInt(Val.getBitWidth(), 0),
                             Val.evaluateWith(Const->getValue()), true);
@@ -473,6 +482,9 @@ struct VariableGEPIndex {
   CastedValue Val;
   APInt Scale;
 
+  // A value representing vscale quantity in a GEP expression
+  bool IsVScale;
+
   // Context instruction to use when querying information about this index.
   const Instruction *CxtI;
 
@@ -495,13 +507,10 @@ struct VariableGEPIndex {
     dbgs() << "\n";
   }
   void print(raw_ostream &OS) const {
-    OS << "(V=" << Val.V->getName()
-       << ", zextbits=" << Val.ZExtBits
-       << ", sextbits=" << Val.SExtBits
-       << ", truncbits=" << Val.TruncBits
-       << ", scale=" << Scale
-       << ", nsw=" << IsNSW
-       << ", negated=" << IsNegated << ")";
+    OS << "(V=" << Val.V->getName() << "  IsVScale=" << IsVScale
+       << ", zextbits=" << Val.ZExtBits << ", sextbits=" << Val.SExtBits
+       << ", truncbits=" << Val.TruncBits << ", scale=" << Scale
+       << ", nsw=" << IsNSW << ", negated=" << IsNegated << ")";
   }
 };
 }
@@ -622,6 +631,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
     for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end();
          I != E; ++I, ++GTI) {
       const Value *Index = *I;
+      const bool ScalableGEP = isa<ScalableVectorType>(GTI.getIndexedType());
       // Compute the (potentially symbolic) offset in bytes for this index.
       if (StructType *STy = GTI.getStructTypeOrNull()) {
         // For a struct, add the member offset.
@@ -633,27 +643,18 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
         continue;
       }
 
+      TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL);
       // For an array/pointer, add the element offset, explicitly scaled.
+      // Skip adding to constant offset if GEP index is marked as scalable
+      // they are handled below as variable offset
       if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
         if (CIdx->isZero())
           continue;
-
-        // Don't attempt to analyze GEPs if the scalable index is not zero.
-        TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL);
-        if (AllocTypeSize.isScalable()) {
-          Decomposed.Base = V;
-          return Decomposed;
+        if (!ScalableGEP) {
+          Decomposed.Offset += AllocTypeSize.getFixedValue() *
+                               CIdx->getValue().sextOrTrunc(MaxIndexSize);
+          continue;
         }
-
-        Decomposed.Offset += AllocTypeSize.getFixedValue() *
-                             CIdx->getValue().sextOrTrunc(MaxIndexSize);
-        continue;
-      }
-
-      TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL);
-      if (AllocTypeSize.isScalable()) {
-        Decomposed.Base = V;
-        return Decomposed;
       }
 
       GepHasConstantOffset = false;
@@ -663,22 +664,55 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
       unsigned Width = Index->getType()->getIntegerBitWidth();
       unsigned SExtBits = IndexSize > Width ? IndexSize - Width : 0;
       unsigned TruncBits = IndexSize < Width ? Width - IndexSize : 0;
-      LinearExpression LE = GetLinearExpression(
-          CastedValue(Index, 0, SExtBits, TruncBits), DL, 0, AC, DT);
+      // Scalable GEP decomposition
+      // Allow Scalable GEP to be decomposed in the case of
+      //    1. getelementptr <4 x vscale x i32> with 1st index as a constant
+      //    2. Index which have a leaf of @llvm.vscale
+      // In both cases, essentially CastedValue of VariableGEPIndex is Vscale,
+      // however in the 1st case, CastedValue is of type constant, hence another
+      // flag in VariableGEPIndex is created in this case, IsVScale If GEP is
+      // Scalable type, e.g. <4 x vscale x i32>, the first index will have
+      // vscale as a variable index, create a LE in this case
+      LinearExpression LE(CastedValue(Index, 0, SExtBits, TruncBits));
+      if (ScalableGEP) {
+        if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
+          LE = LinearExpression(
+              CastedValue(Index, 0, SExtBits, TruncBits),
+              CastedValue(Index, 0, SExtBits, TruncBits)
+                  .evaluateWith(CIdx->getValue()),
+              APInt(CastedValue(Index, 0, SExtBits, TruncBits).getBitWidth(),
+                    0),
+              true);
+          assert(LE.Offset.isZero() && "For Scalable GEP constant first index, "
+                                       "the offset of LE should be 0");
+        } else {
+          // if first index is not a constant, a single variable gep will
+          // contain 2 variables, bail in this case
+          Decomposed.Base = V;
+          return Decomposed;
+        }
+      } else
+        LE = GetLinearExpression(CastedValue(Index, 0, SExtBits, TruncBits), DL,
+                                 0, AC, DT);
 
       // Scale by the type size.
-      unsigned TypeSize = AllocTypeSize.getFixedValue();
+      unsigned TypeSize = AllocTypeSize.getKnownMinValue();
       LE = LE.mul(APInt(IndexSize, TypeSize), GEPOp->isInBounds());
       Decomposed.Offset += LE.Offset.sext(MaxIndexSize);
       APInt Scale = LE.Scale.sext(MaxIndexSize);
+      bool LEhasVscale = match(LE.Val.V, m_VScale());
 
       // If we already had an occurrence of this index variable, merge this
       // scale into it.  For example, we want to handle:
       //   A[x][x] -> x*16 + x*4 -> x*20
       // This also ensures that 'x' only appears in the index list once.
+      // Only add to IsVScale VariableGEPIndex if it's @llvm.vscale or gep
+      // vscale index
       for (unsigned i = 0, e = Decomposed.VarIndices.size(); i != e; ++i) {
-        if (Decomposed.VarIndices[i].Val.V == LE.Val.V &&
-            Decomposed.VarIndices[i].Val.hasSameCastsAs(LE.Val)) {
+        if (Decomposed.VarIndices[i].Val.hasSameCastsAs(LE.Val) &&
+            ((Decomposed.VarIndices[i].IsVScale &&
+              (ScalableGEP || LEhasVscale)) ||
+             Decomposed.VarIndices[i].Val.V == LE.Val.V)) {
           Scale += Decomposed.VarIndices[i].Scale;
           LE.IsNSW = false; // We cannot guarantee nsw for the merge.
           Decomposed.VarIndices.erase(Decomposed.VarIndices.begin() + i);
@@ -687,11 +721,21 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
       }
 
       // Make sure that we have a scale that makes sense for this target's
-      // index size.
+      // index size.Only allow variableGEP decomposition for constants, in the
+      // case of vscale
       adjustToIndexSize(Scale, IndexSize);
+      bool InvalidVarVScale = (ScalableGEP && LEhasVscale) ||
+                              (ScalableGEP && !isa<ConstantInt>(LE.Val.V));
+
+      assert(!InvalidVarVScale &&
+             "Variable GEP index contains VScale and another variable");
 
       if (!!Scale) {
-        VariableGEPIndex Entry = {LE.Val, Scale, CxtI, LE.IsNSW,
+        VariableGEPIndex Entry = {LE.Val,
+                                  Scale,
+                                  ScalableGEP || LEhasVscale,
+                                  CxtI,
+                                  LE.IsNSW,
                                   /* IsNegated */ false};
         Decomposed.VarIndices.push_back(Entry);
       }
@@ -1074,19 +1118,17 @@ AliasResult BasicAAResult::aliasGEP(
 
   // If an inbounds GEP would have to start from an out of bounds address
   // for the two to alias, then we can assume noalias.
-  // TODO: Remove !isScalable() once BasicAA fully support scalable location
-  // size
   if (*DecompGEP1.InBounds && DecompGEP1.VarIndices.empty() &&
-      V2Size.hasValue() && !V2Size.isScalable() &&
-      DecompGEP1.Offset.sge(V2Size.getValue()) &&
+      V2Size.hasValue() &&
+      DecompGEP1.Offset.sge(V2Size.getValue().getKnownMinValue()) &&
       isBaseOfObject(DecompGEP2.Base))
     return AliasResult::NoAlias;
 
   if (isa<GEPOperator>(V2)) {
     // Symmetric case to above.
     if (*DecompGEP2.InBounds && DecompGEP1.VarIndices.empty() &&
-        V1Size.hasValue() && !V1Size.isScalable() &&
-        DecompGEP1.Offset.sle(-V1Size.getValue()) &&
+        V1Size.hasValue() &&
+        DecompGEP1.Offset.sle(-V1Size.getValue().getKnownMinValue()) &&
         isBaseOfObject(DecompGEP1.Base))
       return AliasResult::NoAlias;
   }
@@ -1110,10 +1152,6 @@ AliasResult BasicAAResult::aliasGEP(
     return BaseAlias;
   }
 
-  // Bail on analysing scalable LocationSize
-  if (V1Size.isScalable() || V2Size.isScalable())
-    return AliasResult::MayAlias;
-
   // If there is a constant difference between the pointers, but the difference
   // is less than the size of the associated memory object, then we know
   // that the objects are partially overlapping.  If the difference is
@@ -1140,16 +1178,16 @@ AliasResult BasicAAResult::aliasGEP(
       Off = -Off;
     }
 
-    if (!VLeftSize.hasValue())
+    if (!VLeftSize.hasValue() || VLeftSize.isScalable())
       return AliasResult::MayAlias;
 
-    const uint64_t LSize = VLeftSize.getValue();
+    const uint64_t LSize = VLeftSize.getValue().getKnownMinValue();
     if (Off.ult(LSize)) {
       // Conservatively drop processing if a phi was visited and/or offset is
       // too big.
       AliasResult AR = AliasResult::PartialAlias;
       if (VRightSize.hasValue() && Off.ule(INT32_MAX) &&
-          (Off + VRightSize.getValue()).ule(LSize)) {
+          (Off + VRightSize.getValue().getKnownMinValue()).ule(LSize)) {
         // Memory referenced by right pointer is nested. Save the offset in
         // cache. Note that originally offset estimated as GEP1-V2, but
         // AliasResult contains the shift that represents GEP1+Offset=V2.
@@ -1165,12 +1203,71 @@ AliasResult BasicAAResult::aliasGEP(
   if (!V1Size.hasValue() || !V2Size.hasValue())
     return AliasResult::MayAlias;
 
+  // VScale Alias Analysis
+  // GEPs with Vscale will have the expression A*Vscale + B (1 variable index
+  // and constant offset) The difference between two GEPs and Scalable
+  // LocationSize can then be analysed as they have the form of
+  //     LSize                SubtractDecomposedGEP output
+  //   A * Vscale                   B * Vscale + C
+  // Since VScale is strictly a positive number (Vscale >= 1), the larger GEP
+  // can be known
+  // TODO: Use knowledge of vscale_range to make the analysis more accurate
+  if (DecompGEP1.VarIndices.size() == 1 && DecompGEP1.VarIndices[0].IsVScale &&
+      (V1Size.isScalable() || V2Size.isScalable())) {
+    const VariableGEPIndex &ScalableVar = DecompGEP1.VarIndices[0];
+    bool StrictlyPos = false, StrictlyNeg = false;
+    APInt &Off = DecompGEP1.Offset;
+    if (!ScalableVar.IsNegated) {
+      if (Off.isNegative())
+        StrictlyPos = ScalableVar.Scale.ugt(Off.abs());
+      else
+        StrictlyPos = true;
+    } else
+      StrictlyPos = Off.isNonNegative();
+
+    if (ScalableVar.IsNegated) {
+      if (Off.isNonNegative())
+        StrictlyNeg = Off.ult(ScalableVar.Scale.abs());
+      else
+        StrictlyNeg = true;
+    } else
+      StrictlyNeg = Off.isNegative();
+
+    if (StrictlyPos || StrictlyNeg) {
+      LocationSize VLeftSize = V2Size;
+      LocationSize VRightSize = V1Size;
+      const bool Swapped = StrictlyNeg;
+
+      if (Swapped) {
+        std::swap(VLeftSize, VRightSize);
+        Off = -Off;
+      }
+
+      const uint64_t LSize = VLeftSize.getValue().getKnownMinValue();
+      if (VLeftSize.isScalable() && ScalableVar.Scale.ult(LSize) &&
+          (ScalableVar.Scale + DecompGEP1.Offset).ult(LSize))
+        return AliasResult::PartialAlias;
+
+      if ((ScalableVar.Scale.uge(LSize) && VLeftSize.isScalable()) ||
+          ((ScalableVar.Scale + DecompGEP1.Offset).uge(LSize) &&
+           !VLeftSize.isScalable()))
+        return AliasResult::NoAlias;
+    }
+  }
+
+  // Bail on Scalable location size from now onwards
+  if (V1Size.isScalable() || V2Size.isScalable())
+    return AliasResult::MayAlias;
+
   APInt GCD;
   ConstantRange OffsetRange = ConstantRange(DecompGEP1.Offset);
   for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) {
     const VariableGEPIndex &Index = DecompGEP1.VarIndices[i];
     const APInt &Scale = Index.Scale;
     APInt ScaleForGCD = Scale;
+    assert((!Index.IsVScale || match(Index.Val.V, m_VScale()) ||
+            isa<ConstantInt>(Index.Val.V)) &&
+           "Not allowed to have non-constant values if IsVScale is set");
     if (!Index.IsNSW)
       ScaleForGCD =
           APInt::getOneBitSet(Scale.getBitWidth(), Scale.countr_zero());
@@ -1752,7 +1849,12 @@ void BasicAAResult::subtractDecomposedGEPs(DecomposedGEP &DestGEP,
     bool Found = false;
     for (auto I : enumerate(DestGEP.VarIndices)) {
       VariableGEPIndex &Dest = I.value();
-      if (!isValueEqualInPotentialCycles(Dest.Val.V, Src.Val.V, AAQI) ||
+      if (Dest.IsVScale != Src.IsVScale)
+        continue;
+      const bool SrcDestAreVScale = Dest.IsVScale && Src.IsVScale;
+      // Suppress base value checks if Src and Dst are of constant VScale
+      if ((!SrcDestAreVScale &&
+           !isValueEqualInPotentialCycles(Dest.Val.V, Src.Val.V, AAQI)) ||
           !Dest.Val.hasSameCastsAs(Src.Val))
         continue;
 
@@ -1777,7 +1879,11 @@ void BasicAAResult::subtractDecomposedGEPs(DecomposedGEP &DestGEP,
 
     // If we didn't consume this entry, add it to the end of the Dest list.
     if (!Found) {
-      VariableGEPIndex Entry = {Src.Val, Src.Scale, Src.CxtI, Src.IsNSW,
+      VariableGEPIndex Entry = {Src.Val,
+                                Src.Scale,
+                                Src.IsVScale,
+                                Src.CxtI,
+                                Src.IsNSW,
                                 /* IsNegated */ true};
       DestGEP.VarIndices.push_back(Entry);
     }
diff --git a/llvm/test/Analysis/AliasSet/memloc-vscale.ll b/llvm/test/Analysis/AliasSet/memloc-vscale.ll
index 8a83645ddaf9a8..ee67f7c15fb41b 100644
--- a/llvm/test/Analysis/AliasSet/memloc-vscale.ll
+++ b/llvm/test/Analysis/AliasSet/memloc-vscale.ll
@@ -34,7 +34,8 @@ define void @ss2(ptr %p) {
   ret void
 }
 ; CHECK-LABEL: Alias sets for function 'son':
-; CHECK: AliasSet[{{.*}}, 2] may alias, Mod       Pointers: (ptr %g, LocationSize::precise(vscale x 16)), (ptr %p, LocationSize::precise(8))
+; CHECK: AliasSet[{{.*}}, 1] must alias, Mod       Pointers: (ptr %g, LocationSize::precise(vscale x 16))
+; CHECK: AliasSet[{{.*}}, 1] must alias, Mod       Pointers: (ptr %p, LocationSize::precise(8))
 define void @son(ptr %p) {
   %g = getelementptr i8, ptr %p, i64 8
   store <vscale x 2 x i64> zeroinitializer, ptr %g, align 2
diff --git a/llvm/test/Analysis/BasicAA/vscale.ll b/llvm/test/Analysis/BasicAA/vscale.ll
index 0d6d8fea392bbf..2d277eacdcb8ae 100644
--- a/llvm/test/Analysis/BasicAA/vscale.ll
+++ b/llvm/test/Analysis/BasicAA/vscale.ll
@@ -4,8 +4,8 @@
 
 ; CHECK-LABEL: gep_alloca_const_offset_1
 ; CHECK-DAG:  MustAlias:    <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep1
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep2
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep2
+; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
 define void @gep_alloca_const_offset_1() {
   %alloc = alloca <vscale x 4 x i32>
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %alloc, i64 0
@@ -17,10 +17,9 @@ define void @gep_alloca_const_offset_1() {
 }
 
 ; CHECK-LABEL: gep_alloca_const_offset_2
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep1
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep2
-; TODO: AliasResult for gep1,gep2 can be improved as MustAlias
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep1
+; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep2
+; CHECK-DAG:  MustAlias:   <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
 define void @gep_alloca_const_offset_2() {
   %alloc = alloca <vscale x 4 x i32>
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %alloc, i64 1
@@ -33,8 +32,8 @@ define void @gep_alloca_const_offset_2() {
 
 ; CHECK-LABEL: gep_alloca_const_offset_3
 ; CHECK-DAG:  MustAlias:    <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep1
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %alloc, i32* %gep2
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %gep1, i32* %gep2
+; CHECK-DAG:  MayAlias: <vscale x 4 x i32>* %alloc, i32* %gep2
+; CHECK-DAG:  MayAlias: <vscale x 4 x i32>* %gep1, i32* %gep2
 define void @gep_alloca_const_offset_3() {
   %alloc = alloca <vscale x 4 x i32>
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %alloc, i64 0
@@ -74,10 +73,9 @@ define void @gep_alloca_symbolic_offset(i64 %idx1, i64 %idx2) {
 }
 
 ; CHECK-LABEL: gep_same_base_const_offset
-; CHECK-DAG:  MayAlias:     i32* %gep1, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep2, <vscale x 4 x i32>* %p
-; TODO: AliasResult for gep1,gep2 can be improved as NoAlias
-; CHECK-DAG:  MayAlias:     i32* %gep1, i32* %gep2
+; CHECK-DAG:  NoAlias:     i32* %gep1, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:     i32* %gep2, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:     i32* %gep1, i32* %gep2
 define void @gep_same_base_const_offset(ptr %p) {
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
   %gep2 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 1
@@ -101,8 +99,8 @@ define void @gep_same_base_symbolic_offset(ptr %p, i64 %idx1, i64 %idx2) {
 }
 
 ; CHECK-LABEL: gep_different_base_const_offset
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %p1
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %p2
+; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %p1
+; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %p2
 ; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %p1, <vscale x 4 x i32>* %p2
 ; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %p2
 ; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %p1
@@ -117,12 +115,41 @@ define void @gep_different_base_const_offset(ptr noalias %p1, ptr noalias %p2) {
   ret void
 }
 
+; getelementptr @llvm.vscale tests
+; CHECK-LABEL: gep_llvm_vscale_no_alias
+; CHECK-DAG: NoAlias:      <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+; CHECK-DAG: MustAlias:    <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep3
+; CHECK-DAG: NoAlias:      <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %gep3
+
+declare i64 @llvm.vscale.i64()
+define void @gep_llvm_vscale_no_alias(ptr %p) {
+  %t1 = tail call i64 @llvm.vscale.i64()
+  %t2 = shl nuw nsw i64 %t1, 3
+  %gep1 = getelementptr i32, ptr %p, i64 %t2
+  %gep2 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1
+  %gep3 = getelementptr <vscale x 4 x i32>, ptr %p, i64 2
+  load <vscale x 4 x i32>, ptr %gep1
+  load <vscale x 4 x i32>, ptr %gep2
+  load <vscale x 4 x i32>, ptr %gep3
+  ret void
+}
+
+; CHECK-LABEL: gep_llvm_vscale_squared_may_alias
+; CHECK-DAG: MayAlias:      <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+define void @gep_llvm_vscale_squared_may_alias(ptr %p) {
+  %t1 = tail call i64 @llvm.vscale.i64()
+  %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 %t1
+  %gep2 = getelementptr i32, ptr %p, i64 1
+  load <vscale x 4 x i32>, ptr %gep1
+  load <vscale x 4 x i32>, ptr %gep2
+  ret void
+}
 ; getelementptr + bitcast
 
 ; CHECK-LABEL: gep_bitcast_1
 ; CHECK-DAG:   MustAlias:    i32* %p, <vscale x 4 x i32>* %p
-; CHECK-DAG:   MayAlias:     i32* %gep1, <vscale x 4 x i32>* %p
-; CHECK-DAG:   MayAlias:     i32* %gep1, i32* %p
+; CHECK-DAG:   NoAlias:      i32* %gep1, <vscale x 4 x i32>* %p
+; CHECK-DAG:   NoAlias:      i32* %gep1, i32* %p
 ; CHECK-DAG:   MayAlias:     i32* %gep2, <vscale x 4 x i32>* %p
 ; CHECK-DAG:   MayAlias:     i32* %gep1, i32* %gep2
 ; CHECK-DAG:   NoAlias:      i32* %gep2, i32* %p
@@ -138,11 +165,11 @@ define void @gep_bitcast_1(ptr %p) {
 
 ; CHECK-LABEL: gep_bitcast_2
 ; CHECK-DAG:  MustAlias:    <vscale x 4 x float>* %p, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep1, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep1, <vscale x 4 x float>* %p
-; CHECK-DAG:  MayAlias:     float* %gep2, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep1, float* %gep2
-; CHECK-DAG:  MayAlias:     float* %gep2, <vscale x 4 x float>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep1, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep1, <vscale x 4 x float>* %p
+; CHECK-DAG:  NoAlias:      float* %gep2, <vscale x 4 x i32>* %p
+; CHECK-DAG:  MustAlias:    i32* %gep1, float* %gep2
+; CHECK-DAG:  NoAlias:      float* %gep2, <vscale x 4 x float>* %p
 define void @gep_bitcast_2(ptr %p) {
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
   %gep2 = getelementptr <vscale x 4 x float>, ptr %p, i64 1, i64 0
@@ -159,8 +186,8 @@ define void @gep_bitcast_2(ptr %p) {
 ; CHECK-DAG:  MayAlias:     i32* %a, <vscale x 4 x i32>* %p
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep_rec_1
-; CHECK-DAG:  MayAlias:     i32* %gep, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep_rec_1, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep_rec_1, <vscale x 4 x i32>* %p
 ; CHECK-DAG:  NoAlias:      i32* %gep, i32* %gep_rec_1
 define void @gep_recursion_level_1(ptr %a, ptr %p) {
   %gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 2
@@ -174,10 +201,10 @@ define void @gep_recursion_level_1(ptr %a, ptr %p) {
 
 ; CHECK-LABEL: gep_recursion_level_1_bitcast
 ; CHECK-DAG:  MustAlias:    i32* %a, <vscale x 4 x i32>* %a
-; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep
-; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep_rec_1
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %a, i32* %gep
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %a, i32* %gep_rec_1
+; CHECK-DAG:  NoAlias:      i32* %a, i32* %gep
+; CHECK-DAG:  NoAlias:      i32* %a, i32* %gep_rec_1
+; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %a, i32* %gep
+; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %a, i32* %gep_rec_1
 ; CHECK-DAG:  NoAlias:      i32* %gep, i32* %gep_rec_1
 define void @gep_recursion_level_1_bitcast(ptr %a) {
   %gep = getelementptr <vscale x 4 x i32>, ptr %a, i64 1, i64 2
@@ -194,9 +221,9 @@ define void @gep_recursion_level_1_bitcast(ptr %a) {
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep_rec_1
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep_rec_2
-; CHECK-DAG:  MayAlias:     i32* %gep, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep_rec_1, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep_rec_2, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep_rec_1, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep_rec_2, <vscale x 4 x i32>* %p
 ; CHECK-DAG:  NoAlias:      i32* %gep, i32* %gep_rec_1
 ; CHECK-DAG:  NoAlias:      i32* %gep, i32* %gep_rec_2
 ; CHECK-DAG:  NoAlias:      i32* %gep_rec_1, i32* %gep_rec_2
@@ -221,34 +248,34 @@ define void @gep_recursion_level_2(ptr %a, ptr %p) {
 ; CHECK-DAG: MayAlias:     i32* %a, i32* %gep_rec_4
 ; CHECK-DAG: MayAlias:     i32* %a, i32* %gep_rec_5
 ; CHECK-DAG: MayAlias:     i32* %a, i32* %gep_rec_6
-; CHECK-DAG: MayAlias:     i32* %gep, <vscale x 4 x i32>* %p
-; CHECK-DAG: MayAlias:     i32* %gep_rec_1, <vscale x 4 x i32>* %p
-; CHECK-DAG: MayAlias:     i32* %gep_rec_2, <vscale x 4 x i32>* %p
-; CHECK-DAG: MayAlias:     i32* %gep_rec_3, <vscale x 4 x i32>* %p
-; CHECK-DAG: MayAlias:     i32* %gep_rec_4, <vscale x 4 x i32>* %p
-; CHECK-DAG: MayAlias:     i32* %gep_rec_5, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias:      i32* %gep, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias:      i32* %gep_rec_1, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias:      i32* %gep_rec_2, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias:      i32* %gep_rec_3, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias:      i32* %gep_rec_4, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias:      i32* %gep_rec_5, <vscale x 4 x i32>* %p
 ; CHECK-DAG: MayAlias:     i32* %gep_rec_6, <vscale x 4 x i32>* %p
 ; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_1
 ; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_2
 ; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_3
 ; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_4
 ; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_5
-; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_6
+; CHECK-DAG: MayAlias:     i32* %gep, i32* %gep_rec_6
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_1, i32* %gep_rec_2
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_1, i32* %gep_rec_3
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_1, i32* %gep_rec_4
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_1, i32* %gep_rec_5
-; CHECK-DAG: NoAlias:      i32* %gep_rec_1, i32* %gep_rec_6
+; CHECK-DAG: MayAlias:     i32* %gep_rec_1, i32* %gep_rec_6
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_2, i32* %gep_rec_3
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_2, i32* %gep_rec_4
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_2, i32* %gep_rec_5
-; CHECK-DAG: NoAlias:      i32* %gep_rec_2, i32* %gep_rec_6
+; CHECK-DAG: MayAlias:     i32* %gep_rec_2, i32* %gep_rec_6
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_3, i32* %gep_rec_4
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_3, i32* %gep_rec_5
-; CHECK-DAG: NoAlias:      i32* %gep_rec_3, i32* %gep_rec_6
+; CHECK-DAG: MayAlias:     i32* %gep_rec_3, i32* %gep_rec_6
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_4, i32* %gep_rec_5
-; CHECK-DAG: NoAlias:      i32* %gep_rec_4, i32* %gep_rec_6
-; CHECK-DAG: NoAlias:      i32* %gep_rec_5, i32* %gep_rec_6
+; CHECK-DAG: MayAlias:     i32* %gep_rec_4, i32* %gep_rec_6
+; CHECK-DAG: MayAlias:     i32* %gep_rec_5, i32* %gep_rec_6
 ; GEP max lookup depth was set to 6.
 define void @gep_recursion_max_lookup_depth_reached(ptr %a, ptr %p) {
   %gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 2
diff --git a/llvm/test/Transforms/GVN/vscale.ll b/llvm/test/Transforms/GVN/vscale.ll
index 71adaed8e5722b..3ecae9f54fddc7 100644
--- a/llvm/test/Transforms/GVN/vscale.ll
+++ b/llvm/test/Transforms/GVN/vscale.ll
@@ -84,10 +84,7 @@ define i32 @load_clobber_load_gep3(ptr %p) {
 ; CHECK-LABEL: @load_clobber_load_gep3(
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 1, i64 0
 ; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[GEP1]], align 4
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P]], i64 1, i64 0
-; CHECK-NEXT:    [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
-; CHECK-NEXT:    [[CAST:%.*]] = bitcast float [[LOAD2]] to i32
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD1]], [[CAST]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD1]]
 ; CHECK-NEXT:    ret i32 [[ADD]]
 ;
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
@@ -277,8 +274,7 @@ define void @redundant_load_elimination_2(i1 %c, ptr %p, ptr %q) {
 ; CHECK-NEXT:    store i32 1, ptr [[GEP2]], align 4
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[T:%.*]] = load i32, ptr [[GEP1]], align 4
-; CHECK-NEXT:    store i32 [[T]], ptr [[Q:%.*]], align 4
+; CHECK-NEXT:    store i32 0, ptr [[Q:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.else:
 ; CHECK-NEXT:    ret void
@@ -367,8 +363,7 @@ define void @missing_load_elimination(i1 %c, ptr %p, ptr %q, <vscale x 4 x i32>
 ; CHECK-NEXT:    store <vscale x 4 x i32> [[V:%.*]], ptr [[P1]], align 16
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[T:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
-; CHECK-NEXT:    store <vscale x 4 x i32> [[T]], ptr [[Q:%.*]], align 16
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, ptr [[Q:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.else:
 ; CHECK-NEXT:    ret void

>From 1ab88eeaa49348e73b0d65b34eb7058208ef6284 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Tue, 19 Dec 2023 06:41:47 +0000
Subject: [PATCH 2/3] Updates and extra testing

---
 llvm/lib/Analysis/BasicAliasAnalysis.cpp | 104 ++++++++----------
 llvm/test/Analysis/BasicAA/vscale.ll     | 131 ++++++++++++++++++++++-
 2 files changed, 175 insertions(+), 60 deletions(-)

diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 8f4adc3ad84bc8..a8a425101fb4b7 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -672,7 +672,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
       // however in the 1st case, CastedValue is of type constant, hence another
       // flag in VariableGEPIndex is created in this case, IsVScale If GEP is
       // Scalable type, e.g. <4 x vscale x i32>, the first index will have
-      // vscale as a variable index, create a LE in this case
+      // vscale as a variable index, create a LE in this case.
       LinearExpression LE(CastedValue(Index, 0, SExtBits, TruncBits));
       if (ScalableGEP) {
         if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
@@ -691,9 +691,10 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
           Decomposed.Base = V;
           return Decomposed;
         }
-      } else
+      } else {
         LE = GetLinearExpression(CastedValue(Index, 0, SExtBits, TruncBits), DL,
                                  0, AC, DT);
+      }
 
       // Scale by the type size.
       unsigned TypeSize = AllocTypeSize.getKnownMinValue();
@@ -707,7 +708,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
       //   A[x][x] -> x*16 + x*4 -> x*20
       // This also ensures that 'x' only appears in the index list once.
       // Only add to IsVScale VariableGEPIndex if it's @llvm.vscale or gep
-      // vscale index
+      // vscale index.
       for (unsigned i = 0, e = Decomposed.VarIndices.size(); i != e; ++i) {
         if (Decomposed.VarIndices[i].Val.hasSameCastsAs(LE.Val) &&
             ((Decomposed.VarIndices[i].IsVScale &&
@@ -721,7 +722,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
       }
 
       // Make sure that we have a scale that makes sense for this target's
-      // index size.Only allow variableGEP decomposition for constants, in the
+      // index size. Only allow variableGEP decomposition for constants, in the
       // case of vscale
       adjustToIndexSize(Scale, IndexSize);
       bool InvalidVarVScale = (ScalableGEP && LEhasVscale) ||
@@ -1119,7 +1120,7 @@ AliasResult BasicAAResult::aliasGEP(
   // If an inbounds GEP would have to start from an out of bounds address
   // for the two to alias, then we can assume noalias.
   if (*DecompGEP1.InBounds && DecompGEP1.VarIndices.empty() &&
-      V2Size.hasValue() &&
+      V2Size.hasValue() && !V2Size.isScalable() &&
       DecompGEP1.Offset.sge(V2Size.getValue().getKnownMinValue()) &&
       isBaseOfObject(DecompGEP2.Base))
     return AliasResult::NoAlias;
@@ -1127,7 +1128,7 @@ AliasResult BasicAAResult::aliasGEP(
   if (isa<GEPOperator>(V2)) {
     // Symmetric case to above.
     if (*DecompGEP2.InBounds && DecompGEP1.VarIndices.empty() &&
-        V1Size.hasValue() &&
+        V1Size.hasValue() && !V1Size.isScalable() &&
         DecompGEP1.Offset.sle(-V1Size.getValue().getKnownMinValue()) &&
         isBaseOfObject(DecompGEP1.Base))
       return AliasResult::NoAlias;
@@ -1205,54 +1206,38 @@ AliasResult BasicAAResult::aliasGEP(
 
   // VScale Alias Analysis
   // GEPs with Vscale will have the expression A*Vscale + B (1 variable index
-  // and constant offset) The difference between two GEPs and Scalable
+  // and constant offset). The difference between two GEPs and Scalable
   // LocationSize can then be analysed as they have the form of
-  //     LSize                SubtractDecomposedGEP output
-  //   A * Vscale                   B * Vscale + C
+  //     Size                SubtractDecomposedGEP output
+  //   V1Size [* Vscale]         Scale * Vscale + Off
   // Since VScale is strictly a positive number (Vscale >= 1), the larger GEP
   // can be known
   // TODO: Use knowledge of vscale_range to make the analysis more accurate
-  if (DecompGEP1.VarIndices.size() == 1 && DecompGEP1.VarIndices[0].IsVScale &&
-      (V1Size.isScalable() || V2Size.isScalable())) {
+  if (DecompGEP1.VarIndices.size() == 1 && DecompGEP1.VarIndices[0].IsVScale) {
     const VariableGEPIndex &ScalableVar = DecompGEP1.VarIndices[0];
-    bool StrictlyPos = false, StrictlyNeg = false;
-    APInt &Off = DecompGEP1.Offset;
-    if (!ScalableVar.IsNegated) {
-      if (Off.isNegative())
-        StrictlyPos = ScalableVar.Scale.ugt(Off.abs());
-      else
-        StrictlyPos = true;
-    } else
-      StrictlyPos = Off.isNonNegative();
-
-    if (ScalableVar.IsNegated) {
-      if (Off.isNonNegative())
-        StrictlyNeg = Off.ult(ScalableVar.Scale.abs());
-      else
-        StrictlyNeg = true;
-    } else
-      StrictlyNeg = Off.isNegative();
-
-    if (StrictlyPos || StrictlyNeg) {
-      LocationSize VLeftSize = V2Size;
-      LocationSize VRightSize = V1Size;
-      const bool Swapped = StrictlyNeg;
-
-      if (Swapped) {
-        std::swap(VLeftSize, VRightSize);
-        Off = -Off;
-      }
-
-      const uint64_t LSize = VLeftSize.getValue().getKnownMinValue();
-      if (VLeftSize.isScalable() && ScalableVar.Scale.ult(LSize) &&
-          (ScalableVar.Scale + DecompGEP1.Offset).ult(LSize))
-        return AliasResult::PartialAlias;
+    const APInt &Off = DecompGEP1.Offset;
+    APInt Scale =
+        ScalableVar.IsNegated ? -ScalableVar.Scale : ScalableVar.Scale;
+
+    // Lower limit
+    // V1Size not scalable => Scale*v <= -V1Size-Off providing Scale<=0
+    if (!V1Size.isScalable() && Scale.isNonPositive() &&
+        Scale.sle(-V1Size.getValue().getKnownMinValue() - Off))
+      return AliasResult::NoAlias;
+    // V1Size is scalable => Off <= (-V1Size-Scale)*v => given V1Size-Scale>=0
+    APInt TS = (-V1Size.getValue().getKnownMinValue() - Scale);
+    if (V1Size.isScalable() && TS.isNonNegative() && Off.sle(TS))
+      return AliasResult::NoAlias;
 
-      if ((ScalableVar.Scale.uge(LSize) && VLeftSize.isScalable()) ||
-          ((ScalableVar.Scale + DecompGEP1.Offset).uge(LSize) &&
-           !VLeftSize.isScalable()))
-        return AliasResult::NoAlias;
-    }
+    // Upper limit
+    // V2Size not scalable => Scale*v >= V2Size-Off
+    if (!V2Size.isScalable() && Scale.isNonNegative() &&
+        Scale.sge(V2Size.getValue().getKnownMinValue() - Off))
+      return AliasResult::NoAlias;
+    // V2Size is scalable => Off >= (V2Size-Scale)*v => Off>=0, V2Size-Scale<=0
+    TS = (V2Size.getValue().getKnownMinValue() - Scale);
+    if (V2Size.isScalable() && TS.isNonPositive() && Off.sge(TS))
+      return AliasResult::NoAlias;
   }
 
   // Bail on Scalable location size from now onwards
@@ -1265,9 +1250,6 @@ AliasResult BasicAAResult::aliasGEP(
     const VariableGEPIndex &Index = DecompGEP1.VarIndices[i];
     const APInt &Scale = Index.Scale;
     APInt ScaleForGCD = Scale;
-    assert((!Index.IsVScale || match(Index.Val.V, m_VScale()) ||
-            isa<ConstantInt>(Index.Val.V)) &&
-           "Not allowed to have non-constant values if IsVScale is set");
     if (!Index.IsNSW)
       ScaleForGCD =
           APInt::getOneBitSet(Scale.getBitWidth(), Scale.countr_zero());
@@ -1277,10 +1259,17 @@ AliasResult BasicAAResult::aliasGEP(
     else
       GCD = APIntOps::GreatestCommonDivisor(GCD, ScaleForGCD.abs());
 
-    ConstantRange CR = computeConstantRange(Index.Val.V, /* ForSigned */ false,
-                                            true, &AC, Index.CxtI);
-    KnownBits Known =
-        computeKnownBits(Index.Val.V, DL, 0, &AC, Index.CxtI, DT);
+    // FIXME: This could be expanded to use a more precise range for vscale.
+    ConstantRange CR =
+        Index.IsVScale
+            ? ConstantRange::getNonEmpty(
+                  APInt(OffsetRange.getBitWidth(), 1),
+                  APInt::getMaxValue(OffsetRange.getBitWidth()))
+            : computeConstantRange(Index.Val.V, /* ForSigned */ false, true,
+                                   &AC, Index.CxtI);
+    KnownBits Known = Index.IsVScale ? KnownBits(OffsetRange.getBitWidth())
+                                     : computeKnownBits(Index.Val.V, DL, 0, &AC,
+                                                        Index.CxtI, DT);
     CR = CR.intersectWith(
         ConstantRange::fromKnownBits(Known, /* Signed */ true),
         ConstantRange::Signed);
@@ -1313,7 +1302,7 @@ AliasResult BasicAAResult::aliasGEP(
     return AliasResult::NoAlias;
 
   // Compute ranges of potentially accessed bytes for both accesses. If the
-  // interseciton is empty, there can be no overlap.
+  // intersection is empty, there can be no overlap.
   unsigned BW = OffsetRange.getBitWidth();
   ConstantRange Range1 = OffsetRange.add(
       ConstantRange(APInt(BW, 0), APInt(BW, V1Size.getValue())));
@@ -1851,9 +1840,8 @@ void BasicAAResult::subtractDecomposedGEPs(DecomposedGEP &DestGEP,
       VariableGEPIndex &Dest = I.value();
       if (Dest.IsVScale != Src.IsVScale)
         continue;
-      const bool SrcDestAreVScale = Dest.IsVScale && Src.IsVScale;
       // Suppress base value checks if Src and Dst are of constant VScale
-      if ((!SrcDestAreVScale &&
+      if ((!Dest.IsVScale &&
            !isValueEqualInPotentialCycles(Dest.Val.V, Src.Val.V, AAQI)) ||
           !Dest.Val.hasSameCastsAs(Src.Val))
         continue;
diff --git a/llvm/test/Analysis/BasicAA/vscale.ll b/llvm/test/Analysis/BasicAA/vscale.ll
index 2d277eacdcb8ae..dd4c16e4e41b03 100644
--- a/llvm/test/Analysis/BasicAA/vscale.ll
+++ b/llvm/test/Analysis/BasicAA/vscale.ll
@@ -120,8 +120,6 @@ define void @gep_different_base_const_offset(ptr noalias %p1, ptr noalias %p2) {
 ; CHECK-DAG: NoAlias:      <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
 ; CHECK-DAG: MustAlias:    <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep3
 ; CHECK-DAG: NoAlias:      <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %gep3
-
-declare i64 @llvm.vscale.i64()
 define void @gep_llvm_vscale_no_alias(ptr %p) {
   %t1 = tail call i64 @llvm.vscale.i64()
   %t2 = shl nuw nsw i64 %t1, 3
@@ -134,6 +132,8 @@ define void @gep_llvm_vscale_no_alias(ptr %p) {
   ret void
 }
 
+declare i64 @llvm.vscale.i64()
+
 ; CHECK-LABEL: gep_llvm_vscale_squared_may_alias
 ; CHECK-DAG: MayAlias:      <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
 define void @gep_llvm_vscale_squared_may_alias(ptr %p) {
@@ -144,6 +144,7 @@ define void @gep_llvm_vscale_squared_may_alias(ptr %p) {
   load <vscale x 4 x i32>, ptr %gep2
   ret void
 }
+
 ; getelementptr + bitcast
 
 ; CHECK-LABEL: gep_bitcast_1
@@ -180,6 +181,132 @@ define void @gep_bitcast_2(ptr %p) {
   ret void
 }
 
+; negative offset tests
+
+; CHECK-LABEL: gep_neg_notscalable
+; CHECK-DAG:   NoAlias:      <4 x i32>* %p, <4 x i32>* %vm16
+; CHECK-DAG:   NoAlias:      <4 x i32>* %m16, <4 x i32>* %p
+; CHECK-DAG:   MayAlias:     <4 x i32>* %m16, <4 x i32>* %vm16
+; CHECK-DAG:   NoAlias:      <4 x i32>* %p, <4 x i32>* %vm16m16
+; CHECK-DAG:   NoAlias:      <4 x i32>* %vm16, <4 x i32>* %vm16m16
+; CHECK-DAG:   NoAlias:      <4 x i32>* %m16, <4 x i32>* %vm16m16
+; CHECK-DAG:   MayAlias:     <4 x i32>* %m16pv16, <4 x i32>* %p
+; CHECK-DAG:   NoAlias:      <4 x i32>* %m16pv16, <4 x i32>* %vm16
+; CHECK-DAG:   NoAlias:      <4 x i32>* %m16, <4 x i32>* %m16pv16
+; CHECK-DAG:   NoAlias:      <4 x i32>* %m16pv16, <4 x i32>* %vm16m16
+define void @gep_neg_notscalable(ptr %p) {
+  %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 -1
+  %m16 = getelementptr <4 x i32>, ptr %p, i64 -1
+  %vm16m16 = getelementptr <4 x i32>, ptr %vm16, i64 -1
+  %m16pv16 = getelementptr <vscale x 4 x i32>, ptr %m16, i64 1
+  load <4 x i32>, ptr %p
+  load <4 x i32>, ptr %vm16
+  load <4 x i32>, ptr %m16
+  load <4 x i32>, ptr %vm16m16
+  load <4 x i32>, ptr %m16pv16
+  ret void
+}
+
+; CHECK-LABEL: gep_neg_scalable
+; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %p
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %vm16, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG:   MustAlias:    <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %p
+; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %m16pv16
+; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16m16
+define void @gep_neg_scalable(ptr %p) {
+  %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 -1
+  %m16 = getelementptr <4 x i32>, ptr %p, i64 -1
+  %vm16m16 = getelementptr <4 x i32>, ptr %vm16, i64 -1
+  %m16pv16 = getelementptr <vscale x 4 x i32>, ptr %vm16, i64 1
+  load <vscale x 4 x i32>, ptr %p
+  load <vscale x 4 x i32>, ptr %vm16
+  load <vscale x 4 x i32>, ptr %m16
+  load <vscale x 4 x i32>, ptr %vm16m16
+  load <vscale x 4 x i32>, ptr %m16pv16
+  ret void
+}
+
+; CHECK-LABEL: gep_pos_notscalable
+; CHECK-DAG:   NoAlias:      <4 x i32>* %p, <4 x i32>* %vm16
+; CHECK-DAG:   NoAlias:      <4 x i32>* %m16, <4 x i32>* %p
+; CHECK-DAG:   MayAlias:     <4 x i32>* %m16, <4 x i32>* %vm16
+; CHECK-DAG:   NoAlias:      <4 x i32>* %p, <4 x i32>* %vm16m16
+; CHECK-DAG:   NoAlias:      <4 x i32>* %vm16, <4 x i32>* %vm16m16
+; CHECK-DAG:   NoAlias:      <4 x i32>* %m16, <4 x i32>* %vm16m16
+; CHECK-DAG:   MustAlias:    <4 x i32>* %m16pv16, <4 x i32>* %p
+; CHECK-DAG:   NoAlias:      <4 x i32>* %m16pv16, <4 x i32>* %vm16
+; CHECK-DAG:   NoAlias:      <4 x i32>* %m16, <4 x i32>* %m16pv16
+; CHECK-DAG:   NoAlias:      <4 x i32>* %m16pv16, <4 x i32>* %vm16m16
+define void @gep_pos_notscalable(ptr %p) {
+  %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1
+  %m16 = getelementptr <4 x i32>, ptr %p, i64 1
+  %vm16m16 = getelementptr <4 x i32>, ptr %vm16, i64 1
+  %m16pv16 = getelementptr <vscale x 4 x i32>, ptr %vm16, i64 -1
+  load <4 x i32>, ptr %p
+  load <4 x i32>, ptr %vm16
+  load <4 x i32>, ptr %m16
+  load <4 x i32>, ptr %vm16m16
+  load <4 x i32>, ptr %m16pv16
+  ret void
+}
+
+; CHECK-LABEL: gep_pos_scalable
+; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %p
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %vm16, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG:   MustAlias:    <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %p
+; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %m16pv16
+; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16m16
+define void @gep_pos_scalable(ptr %p) {
+  %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1
+  %m16 = getelementptr <4 x i32>, ptr %p, i64 1
+  %vm16m16 = getelementptr <4 x i32>, ptr %vm16, i64 1
+  %m16pv16 = getelementptr <vscale x 4 x i32>, ptr %vm16, i64 -1
+  load <vscale x 4 x i32>, ptr %p
+  load <vscale x 4 x i32>, ptr %vm16
+  load <vscale x 4 x i32>, ptr %m16
+  load <vscale x 4 x i32>, ptr %vm16m16
+  load <vscale x 4 x i32>, ptr %m16pv16
+  ret void
+}
+
+; CHECK-LABEL: v1v2types
+; CHECK-DAG:  MustAlias:    <4 x i32>* %p, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:  NoAlias:      <4 x i32>* %p, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %p, <4 x i32>* %vm16
+; CHECK-DAG:  NoAlias:      <4 x i32>* %p, <4 x i32>* %vm16
+; CHECK-DAG:  MustAlias:    <4 x i32>* %vm16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %p
+; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %m16, <4 x i32>* %p
+; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %m16, <4 x i32>* %vm16
+; CHECK-DAG:  NoAlias:      <4 x i32>* %m16, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      <4 x i32>* %m16, <4 x i32>* %p
+; CHECK-DAG:  MayAlias:     <4 x i32>* %m16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:  MayAlias:     <4 x i32>* %m16, <4 x i32>* %vm16
+; CHECK-DAG:  MustAlias:    <4 x i32>* %m16, <vscale x 4 x i32>* %m16
+define void @v1v2types(ptr %p) {
+  %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 -1
+  %m16 = getelementptr <4 x i32>, ptr %p, i64 -1
+  load <vscale x 4 x i32>, ptr %p
+  load <4 x i32>, ptr %p
+  load <vscale x 4 x i32>, ptr %vm16
+  load <4 x i32>, ptr %vm16
+  load <vscale x 4 x i32>, ptr %m16
+  load <4 x i32>, ptr %m16
+  ret void
+}
+
 ; getelementptr recursion
 
 ; CHECK-LABEL: gep_recursion_level_1

>From d741491d91e92544ad80a80a0f83b6ed70bb0b02 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 15 Jan 2024 15:49:32 +0000
Subject: [PATCH 3/3] Add VScaleSentinel and remove the useful part
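
  Instead of carrying a separate IsVScale flag on VariableGEPIndex, vscale
  is now represented directly as the Val of the index via a sentinel
  pointer (quoted from the change below):

    const Value *VScaleSentinel = reinterpret_cast<const Value *>(-1);

  This lets vscale coming from llvm.vscale calls and from scalable GEP
  element types be merged and subtracted like any other variable index.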

---
 llvm/lib/Analysis/BasicAliasAnalysis.cpp | 176 +++++++++--------------
 llvm/test/Analysis/BasicAA/vscale.ll     | 110 +++++++-------
 llvm/test/Transforms/GVN/vscale.ll       |   3 +-
 3 files changed, 121 insertions(+), 168 deletions(-)

diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index a8a425101fb4b7..a814bc0e53cb7f 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -253,7 +253,14 @@ void EarliestEscapeInfo::removeInstruction(Instruction *I) {
 //===----------------------------------------------------------------------===//
 
 namespace {
-/// Represents zext(sext(trunc(V))).
+
+// VScale can be introduced both by GEPs of scalable types and by uses of the
+// llvm.vscale intrinsic. This value represents vscale in a VariableGEPIndex,
+// so the two can be handled in the same way.
+const Value *VScaleSentinel = reinterpret_cast<const Value *>(-1);
+
+/// Represents zext(sext(trunc(V))). For vscale, V uses the VScaleSentinel,
+/// which is assumed to be 64 bits wide (truncated if needed).
 struct CastedValue {
   const Value *V;
   unsigned ZExtBits = 0;
@@ -266,16 +273,25 @@ struct CastedValue {
       : V(V), ZExtBits(ZExtBits), SExtBits(SExtBits), TruncBits(TruncBits) {}
 
   unsigned getBitWidth() const {
-    return V->getType()->getPrimitiveSizeInBits() - TruncBits + ZExtBits +
-           SExtBits;
+    return (V == VScaleSentinel ? 64 : V->getType()->getPrimitiveSizeInBits()) -
+           TruncBits + ZExtBits + SExtBits;
   }
 
   CastedValue withValue(const Value *NewV) const {
     return CastedValue(NewV, ZExtBits, SExtBits, TruncBits);
   }
 
+  CastedValue withVScale(const Value *VScale) const {
+    unsigned BitWidth = VScale->getType()->getPrimitiveSizeInBits();
+    if (BitWidth > 64)
+      return CastedValue(VScale, ZExtBits, SExtBits, TruncBits);
+    return CastedValue(VScaleSentinel, ZExtBits, SExtBits,
+                       64 - BitWidth + TruncBits);
+  }
+
   /// Replace V with zext(NewV)
   CastedValue withZExtOfValue(const Value *NewV) const {
+    assert(V != VScaleSentinel);
     unsigned ExtendBy = V->getType()->getPrimitiveSizeInBits() -
                         NewV->getType()->getPrimitiveSizeInBits();
     if (ExtendBy <= TruncBits)
@@ -288,6 +304,7 @@ struct CastedValue {
 
   /// Replace V with sext(NewV)
   CastedValue withSExtOfValue(const Value *NewV) const {
+    assert(V != VScaleSentinel);
     unsigned ExtendBy = V->getType()->getPrimitiveSizeInBits() -
                         NewV->getType()->getPrimitiveSizeInBits();
     if (ExtendBy <= TruncBits)
@@ -299,7 +316,8 @@ struct CastedValue {
   }
 
   APInt evaluateWith(APInt N) const {
-    assert(N.getBitWidth() == V->getType()->getPrimitiveSizeInBits() &&
+    assert((V == VScaleSentinel ||
+            N.getBitWidth() == V->getType()->getPrimitiveSizeInBits()) &&
            "Incompatible bit width");
     if (TruncBits) N = N.trunc(N.getBitWidth() - TruncBits);
     if (SExtBits) N = N.sext(N.getBitWidth() + SExtBits);
@@ -308,7 +326,8 @@ struct CastedValue {
   }
 
   ConstantRange evaluateWith(ConstantRange N) const {
-    assert(N.getBitWidth() == V->getType()->getPrimitiveSizeInBits() &&
+    assert((V == VScaleSentinel ||
+            N.getBitWidth() == V->getType()->getPrimitiveSizeInBits()) &&
            "Incompatible bit width");
     if (TruncBits) N = N.truncate(N.getBitWidth() - TruncBits);
     if (SExtBits) N = N.signExtend(N.getBitWidth() + SExtBits);
@@ -359,19 +378,18 @@ struct LinearExpression {
 
 /// Analyzes the specified value as a linear expression: "A*V + B", where A and
 /// B are constant integers.
-static LinearExpression GetLinearExpression(const CastedValue &Val,
-                                            const DataLayout &DL,
-                                            unsigned Depth, AssumptionCache *AC,
-                                            DominatorTree *DT) {
+static LinearExpression GetLinearExpression(
+    const CastedValue &Val,  const DataLayout &DL, unsigned Depth,
+    AssumptionCache *AC, DominatorTree *DT) {
   // Limit our recursion depth.
   if (Depth == 6)
     return Val;
 
-  // If llvm.vscale is matched, set linear expression with scale 1 and offset 0
-  if (match(Val.V, m_VScale())) {
-    return LinearExpression(Val, APInt(Val.getBitWidth(), 1),
+  // If llvm.vscale is matched, return a linear expression with the
+  // VScaleSentinel, scale 1 and offset 0.
+  if (match(Val.V, m_VScale()))
+    return LinearExpression(Val.withVScale(Val.V), APInt(Val.getBitWidth(), 1),
                             APInt(Val.getBitWidth(), 0), true);
-  }
 
   if (const ConstantInt *Const = dyn_cast<ConstantInt>(Val.V))
     return LinearExpression(Val, APInt(Val.getBitWidth(), 0),
@@ -482,9 +500,6 @@ struct VariableGEPIndex {
   CastedValue Val;
   APInt Scale;
 
-  // A value representing vscale quantity in a GEP expression
-  bool IsVScale;
-
   // Context instruction to use when querying information about this index.
   const Instruction *CxtI;
 
@@ -502,12 +517,14 @@ struct VariableGEPIndex {
     return Scale == Other.Scale;
   }
 
+  bool isVScale() const { return Val.V == VScaleSentinel; }
+
   void dump() const {
     print(dbgs());
     dbgs() << "\n";
   }
   void print(raw_ostream &OS) const {
-    OS << "(V=" << Val.V->getName() << "  IsVScale=" << IsVScale
+    OS << "(V=" << (isVScale() ? "vscale" : Val.V->getName())
        << ", zextbits=" << Val.ZExtBits << ", sextbits=" << Val.SExtBits
        << ", truncbits=" << Val.TruncBits << ", scale=" << Scale
        << ", nsw=" << IsNSW << ", negated=" << IsNegated << ")";
@@ -631,7 +648,6 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
     for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end();
          I != E; ++I, ++GTI) {
       const Value *Index = *I;
-      const bool ScalableGEP = isa<ScalableVectorType>(GTI.getIndexedType());
       // Compute the (potentially symbolic) offset in bytes for this index.
       if (StructType *STy = GTI.getStructTypeOrNull()) {
         // For a struct, add the member offset.
@@ -644,6 +660,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
       }
 
       TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL);
+      const bool ScalableGEP = isa<ScalableVectorType>(GTI.getIndexedType());
       // For an array/pointer, add the element offset, explicitly scaled.
       // Skip adding to constant offset if GEP index is marked as scalable
       // they are handled below as variable offset
@@ -668,21 +685,15 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
       // Allow Scalable GEP to be decomposed in the case of
       //    1. getelementptr <4 x vscale x i32> with 1st index as a constant
       //    2. Index which have a leaf of @llvm.vscale
-      // In both cases, essentially CastedValue of VariableGEPIndex is Vscale,
-      // however in the 1st case, CastedValue is of type constant, hence another
-      // flag in VariableGEPIndex is created in this case, IsVScale If GEP is
-      // Scalable type, e.g. <4 x vscale x i32>, the first index will have
-      // vscale as a variable index, create a LE in this case.
+      // In both cases the CastedValue of the VariableGEPIndex is essentially
+      // vscale; since the intrinsic value cannot be used as V in both cases,
+      // the VScaleSentinel is used to represent it.
       LinearExpression LE(CastedValue(Index, 0, SExtBits, TruncBits));
       if (ScalableGEP) {
         if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
-          LE = LinearExpression(
-              CastedValue(Index, 0, SExtBits, TruncBits),
-              CastedValue(Index, 0, SExtBits, TruncBits)
-                  .evaluateWith(CIdx->getValue()),
-              APInt(CastedValue(Index, 0, SExtBits, TruncBits).getBitWidth(),
-                    0),
-              true);
+          LE = LinearExpression(LE.Val.withVScale(CIdx),
+                                LE.Val.evaluateWith(CIdx->getValue()),
+                                APInt(LE.Val.getBitWidth(), 0), true);
           assert(LE.Offset.isZero() && "For Scalable GEP constant first index, "
                                        "the offset of LE should be 0");
         } else {
@@ -692,8 +703,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
           return Decomposed;
         }
       } else {
-        LE = GetLinearExpression(CastedValue(Index, 0, SExtBits, TruncBits), DL,
-                                 0, AC, DT);
+        LE = GetLinearExpression(LE.Val, DL, 0, AC, DT);
       }
 
       // Scale by the type size.
@@ -701,19 +711,14 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
       LE = LE.mul(APInt(IndexSize, TypeSize), GEPOp->isInBounds());
       Decomposed.Offset += LE.Offset.sext(MaxIndexSize);
       APInt Scale = LE.Scale.sext(MaxIndexSize);
-      bool LEhasVscale = match(LE.Val.V, m_VScale());
 
       // If we already had an occurrence of this index variable, merge this
       // scale into it.  For example, we want to handle:
       //   A[x][x] -> x*16 + x*4 -> x*20
       // This also ensures that 'x' only appears in the index list once.
-      // Only add to IsVScale VariableGEPIndex if it's @llvm.vscale or gep
-      // vscale index.
       for (unsigned i = 0, e = Decomposed.VarIndices.size(); i != e; ++i) {
-        if (Decomposed.VarIndices[i].Val.hasSameCastsAs(LE.Val) &&
-            ((Decomposed.VarIndices[i].IsVScale &&
-              (ScalableGEP || LEhasVscale)) ||
-             Decomposed.VarIndices[i].Val.V == LE.Val.V)) {
+        if (Decomposed.VarIndices[i].Val.V == LE.Val.V &&
+            Decomposed.VarIndices[i].Val.hasSameCastsAs(LE.Val)) {
           Scale += Decomposed.VarIndices[i].Scale;
           LE.IsNSW = false; // We cannot guarantee nsw for the merge.
           Decomposed.VarIndices.erase(Decomposed.VarIndices.begin() + i);
@@ -722,21 +727,11 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
       }
 
       // Make sure that we have a scale that makes sense for this target's
-      // index size. Only allow variableGEP decomposition for constants, in the
-      // case of vscale
+      // index size.
       adjustToIndexSize(Scale, IndexSize);
-      bool InvalidVarVScale = (ScalableGEP && LEhasVscale) ||
-                              (ScalableGEP && !isa<ConstantInt>(LE.Val.V));
-
-      assert(!InvalidVarVScale &&
-             "Variable GEP index contains VScale and another variable");
 
       if (!!Scale) {
-        VariableGEPIndex Entry = {LE.Val,
-                                  Scale,
-                                  ScalableGEP || LEhasVscale,
-                                  CxtI,
-                                  LE.IsNSW,
+        VariableGEPIndex Entry = {LE.Val, Scale, CxtI, LE.IsNSW,
                                   /* IsNegated */ false};
         Decomposed.VarIndices.push_back(Entry);
       }
@@ -1121,7 +1116,7 @@ AliasResult BasicAAResult::aliasGEP(
   // for the two to alias, then we can assume noalias.
   if (*DecompGEP1.InBounds && DecompGEP1.VarIndices.empty() &&
       V2Size.hasValue() && !V2Size.isScalable() &&
-      DecompGEP1.Offset.sge(V2Size.getValue().getKnownMinValue()) &&
+      DecompGEP1.Offset.sge(V2Size.getValue()) &&
       isBaseOfObject(DecompGEP2.Base))
     return AliasResult::NoAlias;
 
@@ -1129,7 +1124,7 @@ AliasResult BasicAAResult::aliasGEP(
     // Symmetric case to above.
     if (*DecompGEP2.InBounds && DecompGEP1.VarIndices.empty() &&
         V1Size.hasValue() && !V1Size.isScalable() &&
-        DecompGEP1.Offset.sle(-V1Size.getValue().getKnownMinValue()) &&
+        DecompGEP1.Offset.sle(-V1Size.getValue()) &&
         isBaseOfObject(DecompGEP1.Base))
       return AliasResult::NoAlias;
   }
@@ -1182,13 +1177,13 @@ AliasResult BasicAAResult::aliasGEP(
     if (!VLeftSize.hasValue() || VLeftSize.isScalable())
       return AliasResult::MayAlias;
 
-    const uint64_t LSize = VLeftSize.getValue().getKnownMinValue();
+    const uint64_t LSize = VLeftSize.getValue();
     if (Off.ult(LSize)) {
       // Conservatively drop processing if a phi was visited and/or offset is
       // too big.
       AliasResult AR = AliasResult::PartialAlias;
-      if (VRightSize.hasValue() && Off.ule(INT32_MAX) &&
-          (Off + VRightSize.getValue().getKnownMinValue()).ule(LSize)) {
+      if (VRightSize.hasValue() && !VRightSize.isScalable() &&
+          Off.ule(INT32_MAX) && (Off + VRightSize.getValue()).ule(LSize)) {
         // Memory referenced by right pointer is nested. Save the offset in
         // cache. Note that originally offset estimated as GEP1-V2, but
         // AliasResult contains the shift that represents GEP1+Offset=V2.
@@ -1204,42 +1199,6 @@ AliasResult BasicAAResult::aliasGEP(
   if (!V1Size.hasValue() || !V2Size.hasValue())
     return AliasResult::MayAlias;
 
-  // VScale Alias Analysis
-  // GEPs with Vscale will have the expression A*Vscale + B (1 variable index
-  // and constant offset). The difference between two GEPs and Scalable
-  // LocationSize can then be analysed as they have the form of
-  //     Size                SubtractDecomposedGEP output
-  //   V1Size [* Vscale]         Scale * Vscale + Off
-  // Since VScale is strictly a positive number (Vscale >= 1), the larger GEP
-  // can be known
-  // TODO: Use knowledge of vscale_range to make the analysis more accurate
-  if (DecompGEP1.VarIndices.size() == 1 && DecompGEP1.VarIndices[0].IsVScale) {
-    const VariableGEPIndex &ScalableVar = DecompGEP1.VarIndices[0];
-    const APInt &Off = DecompGEP1.Offset;
-    APInt Scale =
-        ScalableVar.IsNegated ? -ScalableVar.Scale : ScalableVar.Scale;
-
-    // Lower limit
-    // V1Size not scalable => Scale*v <= -V1Size-Off providing Scale<=0
-    if (!V1Size.isScalable() && Scale.isNonPositive() &&
-        Scale.sle(-V1Size.getValue().getKnownMinValue() - Off))
-      return AliasResult::NoAlias;
-    // V1Size is scalable => Off <= (-V1Size-Scale)*v => given V1Size-Scale>=0
-    APInt TS = (-V1Size.getValue().getKnownMinValue() - Scale);
-    if (V1Size.isScalable() && TS.isNonNegative() && Off.sle(TS))
-      return AliasResult::NoAlias;
-
-    // Upper limit
-    // V2Size not scalable => Scale*v >= V2Size-Off
-    if (!V2Size.isScalable() && Scale.isNonNegative() &&
-        Scale.sge(V2Size.getValue().getKnownMinValue() - Off))
-      return AliasResult::NoAlias;
-    // V2Size is scalable => Off >= (V2Size-Scale)*v => Off>=0, V2Size-Scale<=0
-    TS = (V2Size.getValue().getKnownMinValue() - Scale);
-    if (V2Size.isScalable() && TS.isNonPositive() && Off.sge(TS))
-      return AliasResult::NoAlias;
-  }
-
   // Bail on Scalable location size from now onwards
   if (V1Size.isScalable() || V2Size.isScalable())
     return AliasResult::MayAlias;
@@ -1259,17 +1218,14 @@ AliasResult BasicAAResult::aliasGEP(
     else
       GCD = APIntOps::GreatestCommonDivisor(GCD, ScaleForGCD.abs());
 
-    // FIXME: This could be expanded to use a more precise range for vscale.
     ConstantRange CR =
-        Index.IsVScale
-            ? ConstantRange::getNonEmpty(
-                  APInt(OffsetRange.getBitWidth(), 1),
-                  APInt::getMaxValue(OffsetRange.getBitWidth()))
+        Index.isVScale()
+            ? getVScaleRange(&F, OffsetRange.getBitWidth())
             : computeConstantRange(Index.Val.V, /* ForSigned */ false, true,
                                    &AC, Index.CxtI);
-    KnownBits Known = Index.IsVScale ? KnownBits(OffsetRange.getBitWidth())
-                                     : computeKnownBits(Index.Val.V, DL, 0, &AC,
-                                                        Index.CxtI, DT);
+    KnownBits Known = Index.isVScale() ? KnownBits(OffsetRange.getBitWidth())
+                                       : computeKnownBits(Index.Val.V, DL, 0,
+                                                          &AC, Index.CxtI, DT);
     CR = CR.intersectWith(
         ConstantRange::fromKnownBits(Known, /* Signed */ true),
         ConstantRange::Signed);
@@ -1302,7 +1258,7 @@ AliasResult BasicAAResult::aliasGEP(
     return AliasResult::NoAlias;
 
   // Compute ranges of potentially accessed bytes for both accesses. If the
-  // intersection is empty, there can be no overlap.
+  // intersection is empty, there can be no overlap.
   unsigned BW = OffsetRange.getBitWidth();
   ConstantRange Range1 = OffsetRange.add(
       ConstantRange(APInt(BW, 0), APInt(BW, V1Size.getValue())));
@@ -1317,7 +1273,7 @@ AliasResult BasicAAResult::aliasGEP(
   if (DecompGEP1.VarIndices.size() == 1) {
     // VarIndex = Scale*V.
     const VariableGEPIndex &Var = DecompGEP1.VarIndices[0];
-    if (Var.Val.TruncBits == 0 &&
+    if (Var.Val.TruncBits == 0 && !Var.isVScale() &&
         isKnownNonZero(Var.Val.V, DL, 0, &AC, Var.CxtI, DT)) {
       // Check if abs(V*Scale) >= abs(Scale) holds in the presence of
       // potentially wrapping math.
@@ -1350,6 +1306,7 @@ AliasResult BasicAAResult::aliasGEP(
     const VariableGEPIndex &Var0 = DecompGEP1.VarIndices[0];
     const VariableGEPIndex &Var1 = DecompGEP1.VarIndices[1];
     if (Var0.hasNegatedScaleOf(Var1) && Var0.Val.TruncBits == 0 &&
+        !Var0.isVScale() && !Var1.isVScale() &&
         Var0.Val.hasSameCastsAs(Var1.Val) && !AAQI.MayBeCrossIteration &&
         isKnownNonEqual(Var0.Val.V, Var1.Val.V, DL, &AC, /* CxtI */ nullptr,
                         DT))
@@ -1838,10 +1795,9 @@ void BasicAAResult::subtractDecomposedGEPs(DecomposedGEP &DestGEP,
     bool Found = false;
     for (auto I : enumerate(DestGEP.VarIndices)) {
       VariableGEPIndex &Dest = I.value();
-      if (Dest.IsVScale != Src.IsVScale)
+      if (Dest.isVScale() != Src.isVScale())
         continue;
-      // Suppress base value checks if Src and Dst are of constant VScale
-      if ((!Dest.IsVScale &&
+      if ((!Dest.isVScale() &&
            !isValueEqualInPotentialCycles(Dest.Val.V, Src.Val.V, AAQI)) ||
           !Dest.Val.hasSameCastsAs(Src.Val))
         continue;
@@ -1867,11 +1823,7 @@ void BasicAAResult::subtractDecomposedGEPs(DecomposedGEP &DestGEP,
 
     // If we didn't consume this entry, add it to the end of the Dest list.
     if (!Found) {
-      VariableGEPIndex Entry = {Src.Val,
-                                Src.Scale,
-                                Src.IsVScale,
-                                Src.CxtI,
-                                Src.IsNSW,
+      VariableGEPIndex Entry = {Src.Val, Src.Scale, Src.CxtI, Src.IsNSW,
                                 /* IsNegated */ true};
       DestGEP.VarIndices.push_back(Entry);
     }
@@ -1893,8 +1845,8 @@ bool BasicAAResult::constantOffsetHeuristic(const DecomposedGEP &GEP,
 
   const VariableGEPIndex &Var0 = GEP.VarIndices[0], &Var1 = GEP.VarIndices[1];
 
-  if (Var0.Val.TruncBits != 0 || !Var0.Val.hasSameCastsAs(Var1.Val) ||
-      !Var0.hasNegatedScaleOf(Var1) ||
+  if (Var0.isVScale() || Var1.isVScale() || Var0.Val.TruncBits != 0 ||
+      !Var0.Val.hasSameCastsAs(Var1.Val) || !Var0.hasNegatedScaleOf(Var1) ||
       Var0.Val.V->getType() != Var1.Val.V->getType())
     return false;
 
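
(Illustration only, not part of the patch; the function and value names below
are made up.) The two decomposition cases the DecomposeGEPExpression comments
above refer to could look roughly like this in IR:

  define void @decomposition_cases(ptr %p) {
    ; Case 1: a scalable-typed GEP with a constant first index. The constant
    ; index contributes a multiple of vscale, carried by the VScaleSentinel.
    %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1
    ; Case 2: a variable index whose leaf is @llvm.vscale. GetLinearExpression
    ; folds the shl into the scale of the vscale term.
    %vs = tail call i64 @llvm.vscale.i64()
    %off = shl nuw nsw i64 %vs, 4
    %gep2 = getelementptr i8, ptr %p, i64 %off
    load i32, ptr %gep1
    load i32, ptr %gep2
    ret void
  }
  declare i64 @llvm.vscale.i64()
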
diff --git a/llvm/test/Analysis/BasicAA/vscale.ll b/llvm/test/Analysis/BasicAA/vscale.ll
index dd4c16e4e41b03..8e90a1462722b8 100644
--- a/llvm/test/Analysis/BasicAA/vscale.ll
+++ b/llvm/test/Analysis/BasicAA/vscale.ll
@@ -4,8 +4,8 @@
 
 ; CHECK-LABEL: gep_alloca_const_offset_1
 ; CHECK-DAG:  MustAlias:    <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep1
-; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep2
-; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep2
+; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
 define void @gep_alloca_const_offset_1() {
   %alloc = alloca <vscale x 4 x i32>
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %alloc, i64 0
@@ -17,9 +17,9 @@ define void @gep_alloca_const_offset_1() {
 }
 
 ; CHECK-LABEL: gep_alloca_const_offset_2
-; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep1
-; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep2
-; CHECK-DAG:  MustAlias:   <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep1
+; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep2
+; CHECK-DAG:  MustAlias:    <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
 define void @gep_alloca_const_offset_2() {
   %alloc = alloca <vscale x 4 x i32>
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %alloc, i64 1
@@ -32,8 +32,8 @@ define void @gep_alloca_const_offset_2() {
 
 ; CHECK-LABEL: gep_alloca_const_offset_3
 ; CHECK-DAG:  MustAlias:    <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep1
-; CHECK-DAG:  MayAlias: <vscale x 4 x i32>* %alloc, i32* %gep2
-; CHECK-DAG:  MayAlias: <vscale x 4 x i32>* %gep1, i32* %gep2
+; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %alloc, i32* %gep2
+; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %gep1, i32* %gep2
 define void @gep_alloca_const_offset_3() {
   %alloc = alloca <vscale x 4 x i32>
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %alloc, i64 0
@@ -73,9 +73,9 @@ define void @gep_alloca_symbolic_offset(i64 %idx1, i64 %idx2) {
 }
 
 ; CHECK-LABEL: gep_same_base_const_offset
-; CHECK-DAG:  NoAlias:     i32* %gep1, <vscale x 4 x i32>* %p
-; CHECK-DAG:  NoAlias:     i32* %gep2, <vscale x 4 x i32>* %p
-; CHECK-DAG:  NoAlias:     i32* %gep1, i32* %gep2
+; CHECK-DAG:  MayAlias:     i32* %gep1, <vscale x 4 x i32>* %p
+; CHECK-DAG:  MayAlias:     i32* %gep2, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep1, i32* %gep2
 define void @gep_same_base_const_offset(ptr %p) {
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
   %gep2 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 1
@@ -99,8 +99,8 @@ define void @gep_same_base_symbolic_offset(ptr %p, i64 %idx1, i64 %idx2) {
 }
 
 ; CHECK-LABEL: gep_different_base_const_offset
-; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %p1
-; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %p2
+; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %p1
+; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %p2
 ; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %p1, <vscale x 4 x i32>* %p2
 ; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %p2
 ; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %p1
@@ -117,9 +117,9 @@ define void @gep_different_base_const_offset(ptr noalias %p1, ptr noalias %p2) {
 
 ; getelementptr @llvm.vscale tests
 ; CHECK-LABEL: gep_llvm_vscale_no_alias
-; CHECK-DAG: NoAlias:      <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+; CHECK-DAG: MayAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
 ; CHECK-DAG: MustAlias:    <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep3
-; CHECK-DAG: NoAlias:      <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %gep3
+; CHECK-DAG: MayAlias:     <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %gep3
 define void @gep_llvm_vscale_no_alias(ptr %p) {
   %t1 = tail call i64 @llvm.vscale.i64()
   %t2 = shl nuw nsw i64 %t1, 3
@@ -149,12 +149,12 @@ define void @gep_llvm_vscale_squared_may_alias(ptr %p) {
 
 ; CHECK-LABEL: gep_bitcast_1
 ; CHECK-DAG:   MustAlias:    i32* %p, <vscale x 4 x i32>* %p
-; CHECK-DAG:   NoAlias:      i32* %gep1, <vscale x 4 x i32>* %p
+; CHECK-DAG:   MayAlias:     i32* %gep1, <vscale x 4 x i32>* %p
 ; CHECK-DAG:   NoAlias:      i32* %gep1, i32* %p
 ; CHECK-DAG:   MayAlias:     i32* %gep2, <vscale x 4 x i32>* %p
 ; CHECK-DAG:   MayAlias:     i32* %gep1, i32* %gep2
 ; CHECK-DAG:   NoAlias:      i32* %gep2, i32* %p
-define void @gep_bitcast_1(ptr %p) {
+define void @gep_bitcast_1(ptr %p) vscale_range(1,16) {
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
   %gep2 = getelementptr i32, ptr %p, i64 4
   load <vscale x 4 x i32>, ptr %p
@@ -166,12 +166,12 @@ define void @gep_bitcast_1(ptr %p) {
 
 ; CHECK-LABEL: gep_bitcast_2
 ; CHECK-DAG:  MustAlias:    <vscale x 4 x float>* %p, <vscale x 4 x i32>* %p
-; CHECK-DAG:  NoAlias:      i32* %gep1, <vscale x 4 x i32>* %p
-; CHECK-DAG:  NoAlias:      i32* %gep1, <vscale x 4 x float>* %p
-; CHECK-DAG:  NoAlias:      float* %gep2, <vscale x 4 x i32>* %p
+; CHECK-DAG:  MayAlias:     i32* %gep1, <vscale x 4 x i32>* %p
+; CHECK-DAG:  MayAlias:     i32* %gep1, <vscale x 4 x float>* %p
+; CHECK-DAG:  MayAlias:     float* %gep2, <vscale x 4 x i32>* %p
 ; CHECK-DAG:  MustAlias:    i32* %gep1, float* %gep2
-; CHECK-DAG:  NoAlias:      float* %gep2, <vscale x 4 x float>* %p
-define void @gep_bitcast_2(ptr %p) {
+; CHECK-DAG:  MayAlias:     float* %gep2, <vscale x 4 x float>* %p
+define void @gep_bitcast_2(ptr %p) vscale_range(1,16) {
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
   %gep2 = getelementptr <vscale x 4 x float>, ptr %p, i64 1, i64 0
   load i32, ptr %gep1
@@ -189,12 +189,12 @@ define void @gep_bitcast_2(ptr %p) {
 ; CHECK-DAG:   MayAlias:     <4 x i32>* %m16, <4 x i32>* %vm16
 ; CHECK-DAG:   NoAlias:      <4 x i32>* %p, <4 x i32>* %vm16m16
 ; CHECK-DAG:   NoAlias:      <4 x i32>* %vm16, <4 x i32>* %vm16m16
-; CHECK-DAG:   NoAlias:      <4 x i32>* %m16, <4 x i32>* %vm16m16
+; CHECK-DAG:   NoAlias:     <4 x i32>* %m16, <4 x i32>* %vm16m16
 ; CHECK-DAG:   MayAlias:     <4 x i32>* %m16pv16, <4 x i32>* %p
 ; CHECK-DAG:   NoAlias:      <4 x i32>* %m16pv16, <4 x i32>* %vm16
-; CHECK-DAG:   NoAlias:      <4 x i32>* %m16, <4 x i32>* %m16pv16
-; CHECK-DAG:   NoAlias:      <4 x i32>* %m16pv16, <4 x i32>* %vm16m16
-define void @gep_neg_notscalable(ptr %p) {
+; CHECK-DAG:   NoAlias:     <4 x i32>* %m16, <4 x i32>* %m16pv16
+; CHECK-DAG:   NoAlias:     <4 x i32>* %m16pv16, <4 x i32>* %vm16m16
+define void @gep_neg_notscalable(ptr %p) vscale_range(1,16) {
   %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 -1
   %m16 = getelementptr <4 x i32>, ptr %p, i64 -1
   %vm16m16 = getelementptr <4 x i32>, ptr %vm16, i64 -1
@@ -208,17 +208,17 @@ define void @gep_neg_notscalable(ptr %p) {
 }
 
 ; CHECK-LABEL: gep_neg_scalable
-; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16
 ; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %p
 ; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16
-; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16m16
 ; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %vm16, <vscale x 4 x i32>* %vm16m16
-; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16m16
 ; CHECK-DAG:   MustAlias:    <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %p
-; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16
 ; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %m16pv16
-; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16m16
-define void @gep_neg_scalable(ptr %p) {
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16m16
+define void @gep_neg_scalable(ptr %p) vscale_range(1,16) {
   %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 -1
   %m16 = getelementptr <4 x i32>, ptr %p, i64 -1
   %vm16m16 = getelementptr <4 x i32>, ptr %vm16, i64 -1
@@ -242,7 +242,7 @@ define void @gep_neg_scalable(ptr %p) {
 ; CHECK-DAG:   NoAlias:      <4 x i32>* %m16pv16, <4 x i32>* %vm16
 ; CHECK-DAG:   NoAlias:      <4 x i32>* %m16, <4 x i32>* %m16pv16
 ; CHECK-DAG:   NoAlias:      <4 x i32>* %m16pv16, <4 x i32>* %vm16m16
-define void @gep_pos_notscalable(ptr %p) {
+define void @gep_pos_notscalable(ptr %p) vscale_range(1,16) {
   %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1
   %m16 = getelementptr <4 x i32>, ptr %p, i64 1
   %vm16m16 = getelementptr <4 x i32>, ptr %vm16, i64 1
@@ -256,17 +256,17 @@ define void @gep_pos_notscalable(ptr %p) {
 }
 
 ; CHECK-LABEL: gep_pos_scalable
-; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16
 ; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %p
 ; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16
-; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16m16
 ; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %vm16, <vscale x 4 x i32>* %vm16m16
-; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16m16
 ; CHECK-DAG:   MustAlias:    <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %p
-; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16
 ; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %m16pv16
-; CHECK-DAG:   NoAlias:      <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16m16
-define void @gep_pos_scalable(ptr %p) {
+; CHECK-DAG:   MayAlias:     <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16m16
+define void @gep_pos_scalable(ptr %p) vscale_range(1,16) {
   %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1
   %m16 = getelementptr <4 x i32>, ptr %p, i64 1
   %vm16m16 = getelementptr <4 x i32>, ptr %vm16, i64 1
@@ -281,9 +281,9 @@ define void @gep_pos_scalable(ptr %p) {
 
 ; CHECK-LABEL: v1v2types
 ; CHECK-DAG:  MustAlias:    <4 x i32>* %p, <vscale x 4 x i32>* %p
-; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16
-; CHECK-DAG:  NoAlias:      <4 x i32>* %p, <vscale x 4 x i32>* %vm16
-; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %p, <4 x i32>* %vm16
+; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:  MayAlias:     <4 x i32>* %p, <vscale x 4 x i32>* %vm16
+; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %p, <4 x i32>* %vm16
 ; CHECK-DAG:  NoAlias:      <4 x i32>* %p, <4 x i32>* %vm16
 ; CHECK-DAG:  MustAlias:    <4 x i32>* %vm16, <vscale x 4 x i32>* %vm16
 ; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %p
@@ -295,7 +295,7 @@ define void @gep_pos_scalable(ptr %p) {
 ; CHECK-DAG:  MayAlias:     <4 x i32>* %m16, <vscale x 4 x i32>* %vm16
 ; CHECK-DAG:  MayAlias:     <4 x i32>* %m16, <4 x i32>* %vm16
 ; CHECK-DAG:  MustAlias:    <4 x i32>* %m16, <vscale x 4 x i32>* %m16
-define void @v1v2types(ptr %p) {
+define void @v1v2types(ptr %p) vscale_range(1,16) {
   %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 -1
   %m16 = getelementptr <4 x i32>, ptr %p, i64 -1
   load <vscale x 4 x i32>, ptr %p
@@ -313,8 +313,8 @@ define void @v1v2types(ptr %p) {
 ; CHECK-DAG:  MayAlias:     i32* %a, <vscale x 4 x i32>* %p
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep_rec_1
-; CHECK-DAG:  NoAlias:      i32* %gep, <vscale x 4 x i32>* %p
-; CHECK-DAG:  NoAlias:      i32* %gep_rec_1, <vscale x 4 x i32>* %p
+; CHECK-DAG:  MayAlias:     i32* %gep, <vscale x 4 x i32>* %p
+; CHECK-DAG:  MayAlias:     i32* %gep_rec_1, <vscale x 4 x i32>* %p
 ; CHECK-DAG:  NoAlias:      i32* %gep, i32* %gep_rec_1
 define void @gep_recursion_level_1(ptr %a, ptr %p) {
   %gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 2
@@ -330,8 +330,8 @@ define void @gep_recursion_level_1(ptr %a, ptr %p) {
 ; CHECK-DAG:  MustAlias:    i32* %a, <vscale x 4 x i32>* %a
 ; CHECK-DAG:  NoAlias:      i32* %a, i32* %gep
 ; CHECK-DAG:  NoAlias:      i32* %a, i32* %gep_rec_1
-; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %a, i32* %gep
-; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %a, i32* %gep_rec_1
+; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %a, i32* %gep
+; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %a, i32* %gep_rec_1
 ; CHECK-DAG:  NoAlias:      i32* %gep, i32* %gep_rec_1
 define void @gep_recursion_level_1_bitcast(ptr %a) {
   %gep = getelementptr <vscale x 4 x i32>, ptr %a, i64 1, i64 2
@@ -348,9 +348,9 @@ define void @gep_recursion_level_1_bitcast(ptr %a) {
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep_rec_1
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep_rec_2
-; CHECK-DAG:  NoAlias:      i32* %gep, <vscale x 4 x i32>* %p
-; CHECK-DAG:  NoAlias:      i32* %gep_rec_1, <vscale x 4 x i32>* %p
-; CHECK-DAG:  NoAlias:      i32* %gep_rec_2, <vscale x 4 x i32>* %p
+; CHECK-DAG:  MayAlias:     i32* %gep, <vscale x 4 x i32>* %p
+; CHECK-DAG:  MayAlias:     i32* %gep_rec_1, <vscale x 4 x i32>* %p
+; CHECK-DAG:  MayAlias:     i32* %gep_rec_2, <vscale x 4 x i32>* %p
 ; CHECK-DAG:  NoAlias:      i32* %gep, i32* %gep_rec_1
 ; CHECK-DAG:  NoAlias:      i32* %gep, i32* %gep_rec_2
 ; CHECK-DAG:  NoAlias:      i32* %gep_rec_1, i32* %gep_rec_2
@@ -375,12 +375,12 @@ define void @gep_recursion_level_2(ptr %a, ptr %p) {
 ; CHECK-DAG: MayAlias:     i32* %a, i32* %gep_rec_4
 ; CHECK-DAG: MayAlias:     i32* %a, i32* %gep_rec_5
 ; CHECK-DAG: MayAlias:     i32* %a, i32* %gep_rec_6
-; CHECK-DAG: NoAlias:      i32* %gep, <vscale x 4 x i32>* %p
-; CHECK-DAG: NoAlias:      i32* %gep_rec_1, <vscale x 4 x i32>* %p
-; CHECK-DAG: NoAlias:      i32* %gep_rec_2, <vscale x 4 x i32>* %p
-; CHECK-DAG: NoAlias:      i32* %gep_rec_3, <vscale x 4 x i32>* %p
-; CHECK-DAG: NoAlias:      i32* %gep_rec_4, <vscale x 4 x i32>* %p
-; CHECK-DAG: NoAlias:      i32* %gep_rec_5, <vscale x 4 x i32>* %p
+; CHECK-DAG: MayAlias:     i32* %gep, <vscale x 4 x i32>* %p
+; CHECK-DAG: MayAlias:     i32* %gep_rec_1, <vscale x 4 x i32>* %p
+; CHECK-DAG: MayAlias:     i32* %gep_rec_2, <vscale x 4 x i32>* %p
+; CHECK-DAG: MayAlias:     i32* %gep_rec_3, <vscale x 4 x i32>* %p
+; CHECK-DAG: MayAlias:     i32* %gep_rec_4, <vscale x 4 x i32>* %p
+; CHECK-DAG: MayAlias:     i32* %gep_rec_5, <vscale x 4 x i32>* %p
 ; CHECK-DAG: MayAlias:     i32* %gep_rec_6, <vscale x 4 x i32>* %p
 ; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_1
 ; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_2
diff --git a/llvm/test/Transforms/GVN/vscale.ll b/llvm/test/Transforms/GVN/vscale.ll
index 3ecae9f54fddc7..4a7f2acdbf8a9a 100644
--- a/llvm/test/Transforms/GVN/vscale.ll
+++ b/llvm/test/Transforms/GVN/vscale.ll
@@ -363,7 +363,8 @@ define void @missing_load_elimination(i1 %c, ptr %p, ptr %q, <vscale x 4 x i32>
 ; CHECK-NEXT:    store <vscale x 4 x i32> [[V:%.*]], ptr [[P1]], align 16
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, ptr [[Q:%.*]], align 16
+; CHECK-NEXT:    [[T:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
+; CHECK-NEXT:    store <vscale x 4 x i32> [[T]], ptr [[Q:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.else:
 ; CHECK-NEXT:    ret void

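(Illustration only, not taken from the patch; the function name is made up.)
As a rough sketch of what the getVScaleRange-based reasoning enables: with a
vscale_range(1,16) attribute the scalable offset below is at least 16 bytes,
so the decomposed GEP has a single vscale index with scale 16 and offset 0,
and the range intersection should leave no overlap between the two 4-byte
accesses:

  define void @vscale_range_example(ptr %p) vscale_range(1,16) {
    %vs = call i64 @llvm.vscale.i64()
    ; 16 * vscale, so at least 16 bytes given vscale_range(1,16)
    %off = shl nuw nsw i64 %vs, 4
    %gep = getelementptr i8, ptr %p, i64 %off
    load i32, ptr %p
    load i32, ptr %gep
    ret void
  }
  declare i64 @llvm.vscale.i64()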

