[llvm] [BasicAA] Add Vscale GEP decomposition on variable index (PR #69152)

Fri Oct 27 06:06:55 PDT 2023

https://github.com/harviniriawan updated https://github.com/llvm/llvm-project/pull/69152

>From d14d0146c350c48f3fa8e78152176d84c568b3cd Mon Sep 17 00:00:00 2001
From: Harvin Iriawan <harvin.iriawan at arm.com>
Date: Mon, 9 Oct 2023 16:07:20 +0100
Subject: [PATCH] [BasicAA] Add Vscale GEP decomposition on variable index

  Enable BasicAA to be done on Scalable GEP & LocationSize
  Scalable GEP expression such as @llvm.vscale and GEP of scalable type
  are attached to the VariableGEPIndex, with Val representing Vscale.

  VScale AA works if there's only one variable index (the vscale) and
  constant offsets in the GEP for now
---
 llvm/lib/Analysis/BasicAliasAnalysis.cpp     | 201 ++++++++++++++-----
 llvm/test/Analysis/AliasSet/memloc-vscale.ll |   3 +-
 llvm/test/Analysis/BasicAA/vscale.ll         | 111 ++++++----
 llvm/test/Transforms/GVN/vscale.ll           |  11 +-
 4 files changed, 228 insertions(+), 98 deletions(-)

diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 2acd5c47b7681e8..74fb52386ee1dcd 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -44,6 +44,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
@@ -63,6 +64,7 @@
 #define DEBUG_TYPE "basicaa"
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 /// Enable analysis of recursive PHI nodes.
 static cl::opt<bool> EnableRecPhiAnalysis("basic-aa-recphi", cl::Hidden,
@@ -344,13 +346,20 @@ struct LinearExpression {
 
 /// Analyzes the specified value as a linear expression: "A*V + B", where A and
 /// B are constant integers.
-static LinearExpression GetLinearExpression(
-    const CastedValue &Val,  const DataLayout &DL, unsigned Depth,
-    AssumptionCache *AC, DominatorTree *DT) {
+static LinearExpression GetLinearExpression(const CastedValue &Val,
+                                            const DataLayout &DL,
+                                            unsigned Depth, AssumptionCache *AC,
+                                            DominatorTree *DT) {
   // Limit our recursion depth.
   if (Depth == 6)
     return Val;
 
+  // If llvm.vscale is matched, set linear expression with scale 1 and offset 0
+  if (match(Val.V, m_VScale())) {
+    return LinearExpression(Val, APInt(Val.getBitWidth(), 1),
+                            APInt(Val.getBitWidth(), 0), true);
+  }
+
   if (const ConstantInt *Const = dyn_cast<ConstantInt>(Val.V))
     return LinearExpression(Val, APInt(Val.getBitWidth(), 0),
                             Val.evaluateWith(Const->getValue()), true);
@@ -457,6 +466,9 @@ struct VariableGEPIndex {
   CastedValue Val;
   APInt Scale;
 
+  // A value representing vscale quantity in a GEP expression
+  bool IsVScale;
+
   // Context instruction to use when querying information about this index.
   const Instruction *CxtI;
 
@@ -479,13 +491,10 @@ struct VariableGEPIndex {
     dbgs() << "\n";
   }
   void print(raw_ostream &OS) const {
-    OS << "(V=" << Val.V->getName()
-       << ", zextbits=" << Val.ZExtBits
-       << ", sextbits=" << Val.SExtBits
-       << ", truncbits=" << Val.TruncBits
-       << ", scale=" << Scale
-       << ", nsw=" << IsNSW
-       << ", negated=" << IsNegated << ")";
+    OS << "(V=" << Val.V->getName() << "  IsVScale=" << IsVScale
+       << ", zextbits=" << Val.ZExtBits << ", sextbits=" << Val.SExtBits
+       << ", truncbits=" << Val.TruncBits << ", scale=" << Scale
+       << ", nsw=" << IsNSW << ", negated=" << IsNegated << ")";
   }
 };
 }
@@ -606,6 +615,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
     for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end();
          I != E; ++I, ++GTI) {
       const Value *Index = *I;
+      const bool ScalableGEP = isa<ScalableVectorType>(GTI.getIndexedType());
       // Compute the (potentially symbolic) offset in bytes for this index.
       if (StructType *STy = GTI.getStructTypeOrNull()) {
         // For a struct, add the member offset.
@@ -617,27 +627,18 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
         continue;
       }
 
+      TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
       // For an array/pointer, add the element offset, explicitly scaled.
+      // Skip adding to constant offset if GEP index is marked as scalable
+      // they are handled below as variable offset
       if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
         if (CIdx->isZero())
           continue;
-
-        // Don't attempt to analyze GEPs if the scalable index is not zero.
-        TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
-        if (AllocTypeSize.isScalable()) {
-          Decomposed.Base = V;
-          return Decomposed;
+        if (!ScalableGEP) {
+          Decomposed.Offset += AllocTypeSize.getFixedValue() *
+                               CIdx->getValue().sextOrTrunc(MaxIndexSize);
+          continue;
         }
-
-        Decomposed.Offset += AllocTypeSize.getFixedValue() *
-                             CIdx->getValue().sextOrTrunc(MaxIndexSize);
-        continue;
-      }
-
-      TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
-      if (AllocTypeSize.isScalable()) {
-        Decomposed.Base = V;
-        return Decomposed;
       }
 
       GepHasConstantOffset = false;
@@ -647,22 +648,55 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
       unsigned Width = Index->getType()->getIntegerBitWidth();
       unsigned SExtBits = IndexSize > Width ? IndexSize - Width : 0;
       unsigned TruncBits = IndexSize < Width ? Width - IndexSize : 0;
-      LinearExpression LE = GetLinearExpression(
-          CastedValue(Index, 0, SExtBits, TruncBits), DL, 0, AC, DT);
+      // Scalable GEP decomposition
+      // Allow Scalable GEP to be decomposed in the case of
+      //    1. getelementptr <4 x vscale x i32> with 1st index as a constant
+      //    2. Index which have a leaf of @llvm.vscale
+      // In both cases, essentially CastedValue of VariableGEPIndex is Vscale,
+      // however in the 1st case, CastedValue is of type constant, hence another
+      // flag in VariableGEPIndex is created in this case, IsVScale If GEP is
+      // Scalable type, e.g. <4 x vscale x i32>, the first index will have
+      // vscale as a variable index, create a LE in this case
+      LinearExpression LE(CastedValue(Index, 0, SExtBits, TruncBits));
+      if (ScalableGEP) {
+        if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
+          LE = LinearExpression(
+              CastedValue(Index, 0, SExtBits, TruncBits),
+              CastedValue(Index, 0, SExtBits, TruncBits)
+                  .evaluateWith(CIdx->getValue()),
+              APInt(CastedValue(Index, 0, SExtBits, TruncBits).getBitWidth(),
+                    0),
+              true);
+          assert(LE.Offset.isZero() && "For Scalable GEP constant first index, "
+                                       "the offset of LE should be 0");
+        } else {
+          // if first index is not a constant, a single variable gep will
+          // contain 2 variables, bail in this case
+          Decomposed.Base = V;
+          return Decomposed;
+        }
+      } else
+        LE = GetLinearExpression(CastedValue(Index, 0, SExtBits, TruncBits), DL,
+                                 0, AC, DT);
 
       // Scale by the type size.
-      unsigned TypeSize = AllocTypeSize.getFixedValue();
+      unsigned TypeSize = AllocTypeSize.getKnownMinValue();
       LE = LE.mul(APInt(IndexSize, TypeSize), GEPOp->isInBounds());
       Decomposed.Offset += LE.Offset.sext(MaxIndexSize);
       APInt Scale = LE.Scale.sext(MaxIndexSize);
+      bool LEhasVscale = match(LE.Val.V, m_VScale());
 
       // If we already had an occurrence of this index variable, merge this
       // scale into it.  For example, we want to handle:
       //   A[x][x] -> x*16 + x*4 -> x*20
       // This also ensures that 'x' only appears in the index list once.
+      // Only add to IsVScale VariableGEPIndex if it's @llvm.vscale or gep
+      // vscale index
       for (unsigned i = 0, e = Decomposed.VarIndices.size(); i != e; ++i) {
-        if (Decomposed.VarIndices[i].Val.V == LE.Val.V &&
-            Decomposed.VarIndices[i].Val.hasSameCastsAs(LE.Val)) {
+        if (Decomposed.VarIndices[i].Val.hasSameCastsAs(LE.Val) &&
+            ((Decomposed.VarIndices[i].IsVScale &&
+              (ScalableGEP || LEhasVscale)) ||
+             Decomposed.VarIndices[i].Val.V == LE.Val.V)) {
           Scale += Decomposed.VarIndices[i].Scale;
           LE.IsNSW = false; // We cannot guarantee nsw for the merge.
           Decomposed.VarIndices.erase(Decomposed.VarIndices.begin() + i);
@@ -672,10 +706,21 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
 
       // Make sure that we have a scale that makes sense for this target's
       // index size.
+      // Only allow variableGEP decomposition for constants, in the case of
+      // vscale
       Scale = adjustToIndexSize(Scale, IndexSize);
+      bool InvalidVarVScale = (ScalableGEP && LEhasVscale) ||
+                              (ScalableGEP && !isa<ConstantInt>(LE.Val.V));
+
+      assert(!InvalidVarVScale &&
+             "Variable GEP index contains VScale and another variable");
 
       if (!!Scale) {
-        VariableGEPIndex Entry = {LE.Val, Scale, CxtI, LE.IsNSW,
+        VariableGEPIndex Entry = {LE.Val,
+                                  Scale,
+                                  ScalableGEP || LEhasVscale,
+                                  CxtI,
+                                  LE.IsNSW,
                                   /* IsNegated */ false};
         Decomposed.VarIndices.push_back(Entry);
       }
@@ -1058,19 +1103,17 @@ AliasResult BasicAAResult::aliasGEP(
 
   // If an inbounds GEP would have to start from an out of bounds address
   // for the two to alias, then we can assume noalias.
-  // TODO: Remove !isScalable() once BasicAA fully support scalable location
-  // size
   if (*DecompGEP1.InBounds && DecompGEP1.VarIndices.empty() &&
-      V2Size.hasValue() && !V2Size.isScalable() &&
-      DecompGEP1.Offset.sge(V2Size.getValue()) &&
+      V2Size.hasValue() &&
+      DecompGEP1.Offset.sge(V2Size.getValue().getKnownMinValue()) &&
       isBaseOfObject(DecompGEP2.Base))
     return AliasResult::NoAlias;
 
   if (isa<GEPOperator>(V2)) {
     // Symmetric case to above.
     if (*DecompGEP2.InBounds && DecompGEP1.VarIndices.empty() &&
-        V1Size.hasValue() && !V1Size.isScalable() &&
-        DecompGEP1.Offset.sle(-V1Size.getValue()) &&
+        V1Size.hasValue() &&
+        DecompGEP1.Offset.sle(-V1Size.getValue().getKnownMinValue()) &&
         isBaseOfObject(DecompGEP1.Base))
       return AliasResult::NoAlias;
   }
@@ -1094,10 +1137,6 @@ AliasResult BasicAAResult::aliasGEP(
     return BaseAlias;
   }
 
-  // Bail on analysing scalable LocationSize
-  if (V1Size.isScalable() || V2Size.isScalable())
-    return AliasResult::MayAlias;
-
   // If there is a constant difference between the pointers, but the difference
   // is less than the size of the associated memory object, then we know
   // that the objects are partially overlapping.  If the difference is
@@ -1124,16 +1163,16 @@ AliasResult BasicAAResult::aliasGEP(
       Off = -Off;
     }
 
-    if (!VLeftSize.hasValue())
+    if (!VLeftSize.hasValue() || VLeftSize.isScalable())
       return AliasResult::MayAlias;
 
-    const uint64_t LSize = VLeftSize.getValue();
+    const uint64_t LSize = VLeftSize.getValue().getKnownMinValue();
     if (Off.ult(LSize)) {
       // Conservatively drop processing if a phi was visited and/or offset is
       // too big.
       AliasResult AR = AliasResult::PartialAlias;
       if (VRightSize.hasValue() && Off.ule(INT32_MAX) &&
-          (Off + VRightSize.getValue()).ule(LSize)) {
+          (Off + VRightSize.getValue().getKnownMinValue()).ule(LSize)) {
         // Memory referenced by right pointer is nested. Save the offset in
         // cache. Note that originally offset estimated as GEP1-V2, but
         // AliasResult contains the shift that represents GEP1+Offset=V2.
@@ -1149,12 +1188,71 @@ AliasResult BasicAAResult::aliasGEP(
   if (!V1Size.hasValue() || !V2Size.hasValue())
     return AliasResult::MayAlias;
 
+  // VScale Alias Analysis
+  // GEPs with Vscale will have the expression A*Vscale + B (1 variable index
+  // and constant offset) The difference between two GEPs and Scalable
+  // LocationSize can then be analysed as they have the form of
+  //     LSize                SubtractDecomposedGEP output
+  //   A * Vscale                   B * Vscale + C
+  // Since VScale is strictly a positive number (Vscale >= 1), the larger GEP
+  // can be known
+  // TODO: Use knowledge of vscale_range to make the analysis more accurate
+  if (DecompGEP1.VarIndices.size() == 1 && DecompGEP1.VarIndices[0].IsVScale &&
+      (V1Size.isScalable() || V2Size.isScalable())) {
+    const VariableGEPIndex &ScalableVar = DecompGEP1.VarIndices[0];
+    bool StrictlyPos = false, StrictlyNeg = false;
+    APInt &Off = DecompGEP1.Offset;
+    if (!ScalableVar.IsNegated) {
+      if (Off.isNegative())
+        StrictlyPos = ScalableVar.Scale.ugt(Off.abs());
+      else
+        StrictlyPos = true;
+    } else
+      StrictlyPos = Off.isNonNegative();
+
+    if (ScalableVar.IsNegated) {
+      if (Off.isNonNegative())
+        StrictlyNeg = Off.ult(ScalableVar.Scale.abs());
+      else
+        StrictlyNeg = true;
+    } else
+      StrictlyNeg = Off.isNegative();
+
+    if (StrictlyPos || StrictlyNeg) {
+      LocationSize VLeftSize = V2Size;
+      LocationSize VRightSize = V1Size;
+      const bool Swapped = StrictlyNeg;
+
+      if (Swapped) {
+        std::swap(VLeftSize, VRightSize);
+        Off = -Off;
+      }
+
+      const uint64_t LSize = VLeftSize.getValue().getKnownMinValue();
+      if (VLeftSize.isScalable() && ScalableVar.Scale.ult(LSize) &&
+          (ScalableVar.Scale + DecompGEP1.Offset).ult(LSize))
+        return AliasResult::PartialAlias;
+
+      if ((ScalableVar.Scale.uge(LSize) && VLeftSize.isScalable()) ||
+          ((ScalableVar.Scale + DecompGEP1.Offset).uge(LSize) &&
+           !VLeftSize.isScalable()))
+        return AliasResult::NoAlias;
+    }
+  }
+
+  // Bail on Scalable location size from now onwards
+  if (V1Size.isScalable() || V2Size.isScalable())
+    return AliasResult::MayAlias;
+
   APInt GCD;
   ConstantRange OffsetRange = ConstantRange(DecompGEP1.Offset);
   for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) {
     const VariableGEPIndex &Index = DecompGEP1.VarIndices[i];
     const APInt &Scale = Index.Scale;
     APInt ScaleForGCD = Scale;
+    assert((!Index.IsVScale || match(Index.Val.V, m_VScale()) ||
+            isa<ConstantInt>(Index.Val.V)) &&
+           "Not allowed to have non-constant values if IsVScale is set");
     if (!Index.IsNSW)
       ScaleForGCD =
           APInt::getOneBitSet(Scale.getBitWidth(), Scale.countr_zero());
@@ -1727,7 +1825,12 @@ void BasicAAResult::subtractDecomposedGEPs(DecomposedGEP &DestGEP,
     bool Found = false;
     for (auto I : enumerate(DestGEP.VarIndices)) {
       VariableGEPIndex &Dest = I.value();
-      if (!isValueEqualInPotentialCycles(Dest.Val.V, Src.Val.V, AAQI) ||
+      if (Dest.IsVScale != Src.IsVScale)
+        continue;
+      const bool SrcDestAreVScale = Dest.IsVScale && Src.IsVScale;
+      // Suppress base value checks if Src and Dst are of constant VScale
+      if ((!SrcDestAreVScale &&
+           !isValueEqualInPotentialCycles(Dest.Val.V, Src.Val.V, AAQI)) ||
           !Dest.Val.hasSameCastsAs(Src.Val))
         continue;
 
@@ -1752,7 +1855,11 @@ void BasicAAResult::subtractDecomposedGEPs(DecomposedGEP &DestGEP,
 
     // If we didn't consume this entry, add it to the end of the Dest list.
     if (!Found) {
-      VariableGEPIndex Entry = {Src.Val, Src.Scale, Src.CxtI, Src.IsNSW,
+      VariableGEPIndex Entry = {Src.Val,
+                                Src.Scale,
+                                Src.IsVScale,
+                                Src.CxtI,
+                                Src.IsNSW,
                                 /* IsNegated */ true};
       DestGEP.VarIndices.push_back(Entry);
     }
diff --git a/llvm/test/Analysis/AliasSet/memloc-vscale.ll b/llvm/test/Analysis/AliasSet/memloc-vscale.ll
index 8a83645ddaf9a87..ee67f7c15fb41b5 100644
--- a/llvm/test/Analysis/AliasSet/memloc-vscale.ll
+++ b/llvm/test/Analysis/AliasSet/memloc-vscale.ll
@@ -34,7 +34,8 @@ define void @ss2(ptr %p) {
   ret void
 }
 ; CHECK-LABEL: Alias sets for function 'son':
-; CHECK: AliasSet[{{.*}}, 2] may alias, Mod       Pointers: (ptr %g, LocationSize::precise(vscale x 16)), (ptr %p, LocationSize::precise(8))
+; CHECK: AliasSet[{{.*}}, 1] must alias, Mod       Pointers: (ptr %g, LocationSize::precise(vscale x 16))
+; CHECK: AliasSet[{{.*}}, 1] must alias, Mod       Pointers: (ptr %p, LocationSize::precise(8))
 define void @son(ptr %p) {
   %g = getelementptr i8, ptr %p, i64 8
   store <vscale x 2 x i64> zeroinitializer, ptr %g, align 2
diff --git a/llvm/test/Analysis/BasicAA/vscale.ll b/llvm/test/Analysis/BasicAA/vscale.ll
index 0d6d8fea392bbfc..2d277eacdcb8aeb 100644
--- a/llvm/test/Analysis/BasicAA/vscale.ll
+++ b/llvm/test/Analysis/BasicAA/vscale.ll
@@ -4,8 +4,8 @@
 
 ; CHECK-LABEL: gep_alloca_const_offset_1
 ; CHECK-DAG:  MustAlias:    <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep1
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep2
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep2
+; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
 define void @gep_alloca_const_offset_1() {
   %alloc = alloca <vscale x 4 x i32>
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %alloc, i64 0
@@ -17,10 +17,9 @@ define void @gep_alloca_const_offset_1() {
 }
 
 ; CHECK-LABEL: gep_alloca_const_offset_2
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep1
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep2
-; TODO: AliasResult for gep1,gep2 can be improved as MustAlias
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep1
+; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep2
+; CHECK-DAG:  MustAlias:   <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
 define void @gep_alloca_const_offset_2() {
   %alloc = alloca <vscale x 4 x i32>
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %alloc, i64 1
@@ -33,8 +32,8 @@ define void @gep_alloca_const_offset_2() {
 
 ; CHECK-LABEL: gep_alloca_const_offset_3
 ; CHECK-DAG:  MustAlias:    <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep1
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %alloc, i32* %gep2
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %gep1, i32* %gep2
+; CHECK-DAG:  MayAlias: <vscale x 4 x i32>* %alloc, i32* %gep2
+; CHECK-DAG:  MayAlias: <vscale x 4 x i32>* %gep1, i32* %gep2
 define void @gep_alloca_const_offset_3() {
   %alloc = alloca <vscale x 4 x i32>
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %alloc, i64 0
@@ -74,10 +73,9 @@ define void @gep_alloca_symbolic_offset(i64 %idx1, i64 %idx2) {
 }
 
 ; CHECK-LABEL: gep_same_base_const_offset
-; CHECK-DAG:  MayAlias:     i32* %gep1, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep2, <vscale x 4 x i32>* %p
-; TODO: AliasResult for gep1,gep2 can be improved as NoAlias
-; CHECK-DAG:  MayAlias:     i32* %gep1, i32* %gep2
+; CHECK-DAG:  NoAlias:     i32* %gep1, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:     i32* %gep2, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:     i32* %gep1, i32* %gep2
 define void @gep_same_base_const_offset(ptr %p) {
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
   %gep2 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 1
@@ -101,8 +99,8 @@ define void @gep_same_base_symbolic_offset(ptr %p, i64 %idx1, i64 %idx2) {
 }
 
 ; CHECK-LABEL: gep_different_base_const_offset
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %p1
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %p2
+; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %p1
+; CHECK-DAG:  NoAlias:     <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %p2
 ; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %p1, <vscale x 4 x i32>* %p2
 ; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %p2
 ; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %p1
@@ -117,12 +115,41 @@ define void @gep_different_base_const_offset(ptr noalias %p1, ptr noalias %p2) {
   ret void
 }
 
+; getelementptr @llvm.vscale tests
+; CHECK-LABEL: gep_llvm_vscale_no_alias
+; CHECK-DAG: NoAlias:      <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+; CHECK-DAG: MustAlias:    <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep3
+; CHECK-DAG: NoAlias:      <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %gep3
+
+declare i64 @llvm.vscale.i64()
+define void @gep_llvm_vscale_no_alias(ptr %p) {
+  %t1 = tail call i64 @llvm.vscale.i64()
+  %t2 = shl nuw nsw i64 %t1, 3
+  %gep1 = getelementptr i32, ptr %p, i64 %t2
+  %gep2 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1
+  %gep3 = getelementptr <vscale x 4 x i32>, ptr %p, i64 2
+  load <vscale x 4 x i32>, ptr %gep1
+  load <vscale x 4 x i32>, ptr %gep2
+  load <vscale x 4 x i32>, ptr %gep3
+  ret void
+}
+
+; CHECK-LABEL: gep_llvm_vscale_squared_may_alias
+; CHECK-DAG: MayAlias:      <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+define void @gep_llvm_vscale_squared_may_alias(ptr %p) {
+  %t1 = tail call i64 @llvm.vscale.i64()
+  %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 %t1
+  %gep2 = getelementptr i32, ptr %p, i64 1
+  load <vscale x 4 x i32>, ptr %gep1
+  load <vscale x 4 x i32>, ptr %gep2
+  ret void
+}
 ; getelementptr + bitcast
 
 ; CHECK-LABEL: gep_bitcast_1
 ; CHECK-DAG:   MustAlias:    i32* %p, <vscale x 4 x i32>* %p
-; CHECK-DAG:   MayAlias:     i32* %gep1, <vscale x 4 x i32>* %p
-; CHECK-DAG:   MayAlias:     i32* %gep1, i32* %p
+; CHECK-DAG:   NoAlias:      i32* %gep1, <vscale x 4 x i32>* %p
+; CHECK-DAG:   NoAlias:      i32* %gep1, i32* %p
 ; CHECK-DAG:   MayAlias:     i32* %gep2, <vscale x 4 x i32>* %p
 ; CHECK-DAG:   MayAlias:     i32* %gep1, i32* %gep2
 ; CHECK-DAG:   NoAlias:      i32* %gep2, i32* %p
@@ -138,11 +165,11 @@ define void @gep_bitcast_1(ptr %p) {
 
 ; CHECK-LABEL: gep_bitcast_2
 ; CHECK-DAG:  MustAlias:    <vscale x 4 x float>* %p, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep1, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep1, <vscale x 4 x float>* %p
-; CHECK-DAG:  MayAlias:     float* %gep2, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep1, float* %gep2
-; CHECK-DAG:  MayAlias:     float* %gep2, <vscale x 4 x float>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep1, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep1, <vscale x 4 x float>* %p
+; CHECK-DAG:  NoAlias:      float* %gep2, <vscale x 4 x i32>* %p
+; CHECK-DAG:  MustAlias:    i32* %gep1, float* %gep2
+; CHECK-DAG:  NoAlias:      float* %gep2, <vscale x 4 x float>* %p
 define void @gep_bitcast_2(ptr %p) {
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
   %gep2 = getelementptr <vscale x 4 x float>, ptr %p, i64 1, i64 0
@@ -159,8 +186,8 @@ define void @gep_bitcast_2(ptr %p) {
 ; CHECK-DAG:  MayAlias:     i32* %a, <vscale x 4 x i32>* %p
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep_rec_1
-; CHECK-DAG:  MayAlias:     i32* %gep, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep_rec_1, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep_rec_1, <vscale x 4 x i32>* %p
 ; CHECK-DAG:  NoAlias:      i32* %gep, i32* %gep_rec_1
 define void @gep_recursion_level_1(ptr %a, ptr %p) {
   %gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 2
@@ -174,10 +201,10 @@ define void @gep_recursion_level_1(ptr %a, ptr %p) {
 
 ; CHECK-LABEL: gep_recursion_level_1_bitcast
 ; CHECK-DAG:  MustAlias:    i32* %a, <vscale x 4 x i32>* %a
-; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep
-; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep_rec_1
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %a, i32* %gep
-; CHECK-DAG:  MayAlias:     <vscale x 4 x i32>* %a, i32* %gep_rec_1
+; CHECK-DAG:  NoAlias:      i32* %a, i32* %gep
+; CHECK-DAG:  NoAlias:      i32* %a, i32* %gep_rec_1
+; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %a, i32* %gep
+; CHECK-DAG:  NoAlias:      <vscale x 4 x i32>* %a, i32* %gep_rec_1
 ; CHECK-DAG:  NoAlias:      i32* %gep, i32* %gep_rec_1
 define void @gep_recursion_level_1_bitcast(ptr %a) {
   %gep = getelementptr <vscale x 4 x i32>, ptr %a, i64 1, i64 2
@@ -194,9 +221,9 @@ define void @gep_recursion_level_1_bitcast(ptr %a) {
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep_rec_1
 ; CHECK-DAG:  MayAlias:     i32* %a, i32* %gep_rec_2
-; CHECK-DAG:  MayAlias:     i32* %gep, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep_rec_1, <vscale x 4 x i32>* %p
-; CHECK-DAG:  MayAlias:     i32* %gep_rec_2, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep_rec_1, <vscale x 4 x i32>* %p
+; CHECK-DAG:  NoAlias:      i32* %gep_rec_2, <vscale x 4 x i32>* %p
 ; CHECK-DAG:  NoAlias:      i32* %gep, i32* %gep_rec_1
 ; CHECK-DAG:  NoAlias:      i32* %gep, i32* %gep_rec_2
 ; CHECK-DAG:  NoAlias:      i32* %gep_rec_1, i32* %gep_rec_2
@@ -221,34 +248,34 @@ define void @gep_recursion_level_2(ptr %a, ptr %p) {
 ; CHECK-DAG: MayAlias:     i32* %a, i32* %gep_rec_4
 ; CHECK-DAG: MayAlias:     i32* %a, i32* %gep_rec_5
 ; CHECK-DAG: MayAlias:     i32* %a, i32* %gep_rec_6
-; CHECK-DAG: MayAlias:     i32* %gep, <vscale x 4 x i32>* %p
-; CHECK-DAG: MayAlias:     i32* %gep_rec_1, <vscale x 4 x i32>* %p
-; CHECK-DAG: MayAlias:     i32* %gep_rec_2, <vscale x 4 x i32>* %p
-; CHECK-DAG: MayAlias:     i32* %gep_rec_3, <vscale x 4 x i32>* %p
-; CHECK-DAG: MayAlias:     i32* %gep_rec_4, <vscale x 4 x i32>* %p
-; CHECK-DAG: MayAlias:     i32* %gep_rec_5, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias:      i32* %gep, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias:      i32* %gep_rec_1, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias:      i32* %gep_rec_2, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias:      i32* %gep_rec_3, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias:      i32* %gep_rec_4, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias:      i32* %gep_rec_5, <vscale x 4 x i32>* %p
 ; CHECK-DAG: MayAlias:     i32* %gep_rec_6, <vscale x 4 x i32>* %p
 ; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_1
 ; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_2
 ; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_3
 ; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_4
 ; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_5
-; CHECK-DAG: NoAlias:      i32* %gep, i32* %gep_rec_6
+; CHECK-DAG: MayAlias:     i32* %gep, i32* %gep_rec_6
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_1, i32* %gep_rec_2
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_1, i32* %gep_rec_3
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_1, i32* %gep_rec_4
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_1, i32* %gep_rec_5
-; CHECK-DAG: NoAlias:      i32* %gep_rec_1, i32* %gep_rec_6
+; CHECK-DAG: MayAlias:     i32* %gep_rec_1, i32* %gep_rec_6
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_2, i32* %gep_rec_3
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_2, i32* %gep_rec_4
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_2, i32* %gep_rec_5
-; CHECK-DAG: NoAlias:      i32* %gep_rec_2, i32* %gep_rec_6
+; CHECK-DAG: MayAlias:     i32* %gep_rec_2, i32* %gep_rec_6
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_3, i32* %gep_rec_4
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_3, i32* %gep_rec_5
-; CHECK-DAG: NoAlias:      i32* %gep_rec_3, i32* %gep_rec_6
+; CHECK-DAG: MayAlias:     i32* %gep_rec_3, i32* %gep_rec_6
 ; CHECK-DAG: NoAlias:      i32* %gep_rec_4, i32* %gep_rec_5
-; CHECK-DAG: NoAlias:      i32* %gep_rec_4, i32* %gep_rec_6
-; CHECK-DAG: NoAlias:      i32* %gep_rec_5, i32* %gep_rec_6
+; CHECK-DAG: MayAlias:     i32* %gep_rec_4, i32* %gep_rec_6
+; CHECK-DAG: MayAlias:     i32* %gep_rec_5, i32* %gep_rec_6
 ; GEP max lookup depth was set to 6.
 define void @gep_recursion_max_lookup_depth_reached(ptr %a, ptr %p) {
   %gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 2
diff --git a/llvm/test/Transforms/GVN/vscale.ll b/llvm/test/Transforms/GVN/vscale.ll
index 71adaed8e5722bd..3ecae9f54fddc7e 100644
--- a/llvm/test/Transforms/GVN/vscale.ll
+++ b/llvm/test/Transforms/GVN/vscale.ll
@@ -84,10 +84,7 @@ define i32 @load_clobber_load_gep3(ptr %p) {
 ; CHECK-LABEL: @load_clobber_load_gep3(
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 1, i64 0
 ; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[GEP1]], align 4
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P]], i64 1, i64 0
-; CHECK-NEXT:    [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
-; CHECK-NEXT:    [[CAST:%.*]] = bitcast float [[LOAD2]] to i32
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD1]], [[CAST]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD1]]
 ; CHECK-NEXT:    ret i32 [[ADD]]
 ;
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
@@ -277,8 +274,7 @@ define void @redundant_load_elimination_2(i1 %c, ptr %p, ptr %q) {
 ; CHECK-NEXT:    store i32 1, ptr [[GEP2]], align 4
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[T:%.*]] = load i32, ptr [[GEP1]], align 4
-; CHECK-NEXT:    store i32 [[T]], ptr [[Q:%.*]], align 4
+; CHECK-NEXT:    store i32 0, ptr [[Q:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.else:
 ; CHECK-NEXT:    ret void
@@ -367,8 +363,7 @@ define void @missing_load_elimination(i1 %c, ptr %p, ptr %q, <vscale x 4 x i32>
 ; CHECK-NEXT:    store <vscale x 4 x i32> [[V:%.*]], ptr [[P1]], align 16
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[T:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
-; CHECK-NEXT:    store <vscale x 4 x i32> [[T]], ptr [[Q:%.*]], align 16
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, ptr [[Q:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.else:
 ; CHECK-NEXT:    ret void