[llvm] c7d65e4 - [IR] Enable load/store/alloca for arrays of scalable vectors.

Paul Walker via llvm-commits llvm-commits@lists.llvm.org
Thu Sep 14 06:57:07 PDT 2023


Author: Paul Walker
Date: 2023-09-14T13:49:01Z
New Revision: c7d65e4466eafe518937c59ef9a242234ed7a08a

URL: https://github.com/llvm/llvm-project/commit/c7d65e4466eafe518937c59ef9a242234ed7a08a
DIFF: https://github.com/llvm/llvm-project/commit/c7d65e4466eafe518937c59ef9a242234ed7a08a.diff

LOG: [IR] Enable load/store/alloca for arrays of scalable vectors.

Differential Revision: https://reviews.llvm.org/D158517
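
As a quick illustration of what the patch enables (an editorial sketch, not
code from the patch; the function and value names are made up), IR like the
following was previously rejected by the verifier and is now valid:

  define void @copy_array(ptr %src) {
    ; [2 x <vscale x 4 x i32>] is now a sized type, usable directly with
    ; alloca, load and store.
    %tmp = alloca [2 x <vscale x 4 x i32>], align 16
    %val = load [2 x <vscale x 4 x i32>], ptr %src, align 16
    store [2 x <vscale x 4 x i32>] %val, ptr %tmp, align 16
    ret void
  }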

Added: 
    llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
    llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll
    llvm/test/Transforms/GlobalOpt/2022-08-23-ScalableVectorArrayCrash.ll
    llvm/test/Transforms/InstCombine/scalable-vector-array.ll
    llvm/test/Transforms/SROA/scalable-vector-array.ll

Modified: 
    llvm/docs/LangRef.rst
    llvm/include/llvm/IR/Type.h
    llvm/lib/Analysis/InstructionSimplify.cpp
    llvm/lib/IR/Operator.cpp
    llvm/lib/IR/Type.cpp
    llvm/lib/IR/Verifier.cpp
    llvm/lib/Transforms/IPO/GlobalOpt.cpp
    llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
    llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
    llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
    llvm/test/Transforms/GVN/opaque-ptr.ll
    llvm/test/Transforms/InstCombine/gep-can-replace-gep-idx-with-zero-typesize.ll
    llvm/test/Transforms/InstCombine/opaque-ptr.ll
    llvm/test/Transforms/InstSimplify/gep.ll
    llvm/test/Transforms/SeparateConstOffsetFromGEP/AArch64/scalable-vector-geps.ll
    llvm/test/Verifier/scalable-global-vars.ll

Removed: 
    llvm/test/Other/scalable-vector-array.ll


################################################################################
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index dc5c84de420d76c..f542e70bcfee810 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -742,16 +742,16 @@ an optional list of attached :ref:`metadata <metadata>`.
 Variables and aliases can have a
 :ref:`Thread Local Storage Model <tls_model>`.
 
-:ref:`Scalable vectors <t_vector>` cannot be global variables or members of
-arrays because their size is unknown at compile time. They are allowed in
-structs to facilitate intrinsics returning multiple values. Generally, structs
-containing scalable vectors are not considered "sized" and cannot be used in
-loads, stores, allocas, or GEPs. The only exception to this rule is for structs
-that contain scalable vectors of the same type (e.g. ``{<vscale x 2 x i32>,
-<vscale x 2 x i32>}`` contains the same type while ``{<vscale x 2 x i32>,
-<vscale x 2 x i64>}`` doesn't). These kinds of structs (we may call them
-homogeneous scalable vector structs) are considered sized and can be used in
-loads, stores, allocas, but not GEPs.
+Globals cannot be or contain :ref:`Scalable vectors <t_vector>` because their
+size is unknown at compile time. They are allowed in structs to facilitate
+intrinsics returning multiple values. Generally, structs containing scalable
+vectors are not considered "sized" and cannot be used in loads, stores, allocas,
+or GEPs. The only exception to this rule is for structs that contain scalable
+vectors of the same type (e.g. ``{<vscale x 2 x i32>, <vscale x 2 x i32>}``
+contains the same type while ``{<vscale x 2 x i32>, <vscale x 2 x i64>}``
+doesn't). These kinds of structs (we may call them homogeneous scalable vector
+structs) are considered sized and can be used in loads, stores, allocas, but
+not GEPs.
 
 Syntax::
 

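To illustrate the sizing rules described above (an editorial sketch; the type
and function names are made up): homogeneous scalable-vector structs and, with
this patch, arrays of scalable vectors are sized, while mixed structs remain
unsized and unusable in memory operations.

  %homog = type { <vscale x 2 x i32>, <vscale x 2 x i32> }  ; sized: load/store/alloca, but no GEP
  %mixed = type { <vscale x 2 x i32>, <vscale x 2 x i64> }  ; not sized: none of these are allowed

  define void @sizing(ptr %p) {
    %s = load %homog, ptr %p                         ; allowed
    %a = alloca [4 x <vscale x 2 x i32>], align 8    ; allowed as of this patch
    ret void
  }
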
diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h
index 118f53b2ba16aba..c12e899d58fa834 100644
--- a/llvm/include/llvm/IR/Type.h
+++ b/llvm/include/llvm/IR/Type.h
@@ -209,8 +209,7 @@ class Type {
   /// Return true if this is a target extension type with a scalable layout.
   bool isScalableTargetExtTy() const;
 
-  /// Return true if this is a scalable vector type or a target extension type
-  /// with a scalable layout.
+  /// Return true if this is a type whose size is a known multiple of vscale.
   bool isScalableTy() const;
 
   /// Return true if this is a FP type or a vector of FP.

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 5fe0d53c313d40e..2a3011075e47ed7 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4934,7 +4934,7 @@ static Value *simplifyGEPInst(Type *SrcTy, Value *Ptr,
     return UndefValue::get(GEPTy);
 
   bool IsScalableVec =
-      isa<ScalableVectorType>(SrcTy) || any_of(Indices, [](const Value *V) {
+      SrcTy->isScalableTy() || any_of(Indices, [](const Value *V) {
         return isa<ScalableVectorType>(V->getType());
       });
 

diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp
index b57f3e3b2967eb9..d2a1f2eb49dafed 100644
--- a/llvm/lib/IR/Operator.cpp
+++ b/llvm/lib/IR/Operator.cpp
@@ -127,9 +127,7 @@ bool GEPOperator::accumulateConstantOffset(
   auto end = generic_gep_type_iterator<decltype(Index.end())>::end(Index.end());
   for (auto GTI = begin, GTE = end; GTI != GTE; ++GTI) {
     // Scalable vectors are multiplied by a runtime constant.
-    bool ScalableType = false;
-    if (isa<ScalableVectorType>(GTI.getIndexedType()))
-      ScalableType = true;
+    bool ScalableType = GTI.getIndexedType()->isScalableTy();
 
     Value *V = GTI.getOperand();
     StructType *STy = GTI.getStructTypeOrNull();
@@ -189,7 +187,7 @@ bool GEPOperator::collectOffset(
   for (gep_type_iterator GTI = gep_type_begin(this), GTE = gep_type_end(this);
        GTI != GTE; ++GTI) {
     // Scalable vectors are multiplied by a runtime constant.
-    bool ScalableType = isa<ScalableVectorType>(GTI.getIndexedType());
+    bool ScalableType = GTI.getIndexedType()->isScalableTy();
 
     Value *V = GTI.getOperand();
     StructType *STy = GTI.getStructTypeOrNull();
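
Because isScalableTy() is now true for arrays of scalable vectors, both
offset accumulators above treat indexing through such arrays as a runtime
quantity. A hedged example of why (illustrative IR):

  define ptr @runtime_offset(ptr %p) {
    ; <vscale x 4 x i32> occupies 16 * vscale bytes, so this GEP advances
    ; %p by 1 * 4 * 16 * vscale = 64 * vscale bytes -- not a compile-time
    ; constant, hence ScalableType must be set for the indexed type.
    %q = getelementptr [4 x <vscale x 4 x i32>], ptr %p, i64 1
    ret ptr %q
  }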

diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
index f88d3ace64c26f6..97febcd99b4114f 100644
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -58,6 +58,8 @@ bool Type::isIntegerTy(unsigned Bitwidth) const {
 }
 
 bool Type::isScalableTy() const {
+  if (const auto *ATy = dyn_cast<ArrayType>(this))
+    return ATy->getElementType()->isScalableTy();
   if (const auto *STy = dyn_cast<StructType>(this)) {
     SmallPtrSet<Type *, 4> Visited;
     return STy->containsScalableVectorType(&Visited);
@@ -658,8 +660,7 @@ ArrayType *ArrayType::get(Type *ElementType, uint64_t NumElements) {
 bool ArrayType::isValidElementType(Type *ElemTy) {
   return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
          !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy() &&
-         !ElemTy->isTokenTy() && !ElemTy->isX86_AMXTy() &&
-         !isa<ScalableVectorType>(ElemTy);
+         !ElemTy->isTokenTy() && !ElemTy->isX86_AMXTy();
 }
 
 //===----------------------------------------------------------------------===//

diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index c0f30a62b8bccc3..10d176380ebed94 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -850,17 +850,9 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
   }
 
   // Scalable vectors cannot be global variables, since we don't know
-  // the runtime size. If the global is an array containing scalable vectors,
-  // that will be caught by the isValidElementType methods in StructType or
-  // ArrayType instead.
-  Check(!isa<ScalableVectorType>(GV.getValueType()),
-        "Globals cannot contain scalable vectors", &GV);
-
-  if (auto *STy = dyn_cast<StructType>(GV.getValueType())) {
-    SmallPtrSet<Type *, 4> Visited;
-    Check(!STy->containsScalableVectorType(&Visited),
-          "Globals cannot contain scalable vectors", &GV);
-  }
+  // the runtime size.
+  Check(!GV.getValueType()->isScalableTy(),
+        "Globals cannot contain scalable types", &GV);
 
   // Check if it's a target extension type that disallows being used as a
   // global.

diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 658db532835a504..56a52d13c20d0cc 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -390,7 +390,7 @@ static bool collectSRATypes(DenseMap<uint64_t, GlobalPart> &Parts,
       }
 
       // Scalable types not currently supported.
-      if (isa<ScalableVectorType>(Ty))
+      if (Ty->isScalableTy())
         return false;
 
       auto IsStored = [](Value *V, Constant *Initializer) {

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 6e491acc1403859..e176d1bea25a0ed 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -804,7 +804,7 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
       return nullptr;
 
     const DataLayout &DL = IC.getDataLayout();
-    auto EltSize = DL.getTypeAllocSize(ET);
+    TypeSize EltSize = DL.getTypeAllocSize(ET);
     const auto Align = LI.getAlign();
 
     auto *Addr = LI.getPointerOperand();
@@ -812,7 +812,7 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
     auto *Zero = ConstantInt::get(IdxType, 0);
 
     Value *V = PoisonValue::get(T);
-    uint64_t Offset = 0;
+    TypeSize Offset = TypeSize::get(0, ET->isScalableTy());
     for (uint64_t i = 0; i < NumElements; i++) {
       Value *Indices[2] = {
         Zero,
@@ -820,9 +820,9 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
       };
       auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices),
                                                Name + ".elt");
+      auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
       auto *L = IC.Builder.CreateAlignedLoad(AT->getElementType(), Ptr,
-                                             commonAlignment(Align, Offset),
-                                             Name + ".unpack");
+                                             EltAlign, Name + ".unpack");
       L->setAAMetadata(LI.getAAMetadata());
       V = IC.Builder.CreateInsertValue(V, L, i);
       Offset += EltSize;
@@ -957,7 +957,7 @@ static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC,
   Type *SourceElementType = GEPI->getSourceElementType();
   // Size information about scalable vectors is not available, so we cannot
   // deduce whether indexing at n is undefined behaviour or not. Bail out.
-  if (isa<ScalableVectorType>(SourceElementType))
+  if (SourceElementType->isScalableTy())
     return false;
 
   Type *AllocTy = GetElementPtrInst::getIndexedType(SourceElementType, Ops);
@@ -1323,7 +1323,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
       return false;
 
     const DataLayout &DL = IC.getDataLayout();
-    auto EltSize = DL.getTypeAllocSize(AT->getElementType());
+    TypeSize EltSize = DL.getTypeAllocSize(AT->getElementType());
     const auto Align = SI.getAlign();
 
     SmallString<16> EltName = V->getName();
@@ -1335,7 +1335,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
     auto *IdxType = Type::getInt64Ty(T->getContext());
     auto *Zero = ConstantInt::get(IdxType, 0);
 
-    uint64_t Offset = 0;
+    TypeSize Offset = TypeSize::get(0, AT->getElementType()->isScalableTy());
     for (uint64_t i = 0; i < NumElements; i++) {
       Value *Indices[2] = {
         Zero,
@@ -1344,7 +1344,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
       auto *Ptr =
           IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices), AddrName);
       auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
-      auto EltAlign = commonAlignment(Align, Offset);
+      auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
       Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
       NS->setAAMetadata(SI.getAAMetadata());
       Offset += EltSize;
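
The running Offset above is now a TypeSize so scalable element sizes
accumulate correctly, with each element's alignment derived from the
known-minimum offset. A hedged sketch of the arithmetic (mirroring the new
InstCombine test added below):

  define <vscale x 4 x i32> @second_elt(ptr %x) {
    ; Element 1 of the array sits at byte offset 16 * vscale.  Every
    ; multiple of the known-minimum 16 preserves 16-byte alignment, so
    ; commonAlignment(align 16, offset 16) keeps align 16 on the element
    ; load that instcombine splits out of this aggregate load.
    %a = load [2 x <vscale x 4 x i32>], ptr %x, align 16
    %b = extractvalue [2 x <vscale x 4 x i32>] %a, 1
    ret <vscale x 4 x i32> %b
  }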

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 1b9943f7025ea6d..145386946320c40 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2005,7 +2005,7 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
     APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
     if (NumVarIndices != Src->getNumIndices()) {
       // FIXME: getIndexedOffsetInType() does not handled scalable vectors.
-      if (isa<ScalableVectorType>(BaseType))
+      if (BaseType->isScalableTy())
         return nullptr;
 
       SmallVector<Value *> ConstantIndices;
@@ -2118,7 +2118,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   SmallVector<Value *, 8> Indices(GEP.indices());
   Type *GEPType = GEP.getType();
   Type *GEPEltType = GEP.getSourceElementType();
-  bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType);
+  bool IsGEPSrcEleScalable = GEPEltType->isScalableTy();
   if (Value *V = simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
                                  SQ.getWithInstruction(&GEP)))
     return replaceInstUsesWith(GEP, V);

diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 89d0b7c33e0d934..afd72cdd7dc0012 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -830,7 +830,7 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
   for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
     if (GTI.isSequential()) {
       // Constant offsets of scalable types are not really constant.
-      if (isa<ScalableVectorType>(GTI.getIndexedType()))
+      if (GTI.getIndexedType()->isScalableTy())
         continue;
 
       // Tries to extract a constant offset from this GEP index.
@@ -1019,7 +1019,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
   for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
     if (GTI.isSequential()) {
       // Constant offsets of scalable types are not really constant.
-      if (isa<ScalableVectorType>(GTI.getIndexedType()))
+      if (GTI.getIndexedType()->isScalableTy())
         continue;
 
       // Splits this GEP index into a variadic part and a constant offset, and

diff --git a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
new file mode 100644
index 000000000000000..49bdaf0fcde9418
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+%my_subtype = type <vscale x 2 x double>
+%my_type = type [3 x %my_subtype]
+
+define void @array_1D(ptr %addr) #0 {
+; CHECK-LABEL: array_1D:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT:    ld1d { z2.d }, p0/z, [x0]
+; CHECK-NEXT:    st1d { z2.d }, p0, [sp]
+; CHECK-NEXT:    st1d { z1.d }, p0, [sp, #2, mul vl]
+; CHECK-NEXT:    st1d { z0.d }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #3
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  %ret = alloca %my_type, align 8
+  %val = load %my_type, ptr %addr
+  store %my_type %val, ptr %ret, align 8
+  ret void
+}
+
+define %my_subtype @array_1D_extract(ptr %addr) #0 {
+; CHECK-LABEL: array_1D_extract:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #3
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  %ret = alloca %my_type, align 8
+  %val = load %my_type, ptr %addr
+  %elt = extractvalue %my_type %val, 1
+  ret %my_subtype %elt
+}
+
+define void @array_1D_insert(ptr %addr, %my_subtype %elt) #0 {
+; CHECK-LABEL: array_1D_insert:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0]
+; CHECK-NEXT:    ld1d { z2.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT:    st1d { z2.d }, p0, [sp, #2, mul vl]
+; CHECK-NEXT:    st1d { z1.d }, p0, [sp]
+; CHECK-NEXT:    st1d { z0.d }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #3
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  %ret = alloca %my_type, align 8
+  %val = load %my_type, ptr %addr
+  %ins = insertvalue %my_type %val, %my_subtype %elt, 1
+  store %my_type %ins, ptr %ret, align 8
+  ret void
+}
+
+define void @array_2D(ptr %addr) #0 {
+; CHECK-LABEL: array_2D:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-6
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 48 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT:    ld1d { z2.d }, p0/z, [x0, #3, mul vl]
+; CHECK-NEXT:    ld1d { z3.d }, p0/z, [x0, #4, mul vl]
+; CHECK-NEXT:    ld1d { z4.d }, p0/z, [x0, #5, mul vl]
+; CHECK-NEXT:    ld1d { z5.d }, p0/z, [x0]
+; CHECK-NEXT:    st1d { z5.d }, p0, [sp]
+; CHECK-NEXT:    st1d { z4.d }, p0, [sp, #5, mul vl]
+; CHECK-NEXT:    st1d { z3.d }, p0, [sp, #4, mul vl]
+; CHECK-NEXT:    st1d { z2.d }, p0, [sp, #3, mul vl]
+; CHECK-NEXT:    st1d { z1.d }, p0, [sp, #2, mul vl]
+; CHECK-NEXT:    st1d { z0.d }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #6
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  %ret = alloca [2 x %my_type], align 8
+  %val = load [2 x %my_type], ptr %addr
+  store [2 x %my_type] %val, ptr %ret, align 8
+  ret void
+}
+
+attributes #0 = { "target-features"="+sve" }

diff --git a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll
new file mode 100644
index 000000000000000..1fe91c721f4dd2b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs --riscv-no-aliases < %s | FileCheck %s
+
+target triple = "riscv64-unknown-unknown-elf"
+
+%my_type = type [3 x <vscale x 1 x double>]
+
+define void @test(ptr %addr) {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrrs a1, vlenb, zero
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT:    csrrs a1, vlenb, zero
+; CHECK-NEXT:    add a2, a0, a1
+; CHECK-NEXT:    vl1re64.v v8, (a2)
+; CHECK-NEXT:    slli a2, a1, 1
+; CHECK-NEXT:    vl1re64.v v9, (a0)
+; CHECK-NEXT:    add a0, a0, a2
+; CHECK-NEXT:    vl1re64.v v10, (a0)
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v9, (a0)
+; CHECK-NEXT:    add a2, a0, a2
+; CHECK-NEXT:    vs1r.v v10, (a2)
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    vs1r.v v8, (a0)
+; CHECK-NEXT:    csrrs a0, vlenb, zero
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %ret = alloca %my_type, align 8
+  %val = load %my_type, ptr %addr
+  store %my_type %val, ptr %ret, align 8
+  ret void
+}

diff --git a/llvm/test/Other/scalable-vector-array.ll b/llvm/test/Other/scalable-vector-array.ll
deleted file mode 100644
index 4119bb6953b421c..000000000000000
--- a/llvm/test/Other/scalable-vector-array.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: not opt -S -passes=verify < %s 2>&1 | FileCheck %s
-
-;; Arrays cannot contain scalable vectors; make sure we detect them even
-;; when nested inside other aggregates.
-
-%ty = type { i64, [4 x <vscale x 256 x i1>] }
-; CHECK: error: invalid array element type
-; CHECK: %ty = type { i64, [4 x <vscale x 256 x i1>] }

diff --git a/llvm/test/Transforms/GVN/opaque-ptr.ll b/llvm/test/Transforms/GVN/opaque-ptr.ll
index 911a7eba16794a6..4a0f9d319501c88 100644
--- a/llvm/test/Transforms/GVN/opaque-ptr.ll
+++ b/llvm/test/Transforms/GVN/opaque-ptr.ll
@@ -52,6 +52,12 @@ define void @gep_cse_offset_canonicalization(ptr %p, i64 %idx, i64 %idx2) {
 ; CHECK-NEXT:    call void @use(ptr [[GEP5]])
 ; CHECK-NEXT:    call void @use(ptr [[GEP5_SAME]])
 ; CHECK-NEXT:    call void @use(ptr [[GEP5_DIFFERENT]])
+; CHECK-NEXT:    [[GEP6:%.*]] = getelementptr [4 x <vscale x 4 x i32>], ptr [[P]], i64 [[IDX]], i64 1
+; CHECK-NEXT:    [[GEP6_SAME:%.*]] = getelementptr [4 x <vscale x 4 x float>], ptr [[P]], i64 [[IDX]], i64 1
+; CHECK-NEXT:    [[GEP6_DIFFERENT:%.*]] = getelementptr [4 x <vscale x 4 x float>], ptr [[P]], i64 [[IDX2]], i64 1
+; CHECK-NEXT:    call void @use(ptr [[GEP6]])
+; CHECK-NEXT:    call void @use(ptr [[GEP6_SAME]])
+; CHECK-NEXT:    call void @use(ptr [[GEP6_DIFFERENT]])
 ; CHECK-NEXT:    ret void
 ;
   %gep1 = getelementptr i64, ptr %p, i64 1
@@ -89,6 +95,12 @@ define void @gep_cse_offset_canonicalization(ptr %p, i64 %idx, i64 %idx2) {
   call void @use(ptr %gep5)
   call void @use(ptr %gep5.same)
   call void @use(ptr %gep5.different)
+  %gep6 = getelementptr [4 x <vscale x 4 x i32>], ptr %p, i64 %idx, i64 1
+  %gep6.same = getelementptr [4 x <vscale x 4 x float>], ptr %p, i64 %idx, i64 1
+  %gep6.different = getelementptr [4 x <vscale x 4 x float>], ptr %p, i64 %idx2, i64 1
+  call void @use(ptr %gep6)
+  call void @use(ptr %gep6.same)
+  call void @use(ptr %gep6.different)
   ret void
 }
 

diff --git a/llvm/test/Transforms/GlobalOpt/2022-08-23-ScalableVectorArrayCrash.ll b/llvm/test/Transforms/GlobalOpt/2022-08-23-ScalableVectorArrayCrash.ll
new file mode 100644
index 000000000000000..1f358ec850bc492
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/2022-08-23-ScalableVectorArrayCrash.ll
@@ -0,0 +1,15 @@
+; RUN: opt -passes=globalopt < %s
+
+; Ensure we don't ICE by trying to optimize a scalable vector load of a global
+; variable.
+
+%struct.xxx = type <{ [96 x i8] }>
+
+@.bss = internal unnamed_addr global %struct.xxx zeroinitializer, align 32
+
+define dso_local void @foo() local_unnamed_addr align 16 {
+L.entry:
+  store [4 x <vscale x 2 x double>] zeroinitializer, ptr @.bss, align 1
+  %0 = load [4 x <vscale x 2 x double>], ptr @.bss, align 8
+  unreachable
+}

diff --git a/llvm/test/Transforms/InstCombine/gep-can-replace-gep-idx-with-zero-typesize.ll b/llvm/test/Transforms/InstCombine/gep-can-replace-gep-idx-with-zero-typesize.ll
index 6514158966ac590..5be2b90bf7a626a 100644
--- a/llvm/test/Transforms/InstCombine/gep-can-replace-gep-idx-with-zero-typesize.ll
+++ b/llvm/test/Transforms/InstCombine/gep-can-replace-gep-idx-with-zero-typesize.ll
@@ -18,3 +18,10 @@ define void @can_replace_gep_idx_with_zero_typesize(i64 %n, ptr %a, i64 %b) {
   call void @do_something(<vscale x 4 x i32> %tmp)
   ret void
 }
+
+define void @can_replace_gep_idx_with_zero_typesize_2(i64 %n, ptr %a, i64 %b) {
+  %idx = getelementptr [2 x <vscale x 4 x i32>], ptr %a, i64 %b, i64 0
+  %tmp = load <vscale x 4 x i32>, ptr %idx
+  call void @do_something(<vscale x 4 x i32> %tmp)
+  ret void
+}

diff --git a/llvm/test/Transforms/InstCombine/opaque-ptr.ll b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
index 4f12fa45e9ecadd..900d3f142a6ff79 100644
--- a/llvm/test/Transforms/InstCombine/opaque-ptr.ll
+++ b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
@@ -298,6 +298,17 @@ define ptr @geps_combinable_scalable(ptr %a, i64 %idx) {
   ret ptr %a3
 }
 
+define ptr @geps_combinable_scalable_vector_array(ptr %a, i64 %idx) {
+; CHECK-LABEL: @geps_combinable_scalable_vector_array(
+; CHECK-NEXT:    [[A2:%.*]] = getelementptr inbounds [4 x <vscale x 2 x i32>], ptr [[A:%.*]], i64 1
+; CHECK-NEXT:    [[A3:%.*]] = getelementptr inbounds i8, ptr [[A2]], i64 4
+; CHECK-NEXT:    ret ptr [[A3]]
+;
+  %a2 = getelementptr inbounds [4 x <vscale x 2 x i32>], ptr %a, i64 1
+  %a3 = getelementptr inbounds i8, ptr %a2, i32 4
+  ret ptr %a3
+}
+
 define i1 @compare_geps_same_indices(ptr %a, ptr %b, i64 %idx) {
 ; CHECK-LABEL: @compare_geps_same_indices(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq ptr [[A:%.*]], [[B:%.*]]

diff --git a/llvm/test/Transforms/InstCombine/scalable-vector-array.ll b/llvm/test/Transforms/InstCombine/scalable-vector-array.ll
new file mode 100644
index 000000000000000..d03184766b4a682
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/scalable-vector-array.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+
+define <vscale x 4 x i32> @load(ptr %x) {
+; CHECK-LABEL: define <vscale x 4 x i32> @load
+; CHECK-SAME: (ptr [[X:%.*]]) {
+; CHECK-NEXT:    [[A_ELT1:%.*]] = getelementptr inbounds [2 x <vscale x 4 x i32>], ptr [[X]], i64 0, i64 1
+; CHECK-NEXT:    [[A_UNPACK2:%.*]] = load <vscale x 4 x i32>, ptr [[A_ELT1]], align 16
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A_UNPACK2]]
+;
+  %a = load [2 x <vscale x 4 x i32>], ptr %x
+  %b = extractvalue [2 x <vscale x 4 x i32>] %a, 1
+  ret <vscale x 4 x i32> %b
+}
+
+define void @store(ptr %x, <vscale x 4 x i32> %y, <vscale x 4 x i32> %z) {
+; CHECK-LABEL: define void @store
+; CHECK-SAME: (ptr [[X:%.*]], <vscale x 4 x i32> [[Y:%.*]], <vscale x 4 x i32> [[Z:%.*]]) {
+; CHECK-NEXT:    store <vscale x 4 x i32> [[Y]], ptr [[X]], align 16
+; CHECK-NEXT:    [[X_REPACK1:%.*]] = getelementptr inbounds [2 x <vscale x 4 x i32>], ptr [[X]], i64 0, i64 1
+; CHECK-NEXT:    store <vscale x 4 x i32> [[Z]], ptr [[X_REPACK1]], align 16
+; CHECK-NEXT:    ret void
+;
+  %a = insertvalue [2 x <vscale x 4 x i32>] poison, <vscale x 4 x i32> %y, 0
+  %b = insertvalue [2 x <vscale x 4 x i32>] %a, <vscale x 4 x i32> %z, 1
+  store [2 x <vscale x 4 x i32>] %b, ptr %x
+  ret void
+}

diff --git a/llvm/test/Transforms/InstSimplify/gep.ll b/llvm/test/Transforms/InstSimplify/gep.ll
index 5e70c2ca37c3a2e..281c286ef36ea2f 100644
--- a/llvm/test/Transforms/InstSimplify/gep.ll
+++ b/llvm/test/Transforms/InstSimplify/gep.ll
@@ -358,3 +358,12 @@ define <8 x ptr> @gep_vector_index_op3_poison_constant_index_afterwards(ptr %ptr
   %res = getelementptr inbounds %t.3, ptr %ptr, i64 0, i32 1, <8 x i64> poison, i32 1
   ret <8 x ptr> %res
 }
+
+define i64 @gep_array_of_scalable_vectors_ptrdiff(ptr %ptr) {
+  %c1 = getelementptr inbounds [8 x <vscale x 4 x i32>], ptr %ptr, i64 4
+  %c2 = getelementptr inbounds [8 x <vscale x 4 x i32>], ptr %ptr, i64 6
+  %c1.int = ptrtoint ptr %c1 to i64
+  %c2.int = ptrtoint ptr %c2 to i64
+  %diff = sub i64 %c2.int, %c1.int
+  ret i64 %diff
+}

diff --git a/llvm/test/Transforms/SROA/scalable-vector-array.ll b/llvm/test/Transforms/SROA/scalable-vector-array.ll
new file mode 100644
index 000000000000000..dbb6b4cbea47fb5
--- /dev/null
+++ b/llvm/test/Transforms/SROA/scalable-vector-array.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s
+; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s
+
+; This test checks that SROA runs mem2reg on arrays of scalable vectors.
+
+define [ 2 x <vscale x 4 x i32> ] @alloca(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: define [2 x <vscale x 4 x i32>] @alloca
+; CHECK-SAME: (<vscale x 4 x i32> [[X:%.*]], <vscale x 4 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:    [[AGG0:%.*]] = insertvalue [2 x <vscale x 4 x i32>] poison, <vscale x 4 x i32> [[X]], 0
+; CHECK-NEXT:    [[AGG1:%.*]] = insertvalue [2 x <vscale x 4 x i32>] [[AGG0]], <vscale x 4 x i32> [[Y]], 1
+; CHECK-NEXT:    ret [2 x <vscale x 4 x i32>] [[AGG1]]
+;
+  %addr = alloca [ 2 x <vscale x 4 x i32> ], align 4
+  %agg0 = insertvalue [ 2 x <vscale x 4 x i32> ] poison, <vscale x 4 x i32> %x, 0
+  %agg1 = insertvalue [ 2 x <vscale x 4 x i32> ] %agg0, <vscale x 4 x i32> %y, 1
+  store [ 2 x <vscale x 4 x i32> ] %agg1, ptr %addr, align 4
+  %val = load [ 2 x <vscale x 4 x i32> ], ptr %addr, align 4
+  ret [ 2 x <vscale x 4 x i32> ] %val
+}

diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AArch64/scalable-vector-geps.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AArch64/scalable-vector-geps.ll
index 2ceac4c44d10f4c..31d166506a4e48f 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AArch64/scalable-vector-geps.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AArch64/scalable-vector-geps.ll
@@ -28,4 +28,41 @@ define ptr @test2(ptr %base, i64 %idx) {
   ret ptr %gep
 }
 
+; Index is implicitly multiplied by vscale and so not really constant.
+define ptr @test3(ptr %base, i64 %idx) #0 {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[IDX_NEXT:%.*]] = add nuw nsw i64 [[IDX:%.*]], 1
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr [8 x <vscale x 4 x float>], ptr [[BASE:%.*]], i64 [[IDX_NEXT]]
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+  %idx.next = add nuw nsw i64 %idx, 1
+  %gep = getelementptr [8 x <vscale x 4 x float>], ptr %base, i64 %idx.next
+  ret ptr %gep
+}
+
+; Indices are implicitly multiplied by vscale and so not really constant.
+define ptr @test4(ptr %base, i64 %idx) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[IDX_NEXT:%.*]] = add nuw nsw i64 [[IDX:%.*]], 1
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr [8 x <vscale x 4 x float>], ptr [[BASE:%.*]], i64 3, i64 [[IDX_NEXT]]
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+  %idx.next = add nuw nsw i64 %idx, 1
+  %gep = getelementptr [8 x <vscale x 4 x float>], ptr %base, i64 3, i64 %idx.next
+  ret ptr %gep
+}
+
+; Whilst the first two indices are not constant, the calculation of the third
+; index does contain a constant that can be extracted.
+define ptr @test5(ptr %base, i64 %idx) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [8 x <vscale x 4 x float>], ptr [[BASE:%.*]], i64 1, i64 3, i64 [[IDX:%.*]]
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr float, ptr [[TMP1]], i64 1
+; CHECK-NEXT:    ret ptr [[GEP2]]
+;
+  %idx.next = add nuw nsw i64 %idx, 1
+  %gep = getelementptr [8 x <vscale x 4 x float>], ptr %base, i64 1, i64 3, i64 %idx.next
+  ret ptr %gep
+}
+
 attributes #0 = { "target-features"="+sve" }

diff --git a/llvm/test/Verifier/scalable-global-vars.ll b/llvm/test/Verifier/scalable-global-vars.ll
index 740cd23a2888a1b..81882261e664ef6 100644
--- a/llvm/test/Verifier/scalable-global-vars.ll
+++ b/llvm/test/Verifier/scalable-global-vars.ll
@@ -3,14 +3,15 @@
 ;; Global variables cannot be scalable vectors, since we don't
 ;; know the size at compile time.
 
-; CHECK: Globals cannot contain scalable vectors
+; CHECK: Globals cannot contain scalable types
 ; CHECK-NEXT: ptr @ScalableVecGlobal
 @ScalableVecGlobal = global <vscale x 4 x i32> zeroinitializer
 
-; CHECK-NEXT: Globals cannot contain scalable vectors
+; CHECK-NEXT: Globals cannot contain scalable types
+; CHECK-NEXT: ptr @ScalableVecArrayGlobal
+@ScalableVecArrayGlobal = global [ 8 x  <vscale x 4 x i32> ] zeroinitializer
+
+; CHECK-NEXT: Globals cannot contain scalable types
 ; CHECK-NEXT: ptr @ScalableVecStructGlobal
 @ScalableVecStructGlobal = global { i32,  <vscale x 4 x i32> } zeroinitializer
 
-;; Global _pointers_ to scalable vectors are fine
-; CHECK-NOT: Globals cannot contain scalable vectors
-@ScalableVecPtr = global ptr zeroinitializer

