[llvm] c7d65e4 - [IR] Enable load/store/alloca for arrays of scalable vectors.
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 14 06:57:07 PDT 2023
Author: Paul Walker
Date: 2023-09-14T13:49:01Z
New Revision: c7d65e4466eafe518937c59ef9a242234ed7a08a
URL: https://github.com/llvm/llvm-project/commit/c7d65e4466eafe518937c59ef9a242234ed7a08a
DIFF: https://github.com/llvm/llvm-project/commit/c7d65e4466eafe518937c59ef9a242234ed7a08a.diff
LOG: [IR] Enable load/store/alloca for arrays of scalable vectors.
Differential Revision: https://reviews.llvm.org/D158517
Added:
llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll
llvm/test/Transforms/GlobalOpt/2022-08-23-ScalableVectorArrayCrash.ll
llvm/test/Transforms/InstCombine/scalable-vector-array.ll
llvm/test/Transforms/SROA/scalable-vector-array.ll
Modified:
llvm/docs/LangRef.rst
llvm/include/llvm/IR/Type.h
llvm/lib/Analysis/InstructionSimplify.cpp
llvm/lib/IR/Operator.cpp
llvm/lib/IR/Type.cpp
llvm/lib/IR/Verifier.cpp
llvm/lib/Transforms/IPO/GlobalOpt.cpp
llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
llvm/test/Transforms/GVN/opaque-ptr.ll
llvm/test/Transforms/InstCombine/gep-can-replace-gep-idx-with-zero-typesize.ll
llvm/test/Transforms/InstCombine/opaque-ptr.ll
llvm/test/Transforms/InstSimplify/gep.ll
llvm/test/Transforms/SeparateConstOffsetFromGEP/AArch64/scalable-vector-geps.ll
llvm/test/Verifier/scalable-global-vars.ll
Removed:
llvm/test/Other/scalable-vector-array.ll
################################################################################
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index dc5c84de420d76c..f542e70bcfee810 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -742,16 +742,16 @@ an optional list of attached :ref:`metadata <metadata>`.
Variables and aliases can have a
:ref:`Thread Local Storage Model <tls_model>`.
-:ref:`Scalable vectors <t_vector>` cannot be global variables or members of
-arrays because their size is unknown at compile time. They are allowed in
-structs to facilitate intrinsics returning multiple values. Generally, structs
-containing scalable vectors are not considered "sized" and cannot be used in
-loads, stores, allocas, or GEPs. The only exception to this rule is for structs
-that contain scalable vectors of the same type (e.g. ``{<vscale x 2 x i32>,
-<vscale x 2 x i32>}`` contains the same type while ``{<vscale x 2 x i32>,
-<vscale x 2 x i64>}`` doesn't). These kinds of structs (we may call them
-homogeneous scalable vector structs) are considered sized and can be used in
-loads, stores, allocas, but not GEPs.
+Globals cannot be or contain :ref:`Scalable vectors <t_vector>` because their
+size is unknown at compile time. They are allowed in structs to facilitate
+intrinsics returning multiple values. Generally, structs containing scalable
+vectors are not considered "sized" and cannot be used in loads, stores, allocas,
+or GEPs. The only exception to this rule is for structs that contain scalable
+vectors of the same type (e.g. ``{<vscale x 2 x i32>, <vscale x 2 x i32>}``
+contains the same type while ``{<vscale x 2 x i32>, <vscale x 2 x i64>}``
+doesn't). These kinds of structs (we may call them homogeneous scalable vector
+structs) are considered sized and can be used in loads, stores, allocas, but
+not GEPs.
Syntax::
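
To illustrate the rule as now worded, a minimal IR sketch (illustrative only,
not taken from the patch):

  ; Arrays of scalable vectors are now sized, so load/store/alloca (and GEP)
  ; of them are valid:
  define void @sketch(ptr %p) {
    %a = alloca [2 x <vscale x 4 x i32>], align 16
    %v = load [2 x <vscale x 4 x i32>], ptr %p, align 16
    store [2 x <vscale x 4 x i32>] %v, ptr %a, align 16
    ret void
  }
  ; Still rejected by the verifier, since the size is unknown at compile time:
  ; @g = global [2 x <vscale x 4 x i32>] zeroinitializer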
diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h
index 118f53b2ba16aba..c12e899d58fa834 100644
--- a/llvm/include/llvm/IR/Type.h
+++ b/llvm/include/llvm/IR/Type.h
@@ -209,8 +209,7 @@ class Type {
/// Return true if this is a target extension type with a scalable layout.
bool isScalableTargetExtTy() const;
- /// Return true if this is a scalable vector type or a target extension type
- /// with a scalable layout.
+ /// Return true if this is a type whose size is a known multiple of vscale.
bool isScalableTy() const;
/// Return true if this is a FP type or a vector of FP.
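
Under the new wording, isScalableTy() is true for any type whose size is a
known multiple of vscale, not only for scalable vectors. Roughly, and using
illustrative types that are not taken from the patch:

  ; isScalableTy() is expected to return true for each of:
  ;   <vscale x 4 x i32>                          ; scalable vector
  ;   [2 x <vscale x 4 x i32>]                    ; array of scalable vectors
  ;   [4 x [3 x <vscale x 2 x double>]]           ; nested arrays, via recursion
  ;   { <vscale x 2 x i64>, <vscale x 2 x i64> }  ; homogeneous scalable struct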
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 5fe0d53c313d40e..2a3011075e47ed7 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4934,7 +4934,7 @@ static Value *simplifyGEPInst(Type *SrcTy, Value *Ptr,
return UndefValue::get(GEPTy);
bool IsScalableVec =
- isa<ScalableVectorType>(SrcTy) || any_of(Indices, [](const Value *V) {
+ SrcTy->isScalableTy() || any_of(Indices, [](const Value *V) {
return isa<ScalableVectorType>(V->getType());
});
diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp
index b57f3e3b2967eb9..d2a1f2eb49dafed 100644
--- a/llvm/lib/IR/Operator.cpp
+++ b/llvm/lib/IR/Operator.cpp
@@ -127,9 +127,7 @@ bool GEPOperator::accumulateConstantOffset(
auto end = generic_gep_type_iterator<decltype(Index.end())>::end(Index.end());
for (auto GTI = begin, GTE = end; GTI != GTE; ++GTI) {
// Scalable vectors are multiplied by a runtime constant.
- bool ScalableType = false;
- if (isa<ScalableVectorType>(GTI.getIndexedType()))
- ScalableType = true;
+ bool ScalableType = GTI.getIndexedType()->isScalableTy();
Value *V = GTI.getOperand();
StructType *STy = GTI.getStructTypeOrNull();
@@ -189,7 +187,7 @@ bool GEPOperator::collectOffset(
for (gep_type_iterator GTI = gep_type_begin(this), GTE = gep_type_end(this);
GTI != GTE; ++GTI) {
// Scalable vectors are multiplied by a runtime constant.
- bool ScalableType = isa<ScalableVectorType>(GTI.getIndexedType());
+ bool ScalableType = GTI.getIndexedType()->isScalableTy();
Value *V = GTI.getOperand();
StructType *STy = GTI.getStructTypeOrNull();
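
Both hunks generalise the same observation: an index over an array of scalable
vectors advances the pointer by an amount that is only known as a multiple of
vscale. As a worked example (illustrative, not taken from the patch), stepping
once over [3 x <vscale x 2 x double>] moves by 3 * 16 * vscale = 48 * vscale
bytes, which cannot be folded into a constant byte offset:

  %gep = getelementptr [3 x <vscale x 2 x double>], ptr %p, i64 1
  ; offset = 48 * vscale bytes -- not a compile-time constant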
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
index f88d3ace64c26f6..97febcd99b4114f 100644
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -58,6 +58,8 @@ bool Type::isIntegerTy(unsigned Bitwidth) const {
}
bool Type::isScalableTy() const {
+ if (const auto *ATy = dyn_cast<ArrayType>(this))
+ return ATy->getElementType()->isScalableTy();
if (const auto *STy = dyn_cast<StructType>(this)) {
SmallPtrSet<Type *, 4> Visited;
return STy->containsScalableVectorType(&Visited);
@@ -658,8 +660,7 @@ ArrayType *ArrayType::get(Type *ElementType, uint64_t NumElements) {
bool ArrayType::isValidElementType(Type *ElemTy) {
return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
!ElemTy->isMetadataTy() && !ElemTy->isFunctionTy() &&
- !ElemTy->isTokenTy() && !ElemTy->isX86_AMXTy() &&
- !isa<ScalableVectorType>(ElemTy);
+ !ElemTy->isTokenTy() && !ElemTy->isX86_AMXTy();
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index c0f30a62b8bccc3..10d176380ebed94 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -850,17 +850,9 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
}
// Scalable vectors cannot be global variables, since we don't know
- // the runtime size. If the global is an array containing scalable vectors,
- // that will be caught by the isValidElementType methods in StructType or
- // ArrayType instead.
- Check(!isa<ScalableVectorType>(GV.getValueType()),
- "Globals cannot contain scalable vectors", &GV);
-
- if (auto *STy = dyn_cast<StructType>(GV.getValueType())) {
- SmallPtrSet<Type *, 4> Visited;
- Check(!STy->containsScalableVectorType(&Visited),
- "Globals cannot contain scalable vectors", &GV);
- }
+ // the runtime size.
+ Check(!GV.getValueType()->isScalableTy(),
+ "Globals cannot contain scalable types", &GV);
// Check if it's a target extension type that disallows being used as a
// global.
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 658db532835a504..56a52d13c20d0cc 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -390,7 +390,7 @@ static bool collectSRATypes(DenseMap<uint64_t, GlobalPart> &Parts,
}
// Scalable types not currently supported.
- if (isa<ScalableVectorType>(Ty))
+ if (Ty->isScalableTy())
return false;
auto IsStored = [](Value *V, Constant *Initializer) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 6e491acc1403859..e176d1bea25a0ed 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -804,7 +804,7 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
return nullptr;
const DataLayout &DL = IC.getDataLayout();
- auto EltSize = DL.getTypeAllocSize(ET);
+ TypeSize EltSize = DL.getTypeAllocSize(ET);
const auto Align = LI.getAlign();
auto *Addr = LI.getPointerOperand();
@@ -812,7 +812,7 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
auto *Zero = ConstantInt::get(IdxType, 0);
Value *V = PoisonValue::get(T);
- uint64_t Offset = 0;
+ TypeSize Offset = TypeSize::get(0, ET->isScalableTy());
for (uint64_t i = 0; i < NumElements; i++) {
Value *Indices[2] = {
Zero,
@@ -820,9 +820,9 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
};
auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices),
Name + ".elt");
+ auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
auto *L = IC.Builder.CreateAlignedLoad(AT->getElementType(), Ptr,
- commonAlignment(Align, Offset),
- Name + ".unpack");
+ EltAlign, Name + ".unpack");
L->setAAMetadata(LI.getAAMetadata());
V = IC.Builder.CreateInsertValue(V, L, i);
Offset += EltSize;
@@ -957,7 +957,7 @@ static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC,
Type *SourceElementType = GEPI->getSourceElementType();
// Size information about scalable vectors is not available, so we cannot
// deduce whether indexing at n is undefined behaviour or not. Bail out.
- if (isa<ScalableVectorType>(SourceElementType))
+ if (SourceElementType->isScalableTy())
return false;
Type *AllocTy = GetElementPtrInst::getIndexedType(SourceElementType, Ops);
@@ -1323,7 +1323,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
return false;
const DataLayout &DL = IC.getDataLayout();
- auto EltSize = DL.getTypeAllocSize(AT->getElementType());
+ TypeSize EltSize = DL.getTypeAllocSize(AT->getElementType());
const auto Align = SI.getAlign();
SmallString<16> EltName = V->getName();
@@ -1335,7 +1335,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
auto *IdxType = Type::getInt64Ty(T->getContext());
auto *Zero = ConstantInt::get(IdxType, 0);
- uint64_t Offset = 0;
+ TypeSize Offset = TypeSize::get(0, AT->getElementType()->isScalableTy());
for (uint64_t i = 0; i < NumElements; i++) {
Value *Indices[2] = {
Zero,
@@ -1344,7 +1344,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
auto *Ptr =
IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices), AddrName);
auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
- auto EltAlign = commonAlignment(Align, Offset);
+ auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
NS->setAAMetadata(SI.getAAMetadata());
Offset += EltSize;
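
The Offset bookkeeping switches from uint64_t to TypeSize so that the
per-element offsets of a scalable array can be carried through, and element
alignment is derived from the known-minimum byte offset. This is sound because
the real offset is an integer multiple of the known-minimum value, so any
power of two dividing the minimum also divides the real offset. A sketch of
the unpacked form for a 16-byte-aligned load (illustrative; the new
InstCombine test below shows the real output):

  %e0 = load <vscale x 4 x i32>, ptr %p, align 16   ; offset 0
  %p1 = getelementptr inbounds [2 x <vscale x 4 x i32>], ptr %p, i64 0, i64 1
  %e1 = load <vscale x 4 x i32>, ptr %p1, align 16  ; offset 16 * vscale,
                                                    ; still a multiple of 16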
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 1b9943f7025ea6d..145386946320c40 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2005,7 +2005,7 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
if (NumVarIndices != Src->getNumIndices()) {
// FIXME: getIndexedOffsetInType() does not handled scalable vectors.
- if (isa<ScalableVectorType>(BaseType))
+ if (BaseType->isScalableTy())
return nullptr;
SmallVector<Value *> ConstantIndices;
@@ -2118,7 +2118,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
SmallVector<Value *, 8> Indices(GEP.indices());
Type *GEPType = GEP.getType();
Type *GEPEltType = GEP.getSourceElementType();
- bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType);
+ bool IsGEPSrcEleScalable = GEPEltType->isScalableTy();
if (Value *V = simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
SQ.getWithInstruction(&GEP)))
return replaceInstUsesWith(GEP, V);
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 89d0b7c33e0d934..afd72cdd7dc0012 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -830,7 +830,7 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
if (GTI.isSequential()) {
// Constant offsets of scalable types are not really constant.
- if (isa<ScalableVectorType>(GTI.getIndexedType()))
+ if (GTI.getIndexedType()->isScalableTy())
continue;
// Tries to extract a constant offset from this GEP index.
@@ -1019,7 +1019,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
if (GTI.isSequential()) {
// Constant offsets of scalable types are not really constant.
- if (isa<ScalableVectorType>(GTI.getIndexedType()))
+ if (GTI.getIndexedType()->isScalableTy())
continue;
// Splits this GEP index into a variadic part and a constant offset, and
diff --git a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
new file mode 100644
index 000000000000000..49bdaf0fcde9418
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+%my_subtype = type <vscale x 2 x double>
+%my_type = type [3 x %my_subtype]
+
+define void @array_1D(ptr %addr) #0 {
+; CHECK-LABEL: array_1D:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0]
+; CHECK-NEXT: st1d { z2.d }, p0, [sp]
+; CHECK-NEXT: st1d { z1.d }, p0, [sp, #2, mul vl]
+; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl]
+; CHECK-NEXT: addvl sp, sp, #3
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %ret = alloca %my_type, align 8
+ %val = load %my_type, ptr %addr
+ store %my_type %val, ptr %ret, align 8
+ ret void
+}
+
+define %my_subtype @array_1D_extract(ptr %addr) #0 {
+; CHECK-LABEL: array_1D_extract:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: addvl sp, sp, #3
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %ret = alloca %my_type, align 8
+ %val = load %my_type, ptr %addr
+ %elt = extractvalue %my_type %val, 1
+ ret %my_subtype %elt
+}
+
+define void @array_1D_insert(ptr %addr, %my_subtype %elt) #0 {
+; CHECK-LABEL: array_1D_insert:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT: st1d { z2.d }, p0, [sp, #2, mul vl]
+; CHECK-NEXT: st1d { z1.d }, p0, [sp]
+; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl]
+; CHECK-NEXT: addvl sp, sp, #3
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %ret = alloca %my_type, align 8
+ %val = load %my_type, ptr %addr
+ %ins = insertvalue %my_type %val, %my_subtype %elt, 1
+ store %my_type %ins, ptr %ret, align 8
+ ret void
+}
+
+define void @array_2D(ptr %addr) #0 {
+; CHECK-LABEL: array_2D:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-6
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 48 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #3, mul vl]
+; CHECK-NEXT: ld1d { z3.d }, p0/z, [x0, #4, mul vl]
+; CHECK-NEXT: ld1d { z4.d }, p0/z, [x0, #5, mul vl]
+; CHECK-NEXT: ld1d { z5.d }, p0/z, [x0]
+; CHECK-NEXT: st1d { z5.d }, p0, [sp]
+; CHECK-NEXT: st1d { z4.d }, p0, [sp, #5, mul vl]
+; CHECK-NEXT: st1d { z3.d }, p0, [sp, #4, mul vl]
+; CHECK-NEXT: st1d { z2.d }, p0, [sp, #3, mul vl]
+; CHECK-NEXT: st1d { z1.d }, p0, [sp, #2, mul vl]
+; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl]
+; CHECK-NEXT: addvl sp, sp, #6
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %ret = alloca [2 x %my_type], align 8
+ %val = load [2 x %my_type], ptr %addr
+ store [2 x %my_type] %val, ptr %ret, align 8
+ ret void
+}
+
+attributes #0 = { "target-features"="+sve" }
diff --git a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll
new file mode 100644
index 000000000000000..1fe91c721f4dd2b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs --riscv-no-aliases < %s | FileCheck %s
+
+target triple = "riscv64-unknown-unknown-elf"
+
+%my_type = type [3 x <vscale x 1 x double>]
+
+define void @test(ptr %addr) {
+; CHECK-LABEL: test:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrrs a1, vlenb, zero
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrrs a1, vlenb, zero
+; CHECK-NEXT: add a2, a0, a1
+; CHECK-NEXT: vl1re64.v v8, (a2)
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: vl1re64.v v9, (a0)
+; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: vl1re64.v v10, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v9, (a0)
+; CHECK-NEXT: add a2, a0, a2
+; CHECK-NEXT: vs1r.v v10, (a2)
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vs1r.v v8, (a0)
+; CHECK-NEXT: csrrs a0, vlenb, zero
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %ret = alloca %my_type, align 8
+ %val = load %my_type, ptr %addr
+ store %my_type %val, ptr %ret, align 8
+ ret void
+}
diff --git a/llvm/test/Other/scalable-vector-array.ll b/llvm/test/Other/scalable-vector-array.ll
deleted file mode 100644
index 4119bb6953b421c..000000000000000
--- a/llvm/test/Other/scalable-vector-array.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: not opt -S -passes=verify < %s 2>&1 | FileCheck %s
-
-;; Arrays cannot contain scalable vectors; make sure we detect them even
-;; when nested inside other aggregates.
-
-%ty = type { i64, [4 x <vscale x 256 x i1>] }
-; CHECK: error: invalid array element type
-; CHECK: %ty = type { i64, [4 x <vscale x 256 x i1>] }
diff --git a/llvm/test/Transforms/GVN/opaque-ptr.ll b/llvm/test/Transforms/GVN/opaque-ptr.ll
index 911a7eba16794a6..4a0f9d319501c88 100644
--- a/llvm/test/Transforms/GVN/opaque-ptr.ll
+++ b/llvm/test/Transforms/GVN/opaque-ptr.ll
@@ -52,6 +52,12 @@ define void @gep_cse_offset_canonicalization(ptr %p, i64 %idx, i64 %idx2) {
; CHECK-NEXT: call void @use(ptr [[GEP5]])
; CHECK-NEXT: call void @use(ptr [[GEP5_SAME]])
; CHECK-NEXT: call void @use(ptr [[GEP5_DIFFERENT]])
+; CHECK-NEXT: [[GEP6:%.*]] = getelementptr [4 x <vscale x 4 x i32>], ptr [[P]], i64 [[IDX]], i64 1
+; CHECK-NEXT: [[GEP6_SAME:%.*]] = getelementptr [4 x <vscale x 4 x float>], ptr [[P]], i64 [[IDX]], i64 1
+; CHECK-NEXT: [[GEP6_DIFFERENT:%.*]] = getelementptr [4 x <vscale x 4 x float>], ptr [[P]], i64 [[IDX2]], i64 1
+; CHECK-NEXT: call void @use(ptr [[GEP6]])
+; CHECK-NEXT: call void @use(ptr [[GEP6_SAME]])
+; CHECK-NEXT: call void @use(ptr [[GEP6_DIFFERENT]])
; CHECK-NEXT: ret void
;
%gep1 = getelementptr i64, ptr %p, i64 1
@@ -89,6 +95,12 @@ define void @gep_cse_offset_canonicalization(ptr %p, i64 %idx, i64 %idx2) {
call void @use(ptr %gep5)
call void @use(ptr %gep5.same)
call void @use(ptr %gep5.different)
+ %gep6 = getelementptr [4 x <vscale x 4 x i32>], ptr %p, i64 %idx, i64 1
+ %gep6.same = getelementptr [4 x <vscale x 4 x float>], ptr %p, i64 %idx, i64 1
+ %gep6.different = getelementptr [4 x <vscale x 4 x float>], ptr %p, i64 %idx2, i64 1
+ call void @use(ptr %gep6)
+ call void @use(ptr %gep6.same)
+ call void @use(ptr %gep6.different)
ret void
}
diff --git a/llvm/test/Transforms/GlobalOpt/2022-08-23-ScalableVectorArrayCrash.ll b/llvm/test/Transforms/GlobalOpt/2022-08-23-ScalableVectorArrayCrash.ll
new file mode 100644
index 000000000000000..1f358ec850bc492
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/2022-08-23-ScalableVectorArrayCrash.ll
@@ -0,0 +1,15 @@
+; RUN: opt -passes=globalopt < %s
+
+; Ensure we don't ICE by trying to optimize a scalable vector load of a global
+; variable.
+
+%struct.xxx = type <{ [96 x i8] }>
+
+@.bss = internal unnamed_addr global %struct.xxx zeroinitializer, align 32
+
+define dso_local void @foo() local_unnamed_addr align 16 {
+L.entry:
+ store [4 x <vscale x 2 x double>] zeroinitializer, ptr @.bss, align 1
+ %0 = load [4 x <vscale x 2 x double>], ptr @.bss, align 8
+ unreachable
+}
diff --git a/llvm/test/Transforms/InstCombine/gep-can-replace-gep-idx-with-zero-typesize.ll b/llvm/test/Transforms/InstCombine/gep-can-replace-gep-idx-with-zero-typesize.ll
index 6514158966ac590..5be2b90bf7a626a 100644
--- a/llvm/test/Transforms/InstCombine/gep-can-replace-gep-idx-with-zero-typesize.ll
+++ b/llvm/test/Transforms/InstCombine/gep-can-replace-gep-idx-with-zero-typesize.ll
@@ -18,3 +18,10 @@ define void @can_replace_gep_idx_with_zero_typesize(i64 %n, ptr %a, i64 %b) {
call void @do_something(<vscale x 4 x i32> %tmp)
ret void
}
+
+define void @can_replace_gep_idx_with_zero_typesize_2(i64 %n, ptr %a, i64 %b) {
+ %idx = getelementptr [2 x <vscale x 4 x i32>], ptr %a, i64 %b, i64 0
+ %tmp = load <vscale x 4 x i32>, ptr %idx
+ call void @do_something(<vscale x 4 x i32> %tmp)
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/opaque-ptr.ll b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
index 4f12fa45e9ecadd..900d3f142a6ff79 100644
--- a/llvm/test/Transforms/InstCombine/opaque-ptr.ll
+++ b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
@@ -298,6 +298,17 @@ define ptr @geps_combinable_scalable(ptr %a, i64 %idx) {
ret ptr %a3
}
+define ptr @geps_combinable_scalable_vector_array(ptr %a, i64 %idx) {
+; CHECK-LABEL: @geps_combinable_scalable_vector_array(
+; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds [4 x <vscale x 2 x i32>], ptr [[A:%.*]], i64 1
+; CHECK-NEXT: [[A3:%.*]] = getelementptr inbounds i8, ptr [[A2]], i64 4
+; CHECK-NEXT: ret ptr [[A3]]
+;
+ %a2 = getelementptr inbounds [4 x <vscale x 2 x i32>], ptr %a, i64 1
+ %a3 = getelementptr inbounds i8, ptr %a2, i32 4
+ ret ptr %a3
+}
+
define i1 @compare_geps_same_indices(ptr %a, ptr %b, i64 %idx) {
; CHECK-LABEL: @compare_geps_same_indices(
; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[A:%.*]], [[B:%.*]]
diff --git a/llvm/test/Transforms/InstCombine/scalable-vector-array.ll b/llvm/test/Transforms/InstCombine/scalable-vector-array.ll
new file mode 100644
index 000000000000000..d03184766b4a682
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/scalable-vector-array.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+
+define <vscale x 4 x i32> @load(ptr %x) {
+; CHECK-LABEL: define <vscale x 4 x i32> @load
+; CHECK-SAME: (ptr [[X:%.*]]) {
+; CHECK-NEXT: [[A_ELT1:%.*]] = getelementptr inbounds [2 x <vscale x 4 x i32>], ptr [[X]], i64 0, i64 1
+; CHECK-NEXT: [[A_UNPACK2:%.*]] = load <vscale x 4 x i32>, ptr [[A_ELT1]], align 16
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A_UNPACK2]]
+;
+ %a = load [2 x <vscale x 4 x i32>], ptr %x
+ %b = extractvalue [2 x <vscale x 4 x i32>] %a, 1
+ ret <vscale x 4 x i32> %b
+}
+
+define void @store(ptr %x, <vscale x 4 x i32> %y, <vscale x 4 x i32> %z) {
+; CHECK-LABEL: define void @store
+; CHECK-SAME: (ptr [[X:%.*]], <vscale x 4 x i32> [[Y:%.*]], <vscale x 4 x i32> [[Z:%.*]]) {
+; CHECK-NEXT: store <vscale x 4 x i32> [[Y]], ptr [[X]], align 16
+; CHECK-NEXT: [[X_REPACK1:%.*]] = getelementptr inbounds [2 x <vscale x 4 x i32>], ptr [[X]], i64 0, i64 1
+; CHECK-NEXT: store <vscale x 4 x i32> [[Z]], ptr [[X_REPACK1]], align 16
+; CHECK-NEXT: ret void
+;
+ %a = insertvalue [2 x <vscale x 4 x i32>] poison, <vscale x 4 x i32> %y, 0
+ %b = insertvalue [2 x <vscale x 4 x i32>] %a, <vscale x 4 x i32> %z, 1
+ store [2 x <vscale x 4 x i32>] %b, ptr %x
+ ret void
+}
diff --git a/llvm/test/Transforms/InstSimplify/gep.ll b/llvm/test/Transforms/InstSimplify/gep.ll
index 5e70c2ca37c3a2e..281c286ef36ea2f 100644
--- a/llvm/test/Transforms/InstSimplify/gep.ll
+++ b/llvm/test/Transforms/InstSimplify/gep.ll
@@ -358,3 +358,12 @@ define <8 x ptr> @gep_vector_index_op3_poison_constant_index_afterwards(ptr %ptr
%res = getelementptr inbounds %t.3, ptr %ptr, i64 0, i32 1, <8 x i64> poison, i32 1
ret <8 x ptr> %res
}
+
+define i64 @gep_array_of_scalable_vectors_ptrdiff(ptr %ptr) {
+ %c1 = getelementptr inbounds [8 x <vscale x 4 x i32>], ptr %ptr, i64 4
+ %c2 = getelementptr inbounds [8 x <vscale x 4 x i32>], ptr %ptr, i64 6
+ %c1.int = ptrtoint ptr %c1 to i64
+ %c2.int = ptrtoint ptr %c2 to i64
+ %diff = sub i64 %c2.int, %c1.int
+ ret i64 %diff
+}
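
For the arithmetic in this test: [8 x <vscale x 4 x i32>] occupies
8 * 16 * vscale = 128 * vscale bytes, so %c1 and %c2 lie 2 * 128 * vscale =
256 * vscale bytes apart. That distance depends on vscale, so folding these
GEPs with a fixed element size would be wrong; the isScalableTy() check added
to simplifyGEPInst above keeps array-of-scalable source types out of the
offset-based simplifications.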
diff --git a/llvm/test/Transforms/SROA/scalable-vector-array.ll b/llvm/test/Transforms/SROA/scalable-vector-array.ll
new file mode 100644
index 000000000000000..dbb6b4cbea47fb5
--- /dev/null
+++ b/llvm/test/Transforms/SROA/scalable-vector-array.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s
+; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s
+
+; This test checks that SROA runs mem2reg on arrays of scalable vectors.
+
+define [ 2 x <vscale x 4 x i32> ] @alloca(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: define [2 x <vscale x 4 x i32>] @alloca
+; CHECK-SAME: (<vscale x 4 x i32> [[X:%.*]], <vscale x 4 x i32> [[Y:%.*]]) {
+; CHECK-NEXT: [[AGG0:%.*]] = insertvalue [2 x <vscale x 4 x i32>] poison, <vscale x 4 x i32> [[X]], 0
+; CHECK-NEXT: [[AGG1:%.*]] = insertvalue [2 x <vscale x 4 x i32>] [[AGG0]], <vscale x 4 x i32> [[Y]], 1
+; CHECK-NEXT: ret [2 x <vscale x 4 x i32>] [[AGG1]]
+;
+ %addr = alloca [ 2 x <vscale x 4 x i32> ], align 4
+ %agg0 = insertvalue [ 2 x <vscale x 4 x i32> ] poison, <vscale x 4 x i32> %x, 0
+ %agg1 = insertvalue [ 2 x <vscale x 4 x i32> ] %agg0, <vscale x 4 x i32> %y, 1
+ store [ 2 x <vscale x 4 x i32> ] %agg1, ptr %addr, align 4
+ %val = load [ 2 x <vscale x 4 x i32> ], ptr %addr, align 4
+ ret [ 2 x <vscale x 4 x i32> ] %val
+}
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AArch64/scalable-vector-geps.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AArch64/scalable-vector-geps.ll
index 2ceac4c44d10f4c..31d166506a4e48f 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AArch64/scalable-vector-geps.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AArch64/scalable-vector-geps.ll
@@ -28,4 +28,41 @@ define ptr @test2(ptr %base, i64 %idx) {
ret ptr %gep
}
+; Index is implicitly multiplied by vscale and so not really constant.
+define ptr @test3(ptr %base, i64 %idx) #0 {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[IDX_NEXT:%.*]] = add nuw nsw i64 [[IDX:%.*]], 1
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr [8 x <vscale x 4 x float>], ptr [[BASE:%.*]], i64 [[IDX_NEXT]]
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+ %idx.next = add nuw nsw i64 %idx, 1
+ %gep = getelementptr [8 x <vscale x 4 x float>], ptr %base, i64 %idx.next
+ ret ptr %gep
+}
+
+; Indices are implicitly multiplied by vscale and so not really constant.
+define ptr @test4(ptr %base, i64 %idx) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[IDX_NEXT:%.*]] = add nuw nsw i64 [[IDX:%.*]], 1
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr [8 x <vscale x 4 x float>], ptr [[BASE:%.*]], i64 3, i64 [[IDX_NEXT]]
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+ %idx.next = add nuw nsw i64 %idx, 1
+ %gep = getelementptr [8 x <vscale x 4 x float>], ptr %base, i64 3, i64 %idx.next
+ ret ptr %gep
+}
+
+; Whilst the first two indices are not constant, the calculation of the third
+; index does contain a constant that can be extracted.
+define ptr @test5(ptr %base, i64 %idx) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [8 x <vscale x 4 x float>], ptr [[BASE:%.*]], i64 1, i64 3, i64 [[IDX:%.*]]
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr float, ptr [[TMP1]], i64 1
+; CHECK-NEXT: ret ptr [[GEP2]]
+;
+ %idx.next = add nuw nsw i64 %idx, 1
+ %gep = getelementptr [8 x <vscale x 4 x float>], ptr %base, i64 1, i64 3, i64 %idx.next
+ ret ptr %gep
+}
+
attributes #0 = { "target-features"="+sve" }
diff --git a/llvm/test/Verifier/scalable-global-vars.ll b/llvm/test/Verifier/scalable-global-vars.ll
index 740cd23a2888a1b..81882261e664ef6 100644
--- a/llvm/test/Verifier/scalable-global-vars.ll
+++ b/llvm/test/Verifier/scalable-global-vars.ll
@@ -3,14 +3,15 @@
;; Global variables cannot be scalable vectors, since we don't
;; know the size at compile time.
-; CHECK: Globals cannot contain scalable vectors
+; CHECK: Globals cannot contain scalable types
; CHECK-NEXT: ptr @ScalableVecGlobal
@ScalableVecGlobal = global <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: Globals cannot contain scalable vectors
+; CHECK-NEXT: Globals cannot contain scalable types
+; CHECK-NEXT: ptr @ScalableVecArrayGlobal
+@ScalableVecArrayGlobal = global [ 8 x <vscale x 4 x i32> ] zeroinitializer
+
+; CHECK-NEXT: Globals cannot contain scalable types
; CHECK-NEXT: ptr @ScalableVecStructGlobal
@ScalableVecStructGlobal = global { i32, <vscale x 4 x i32> } zeroinitializer
-;; Global _pointers_ to scalable vectors are fine
-; CHECK-NOT: Globals cannot contain scalable vectors
-@ScalableVecPtr = global ptr zeroinitializer