[PATCH] D85725: [Transforms][SROA] Skip uses of allocas where the type is scalable
Cullen Rhodes via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 11 05:56:17 PDT 2020
c-rhodes created this revision.
c-rhodes added reviewers: efriedma, sdesmalen, cameron.mcinally, david-arm.
Herald added subscribers: hiraditya, kristof.beyls.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.
c-rhodes requested review of this revision.
When splitting an alloca of a fixed-length vector (VLS) and creating a
slice for each load and store, skip uses where the VLS is bitcast to a
scalable vector (VLA). A scalable type has no size known at compile
time, so such a use cannot be described by a fixed-size slice.
This is relevant to the implementation of the 'arm_sve_vector_bits'
attribute, which is used to define VLS types. See D85128 <https://reviews.llvm.org/D85128> for more
information.
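For context, the fix amounts to bailing out of slice building as soon as
a scalable type is seen, before its store size is queried. Below is a
minimal standalone sketch of that check for the load case (the helper
name here is hypothetical and not part of the patch):

  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/Instructions.h"

  using namespace llvm;

  // Scalable vector types have no size known at compile time, so the
  // subsequent DL.getTypeStoreSize(...).getFixedSize() query is only
  // valid for fixed-size types; abort slice building instead.
  static bool mustAbortOnScalableLoad(const LoadInst &LI) {
    return isa<ScalableVectorType>(LI.getType());
  }

The store path in the diff applies the same check to the type of the
value operand, and the GEP path refuses to compute an element size when
the element type is a scalable vector.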
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D85725
Files:
llvm/lib/Transforms/Scalar/SROA.cpp
llvm/test/Transforms/SROA/scalable-vectors.ll
Index: llvm/test/Transforms/SROA/scalable-vectors.ll
===================================================================
--- llvm/test/Transforms/SROA/scalable-vectors.ll
+++ llvm/test/Transforms/SROA/scalable-vectors.ll
@@ -34,3 +34,50 @@
%1 = load volatile <vscale x 16 x i8>, <vscale x 16 x i8>* %vec.addr
ret <vscale x 16 x i8> %1
}
+
+; Test that we bail out when an alloca of a fixed-length vector (VLS) is
+; bitcast to a scalable vector.
+define <vscale x 4 x i32> @cast_alloca_to_svint32_t(<vscale x 4 x i32> %type.coerce) {
+; CHECK-LABEL: cast_alloca_to_svint32_t
+; CHECK-NEXT: %type = alloca <16 x i32>, align 64
+; CHECK-NEXT: %type.addr = alloca <16 x i32>, align 64
+; CHECK-NEXT: %1 = bitcast <16 x i32>* %type to <vscale x 4 x i32>*
+; CHECK-NEXT: store <vscale x 4 x i32> %type.coerce, <vscale x 4 x i32>* %1, align 16
+; CHECK-NEXT: %type1 = load <16 x i32>, <16 x i32>* %type, align 64
+; CHECK-NEXT: store <16 x i32> %type1, <16 x i32>* %type.addr, align 64
+; CHECK-NEXT: %2 = load <16 x i32>, <16 x i32>* %type.addr, align 64
+; CHECK-NEXT: %3 = bitcast <16 x i32>* %type.addr to <vscale x 4 x i32>*
+; CHECK-NEXT: %4 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %3, align 16
+; CHECK-NEXT: ret <vscale x 4 x i32> %4
+ %type = alloca <16 x i32>
+ %type.addr = alloca <16 x i32>
+ %1 = bitcast <16 x i32>* %type to <vscale x 4 x i32>*
+ store <vscale x 4 x i32> %type.coerce, <vscale x 4 x i32>* %1
+ %type1 = load <16 x i32>, <16 x i32>* %type
+ store <16 x i32> %type1, <16 x i32>* %type.addr
+ %2 = load <16 x i32>, <16 x i32>* %type.addr
+ %3 = bitcast <16 x i32>* %type.addr to <vscale x 4 x i32>*
+ %4 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %3
+ ret <vscale x 4 x i32> %4
+}
+
+; When casting from VLA to VLS via memory, check that we bail out when
+; producing a GEP whose element type is a scalable vector.
+define <vscale x 4 x i32> @cast_alloca_from_svint32_t() {
+; CHECK-LABEL: cast_alloca_from_svint32_t
+; CHECK-NEXT: %retval.coerce = alloca <vscale x 4 x i32>, align 16
+; CHECK-NEXT: %1 = bitcast <vscale x 4 x i32>* %retval.coerce to i8*
+; CHECK-NEXT: %retval.0..sroa_cast = bitcast i8* %1 to <16 x i32>*
+; CHECK-NEXT: store <16 x i32> undef, <16 x i32>* %retval.0..sroa_cast, align 16
+; CHECK-NEXT: %2 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %retval.coerce, align 16
+; CHECK-NEXT: ret <vscale x 4 x i32> %2
+ %retval = alloca <16 x i32>
+ %retval.coerce = alloca <vscale x 4 x i32>
+ %1 = bitcast <vscale x 4 x i32>* %retval.coerce to i8*
+ %2 = bitcast <16 x i32>* %retval to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 %2, i64 64, i1 false)
+ %3 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %retval.coerce
+ ret <vscale x 4 x i32> %3
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
Index: llvm/lib/Transforms/Scalar/SROA.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/SROA.cpp
+++ llvm/lib/Transforms/Scalar/SROA.cpp
@@ -780,6 +780,9 @@
LI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
return PI.setAborted(&LI);
+ if (isa<ScalableVectorType>(LI.getType()))
+ return PI.setAborted(&LI);
+
uint64_t Size = DL.getTypeStoreSize(LI.getType()).getFixedSize();
return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
}
@@ -795,6 +798,9 @@
SI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
return PI.setAborted(&SI);
+ if (isa<ScalableVectorType>(ValOp->getType()))
+ return PI.setAborted(&SI);
+
uint64_t Size = DL.getTypeStoreSize(ValOp->getType()).getFixedSize();
// If this memory access can be shown to *statically* extend outside the
@@ -1538,6 +1544,8 @@
Type *ElementTy = Ty->getElementType();
if (!ElementTy->isSized())
return nullptr; // We can't GEP through an unsized element.
+ if (isa<ScalableVectorType>(ElementTy))
+ return nullptr;
APInt ElementSize(Offset.getBitWidth(),
DL.getTypeAllocSize(ElementTy).getFixedSize());
if (ElementSize == 0)