[llvm] 511d5aa - [Transforms][SROA] Skip uses of allocas where the type is scalable

Wed Aug 12 02:36:19 PDT 2020

Author: Cullen Rhodes
Date: 2020-08-12T09:35:48Z
New Revision: 511d5aaca37a31d806470aac9e2ee7ff6ce6247c

URL: https://github.com/llvm/llvm-project/commit/511d5aaca37a31d806470aac9e2ee7ff6ce6247c
DIFF: https://github.com/llvm/llvm-project/commit/511d5aaca37a31d806470aac9e2ee7ff6ce6247c.diff

LOG: [Transforms][SROA] Skip uses of allocas where the type is scalable

When visiting load and store instructions in SROA, skip those whose type
is a scalable vector. This is relevant to the implementation of the
'arm_sve_vector_bits' attribute, which is used to define VLS types, where
an alloca of a fixed-length vector could be bitcast to a scalable vector.
See D85128 for more information.

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D85725

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/SROA.cpp
    llvm/test/Transforms/SROA/scalable-vectors.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 93ea6e67ee43..f26b9a1f26f9 100644

--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -780,6 +780,9 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
         LI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
       return PI.setAborted(&LI);
 
+    if (isa<ScalableVectorType>(LI.getType()))
+      return PI.setAborted(&LI);
+
     uint64_t Size = DL.getTypeStoreSize(LI.getType()).getFixedSize();
     return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
   }
@@ -795,6 +798,9 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
         SI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
       return PI.setAborted(&SI);
 
+    if (isa<ScalableVectorType>(ValOp->getType()))
+      return PI.setAborted(&SI);
+
     uint64_t Size = DL.getTypeStoreSize(ValOp->getType()).getFixedSize();
 
     // If this memory access can be shown to *statically* extend outside the
@@ -1538,6 +1544,8 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
   Type *ElementTy = Ty->getElementType();
   if (!ElementTy->isSized())
     return nullptr; // We can't GEP through an unsized element.
+  if (isa<ScalableVectorType>(ElementTy))
+    return nullptr;
   APInt ElementSize(Offset.getBitWidth(),
                     DL.getTypeAllocSize(ElementTy).getFixedSize());
   if (ElementSize == 0)

diff  --git a/llvm/test/Transforms/SROA/scalable-vectors.ll b/llvm/test/Transforms/SROA/scalable-vectors.ll
index bda54e25b945..21c473426f26 100644
--- a/llvm/test/Transforms/SROA/scalable-vectors.ll
+++ b/llvm/test/Transforms/SROA/scalable-vectors.ll
@@ -34,3 +34,50 @@ define <vscale x 16 x i8> @unpromotable_alloca(<vscale x 16 x i8> %vec) {
   %1 = load volatile <vscale x 16 x i8>, <vscale x 16 x i8>* %vec.addr
   ret <vscale x 16 x i8> %1
 }
+
+; Test we bail out when using an alloca of a fixed-length vector (VLS) that was
+; bitcasted to a scalable vector.
+define <vscale x 4 x i32> @cast_alloca_to_svint32_t(<vscale x 4 x i32> %type.coerce) {
+; CHECK-LABEL: cast_alloca_to_svint32_t
+; CHECK-NEXT: %type = alloca <16 x i32>, align 64
+; CHECK-NEXT: %type.addr = alloca <16 x i32>, align 64
+; CHECK-NEXT: %1 = bitcast <16 x i32>* %type to <vscale x 4 x i32>*
+; CHECK-NEXT: store <vscale x 4 x i32> %type.coerce, <vscale x 4 x i32>* %1, align 16
+; CHECK-NEXT: %type1 = load <16 x i32>, <16 x i32>* %type, align 64
+; CHECK-NEXT: store <16 x i32> %type1, <16 x i32>* %type.addr, align 64
+; CHECK-NEXT: %2 = load <16 x i32>, <16 x i32>* %type.addr, align 64
+; CHECK-NEXT: %3 = bitcast <16 x i32>* %type.addr to <vscale x 4 x i32>*
+; CHECK-NEXT: %4 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %3, align 16
+; CHECK-NEXT: ret <vscale x 4 x i32> %4
+  %type = alloca <16 x i32>
+  %type.addr = alloca <16 x i32>
+  %1 = bitcast <16 x i32>* %type to <vscale x 4 x i32>*
+  store <vscale x 4 x i32> %type.coerce, <vscale x 4 x i32>* %1
+  %type1 = load <16 x i32>, <16 x i32>* %type
+  store <16 x i32> %type1, <16 x i32>* %type.addr
+  %2 = load <16 x i32>, <16 x i32>* %type.addr
+  %3 = bitcast <16 x i32>* %type.addr to <vscale x 4 x i32>*
+  %4 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %3
+  ret <vscale x 4 x i32> %4
+}
+
+; When casting from VLA to VLS via memory check we bail out when producing a
+; GEP where the element type is a scalable vector.
+define <vscale x 4 x i32> @cast_alloca_from_svint32_t() {
+; CHECK-LABEL: cast_alloca_from_svint32_t
+; CHECK-NEXT: %retval.coerce = alloca <vscale x 4 x i32>, align 16
+; CHECK-NEXT: %1 = bitcast <vscale x 4 x i32>* %retval.coerce to i8*
+; CHECK-NEXT: %retval.0..sroa_cast = bitcast i8* %1 to <16 x i32>*
+; CHECK-NEXT: store <16 x i32> undef, <16 x i32>* %retval.0..sroa_cast, align 16
+; CHECK-NEXT: %2 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %retval.coerce, align 16
+; CHECK-NEXT: ret <vscale x 4 x i32> %2
+  %retval = alloca <16 x i32>
+  %retval.coerce = alloca <vscale x 4 x i32>
+  %1 = bitcast <vscale x 4 x i32>* %retval.coerce to i8*
+  %2 = bitcast <16 x i32>* %retval to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 %2, i64 64, i1 false)
+  %3 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %retval.coerce
+  ret <vscale x 4 x i32> %3
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind