[PATCH] D85725: [Transforms][SROA] Skip uses of allocas where the type is scalable

Cullen Rhodes via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 11 12:02:08 PDT 2020


c-rhodes updated this revision to Diff 284841.
c-rhodes added a comment.

Specify `-aarch64-sve-vector-bits-min=512` in test.
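This pins the minimum SVE register width at 512 bits, so under the new CHECK-VLS RUN lines the fixed-length `<16 x i32>` in the tests (16 x 32 = 512 bits) exactly matches the scalable `<vscale x 4 x i32>` at the minimum vector length, which is what makes the VLS/VLA bitcasts in the tests meaningful.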


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D85725/new/

https://reviews.llvm.org/D85725

Files:
  llvm/lib/Transforms/Scalar/SROA.cpp
  llvm/test/Transforms/SROA/scalable-vectors.ll


Index: llvm/test/Transforms/SROA/scalable-vectors.ll
===================================================================
--- llvm/test/Transforms/SROA/scalable-vectors.ll
+++ llvm/test/Transforms/SROA/scalable-vectors.ll
@@ -1,5 +1,7 @@
 ; RUN: opt < %s -sroa -S | FileCheck %s
 ; RUN: opt < %s -passes=sroa -S | FileCheck %s
+; RUN: opt -aarch64-sve-vector-bits-min=512 < %s -sroa -S | FileCheck %s --check-prefix=CHECK-VLS
+; RUN: opt -aarch64-sve-vector-bits-min=512 < %s -passes=sroa -S | FileCheck %s --check-prefix=CHECK-VLS
 
 ; This test checks that SROA runs mem2reg on scalable vectors.
 
@@ -34,3 +36,50 @@
   %1 = load volatile <vscale x 16 x i8>, <vscale x 16 x i8>* %vec.addr
   ret <vscale x 16 x i8> %1
 }
+
+; Test that we bail out when using an alloca of a fixed-length vector (VLS)
+; that has been bitcast to a scalable vector.
+define <vscale x 4 x i32> @cast_alloca_to_svint32_t(<vscale x 4 x i32> %type.coerce) {
+; CHECK-VLS-LABEL: cast_alloca_to_svint32_t
+; CHECK-VLS-NEXT: %type = alloca <16 x i32>, align 64
+; CHECK-VLS-NEXT: %type.addr = alloca <16 x i32>, align 64
+; CHECK-VLS-NEXT: %1 = bitcast <16 x i32>* %type to <vscale x 4 x i32>*
+; CHECK-VLS-NEXT: store <vscale x 4 x i32> %type.coerce, <vscale x 4 x i32>* %1, align 16
+; CHECK-VLS-NEXT: %type1 = load <16 x i32>, <16 x i32>* %type, align 64
+; CHECK-VLS-NEXT: store <16 x i32> %type1, <16 x i32>* %type.addr, align 64
+; CHECK-VLS-NEXT: %2 = load <16 x i32>, <16 x i32>* %type.addr, align 64
+; CHECK-VLS-NEXT: %3 = bitcast <16 x i32>* %type.addr to <vscale x 4 x i32>*
+; CHECK-VLS-NEXT: %4 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %3, align 16
+; CHECK-VLS-NEXT: ret <vscale x 4 x i32> %4
+  %type = alloca <16 x i32>
+  %type.addr = alloca <16 x i32>
+  %1 = bitcast <16 x i32>* %type to <vscale x 4 x i32>*
+  store <vscale x 4 x i32> %type.coerce, <vscale x 4 x i32>* %1
+  %type1 = load <16 x i32>, <16 x i32>* %type
+  store <16 x i32> %type1, <16 x i32>* %type.addr
+  %2 = load <16 x i32>, <16 x i32>* %type.addr
+  %3 = bitcast <16 x i32>* %type.addr to <vscale x 4 x i32>*
+  %4 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %3
+  ret <vscale x 4 x i32> %4
+}
+
+; When casting from a VLA to a VLS via memory, check that we bail out when
+; producing a GEP where the element type is a scalable vector.
+define <vscale x 4 x i32> @cast_alloca_from_svint32_t() {
+; CHECK-VLS-LABEL: cast_alloca_from_svint32_t
+; CHECK-VLS-NEXT: %retval.coerce = alloca <vscale x 4 x i32>, align 16
+; CHECK-VLS-NEXT: %1 = bitcast <vscale x 4 x i32>* %retval.coerce to i8*
+; CHECK-VLS-NEXT: %retval.0..sroa_cast = bitcast i8* %1 to <16 x i32>*
+; CHECK-VLS-NEXT: store <16 x i32> undef, <16 x i32>* %retval.0..sroa_cast, align 16
+; CHECK-VLS-NEXT: %2 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %retval.coerce, align 16
+; CHECK-VLS-NEXT: ret <vscale x 4 x i32> %2
+  %retval = alloca <16 x i32>
+  %retval.coerce = alloca <vscale x 4 x i32>
+  %1 = bitcast <vscale x 4 x i32>* %retval.coerce to i8*
+  %2 = bitcast <16 x i32>* %retval to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 %2, i64 64, i1 false)
+  %3 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %retval.coerce
+  ret <vscale x 4 x i32> %3
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
Index: llvm/lib/Transforms/Scalar/SROA.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/SROA.cpp
+++ llvm/lib/Transforms/Scalar/SROA.cpp
@@ -780,6 +780,9 @@
         LI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
       return PI.setAborted(&LI);
 
+    if (isa<ScalableVectorType>(LI.getType()))
+      return PI.setAborted(&LI);
+
     uint64_t Size = DL.getTypeStoreSize(LI.getType()).getFixedSize();
     return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
   }
@@ -795,6 +798,9 @@
         SI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
       return PI.setAborted(&SI);
 
+    if (isa<ScalableVectorType>(ValOp->getType()))
+      return PI.setAborted(&SI);
+
     uint64_t Size = DL.getTypeStoreSize(ValOp->getType()).getFixedSize();
 
     // If this memory access can be shown to *statically* extend outside the
@@ -1538,6 +1544,8 @@
   Type *ElementTy = Ty->getElementType();
   if (!ElementTy->isSized())
     return nullptr; // We can't GEP through an unsized element.
+  if (isa<ScalableVectorType>(ElementTy))
+    return nullptr;
   APInt ElementSize(Offset.getBitWidth(),
                     DL.getTypeAllocSize(ElementTy).getFixedSize());
   if (ElementSize == 0)
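
For reference, all three hunks guard the same hazard: `TypeSize::getFixedSize()` asserts when the size is scalable (vscale-dependent), so any path that needs a compile-time-constant size must reject `ScalableVectorType` first and abort the rewrite. A minimal standalone sketch of that guard, assuming LLVM's C++ API at the time of this patch (the helper name and out-parameter are illustrative, not part of the patch):

  #include <cstdint>
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/DerivedTypes.h"

  // Illustrative helper: set Size only when Ty has a compile-time-known
  // store size. Scalable vectors are rejected up front because calling
  // TypeSize::getFixedSize() on a scalable TypeSize asserts.
  static bool getFixedStoreSizeInBytes(const llvm::DataLayout &DL,
                                       llvm::Type *Ty, uint64_t &Size) {
    if (llvm::isa<llvm::ScalableVectorType>(Ty))
      return false; // Size depends on vscale; no fixed value exists.
    Size = DL.getTypeStoreSize(Ty).getFixedSize();
    return true;
  }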



