[llvm] 1e1770a - [SVE][CodeGen] Fix InlineFunction for scalable vectors

Fri Sep 11 00:35:07 PDT 2020

Author: David Sherwood
Date: 2020-09-11T08:34:51+01:00
New Revision: 1e1770a07ec0f6a3576362ea5eb97aedd33f4b26

URL: https://github.com/llvm/llvm-project/commit/1e1770a07ec0f6a3576362ea5eb97aedd33f4b26
DIFF: https://github.com/llvm/llvm-project/commit/1e1770a07ec0f6a3576362ea5eb97aedd33f4b26.diff

LOG: [SVE][CodeGen] Fix InlineFunction for scalable vectors

When inlining functions containing allocas of scalable vectors, we
cannot specify the size in the lifetime markers, since the size is
not known at compile time. In that case the markers are emitted
with an unknown size (-1) instead.

Added new test here:

  test/Transforms/Inline/AArch64/sve-alloca-merge.ll

Differential Revision: https://reviews.llvm.org/D87139

Added: 
    llvm/test/Transforms/Inline/AArch64/sve-alloca-merge.ll

Modified: 
    llvm/lib/Transforms/Utils/InlineFunction.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 30726627bc82..7ff21d7ee9ef 100644

--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -2061,7 +2061,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
           dyn_cast<ConstantInt>(AI->getArraySize())) {
         auto &DL = Caller->getParent()->getDataLayout();
         Type *AllocaType = AI->getAllocatedType();
-        uint64_t AllocaTypeSize = DL.getTypeAllocSize(AllocaType);
+        TypeSize AllocaTypeSize = DL.getTypeAllocSize(AllocaType);
         uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
 
         // Don't add markers for zero-sized allocas.
@@ -2070,9 +2070,10 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
 
         // Check that array size doesn't saturate uint64_t and doesn't
         // overflow when it's multiplied by type size.
-        if (AllocaArraySize != std::numeric_limits<uint64_t>::max() &&
+        if (!AllocaTypeSize.isScalable() &&
+            AllocaArraySize != std::numeric_limits<uint64_t>::max() &&
             std::numeric_limits<uint64_t>::max() / AllocaArraySize >=
-                AllocaTypeSize) {
+                AllocaTypeSize.getFixedSize()) {
           AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
                                         AllocaArraySize * AllocaTypeSize);
         }

diff  --git a/llvm/test/Transforms/Inline/AArch64/sve-alloca-merge.ll b/llvm/test/Transforms/Inline/AArch64/sve-alloca-merge.ll
new file mode 100644
index 000000000000..c355388ed836
--- /dev/null
+++ b/llvm/test/Transforms/Inline/AArch64/sve-alloca-merge.ll
@@ -0,0 +1,29 @@
+; RUN: opt -mtriple=aarch64--linux-gnu -mattr=+sve < %s -inline -S | FileCheck %s
+
+define void @bar(<vscale x 2 x i64>* %a) {
+entry:
+  %b = alloca <vscale x 2 x i64>, align 16
+  store <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64>* %b, align 16
+  %c = load <vscale x 2 x i64>, <vscale x 2 x i64>* %a, align 16
+  %d = load <vscale x 2 x i64>, <vscale x 2 x i64>* %b, align 16
+  %e = add <vscale x 2 x i64> %c, %d
+  %f = add <vscale x 2 x i64> %e, %c
+  store <vscale x 2 x i64> %f, <vscale x 2 x i64>* %a, align 16
+  ret void
+}
+
+define i64 @foo() {
+; CHECK-LABEL: @foo(
+; CHECK: %0 = bitcast <vscale x 2 x i64>* %{{.*}} to i8*
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %0)
+; CHECK: %1 = bitcast <vscale x 2 x i64>* %{{.*}} to i8*
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %1)
+entry:
+  %a = alloca <vscale x 2 x i64>, align 16
+  store <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64>* %a, align 16
+  %a1 = bitcast <vscale x 2 x i64>* %a to i64*
+  store i64 1, i64* %a1, align 8
+  call void @bar(<vscale x 2 x i64>* %a)
+  %el = load i64, i64* %a1
+  ret i64 %el
+}