[clang] 28518d9 - [InlineFunction] Handle return attributes on call within inlined body

Anna Thomas via cfe-commits cfe-commits at lists.llvm.org
Tue Mar 31 11:35:53 PDT 2020


Author: Anna Thomas
Date: 2020-03-31T14:35:40-04:00
New Revision: 28518d9ae39ff5c6044e230d58b6ae28b0252cae

URL: https://github.com/llvm/llvm-project/commit/28518d9ae39ff5c6044e230d58b6ae28b0252cae
DIFF: https://github.com/llvm/llvm-project/commit/28518d9ae39ff5c6044e230d58b6ae28b0252cae.diff

LOG: [InlineFunction] Handle return attributes on call within inlined body

Consider a callee function that has a call (C) within it which feeds
into the return.  When we inline that callee into a callsite that has
return attributes, we can backward propagate those attributes to the
call (C) within that inlined callee body.

This is safe to do so only if we can guarantee transfer of execution to
successor in the window of instructions between return value (i.e. the
call C) and the return instruction.

See added test cases.

Reviewed-By: reames, jdoerfert

Differential Revision: https://reviews.llvm.org/D76140

Added: 
    llvm/test/Transforms/Inline/ret_attr_update.ll

Modified: 
    clang/test/CodeGen/builtins-systemz-zvector.c
    clang/test/CodeGen/builtins-systemz-zvector2.c
    clang/test/CodeGen/movbe-builtins.c
    clang/test/CodeGen/rot-intrinsics.c
    llvm/lib/Transforms/Utils/InlineFunction.cpp

Removed: 
    


################################################################################
diff  --git a/clang/test/CodeGen/builtins-systemz-zvector.c b/clang/test/CodeGen/builtins-systemz-zvector.c
index da0e720c9fae..6cba71098792 100644
--- a/clang/test/CodeGen/builtins-systemz-zvector.c
+++ b/clang/test/CodeGen/builtins-systemz-zvector.c
@@ -3665,31 +3665,31 @@ void test_integer(void) {
   // CHECK-ASM: vsumqg
 
   idx = vec_test_mask(vsc, vuc);
-  // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   // CHECK-ASM: vtm
   idx = vec_test_mask(vuc, vuc);
-  // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   // CHECK-ASM: vtm
   idx = vec_test_mask(vss, vus);
-  // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   // CHECK-ASM: vtm
   idx = vec_test_mask(vus, vus);
-  // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   // CHECK-ASM: vtm
   idx = vec_test_mask(vsi, vui);
-  // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   // CHECK-ASM: vtm
   idx = vec_test_mask(vui, vui);
-  // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   // CHECK-ASM: vtm
   idx = vec_test_mask(vsl, vul);
-  // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   // CHECK-ASM: vtm
   idx = vec_test_mask(vul, vul);
-  // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   // CHECK-ASM: vtm
   idx = vec_test_mask(vd, vul);
-  // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   // CHECK-ASM: vtm
 }
 

diff  --git a/clang/test/CodeGen/builtins-systemz-zvector2.c b/clang/test/CodeGen/builtins-systemz-zvector2.c
index a4f791e6019b..1880fed64dbc 100644
--- a/clang/test/CodeGen/builtins-systemz-zvector2.c
+++ b/clang/test/CodeGen/builtins-systemz-zvector2.c
@@ -654,10 +654,10 @@ void test_integer(void) {
   // CHECK-ASM: vsrlb
 
   idx = vec_test_mask(vf, vui);
-  // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   // CHECK-ASM: vtm
   idx = vec_test_mask(vd, vul);
-  // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  // CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   // CHECK-ASM: vtm
 
   vuc = vec_msum_u128(vul, vul, vuc, 0);

diff  --git a/clang/test/CodeGen/movbe-builtins.c b/clang/test/CodeGen/movbe-builtins.c
index 342f66391388..15f49b84ec67 100644
--- a/clang/test/CodeGen/movbe-builtins.c
+++ b/clang/test/CodeGen/movbe-builtins.c
@@ -7,7 +7,7 @@
 short test_loadbe_i16(const short *P) {
   // CHECK-LABEL: @test_loadbe_i16
   // CHECK: [[LOAD:%.*]] = load i16, i16* %{{.*}}, align 1
-  // CHECK: call i16 @llvm.bswap.i16(i16 [[LOAD]])
+  // CHECK: call signext i16 @llvm.bswap.i16(i16 [[LOAD]])
   return _loadbe_i16(P);
 }
 

diff  --git a/clang/test/CodeGen/rot-intrinsics.c b/clang/test/CodeGen/rot-intrinsics.c
index dcdc54c4585a..7b1ffb6ae3a6 100644
--- a/clang/test/CodeGen/rot-intrinsics.c
+++ b/clang/test/CodeGen/rot-intrinsics.c
@@ -1,9 +1,9 @@
-// RUN: %clang_cc1 -ffreestanding -triple i686--linux -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
-// RUN: %clang_cc1 -ffreestanding -triple x86_64--linux -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64BIT-LONG
-// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
-// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
-// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
-// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
+// RUN: %clang_cc1 -ffreestanding -triple i686--linux -emit-llvm -mllvm -update-return-attrs=false %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
+// RUN: %clang_cc1 -ffreestanding -triple x86_64--linux -emit-llvm -mllvm -update-return-attrs=false %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64BIT-LONG
+// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -mllvm -update-return-attrs=false -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
+// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -mllvm -update-return-attrs=false -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
+// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -mllvm -update-return-attrs=false -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
+// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -mllvm -update-return-attrs=false -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
 
 #include <x86intrin.h>
 

diff  --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index b8b3d1895093..36d05345f23b 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -80,11 +80,21 @@ EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
   cl::Hidden,
   cl::desc("Convert noalias attributes to metadata during inlining."));
 
+static cl::opt<bool> UpdateReturnAttributes(
+    "update-return-attrs", cl::init(true), cl::Hidden,
+    cl::desc("Update return attributes on calls within inlined body"));
+
 static cl::opt<bool>
 PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
   cl::init(true), cl::Hidden,
   cl::desc("Convert align attributes to assumptions during inlining."));
 
+static cl::opt<unsigned> InlinerAttributeWindow(
+    "inliner-attribute-window", cl::Hidden,
+    cl::desc("the maximum number of instructions analyzed for may throw during "
+             "attribute inference in inlined body"),
+    cl::init(4));
+
 llvm::InlineResult llvm::InlineFunction(CallBase *CB, InlineFunctionInfo &IFI,
                                         AAResults *CalleeAAR,
                                         bool InsertLifetime) {
@@ -1136,6 +1146,81 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
   }
 }
 
+static bool MayContainThrowingOrExitingCall(Instruction *Begin,
+                                            Instruction *End) {
+
+  assert(Begin->getParent() == End->getParent() &&
+         "Expected to be in same basic block!");
+  unsigned NumInstChecked = 0;
+  // Check that all instructions in the range [Begin, End) are guaranteed to
+  // transfer execution to successor.
+  for (auto &I : make_range(Begin->getIterator(), End->getIterator()))
+    if (NumInstChecked++ > InlinerAttributeWindow ||
+        !isGuaranteedToTransferExecutionToSuccessor(&I))
+      return true;
+  return false;
+}
+
+static void AddReturnAttributes(CallSite CS, ValueToValueMapTy &VMap) {
+  if (!UpdateReturnAttributes)
+    return;
+  AttrBuilder AB(CS.getAttributes(), AttributeList::ReturnIndex);
+  if (AB.empty())
+    return;
+
+  auto *CalledFunction = CS.getCalledFunction();
+  auto &Context = CalledFunction->getContext();
+
+  for (auto &BB : *CalledFunction) {
+    auto *RI = dyn_cast<ReturnInst>(BB.getTerminator());
+    if (!RI || !isa<CallBase>(RI->getOperand(0)))
+      continue;
+    // Sanity check that the cloned return instruction exists and is a return
+    // instruction itself.
+    auto *NewRI = dyn_cast_or_null<ReturnInst>(VMap.lookup(RI));
+    if (!NewRI)
+      continue;
+    auto *RetVal = cast<CallBase>(RI->getOperand(0));
+    // Sanity check that the cloned RetVal exists and is a call.
+    // Simplification during inlining could have transformed the cloned
+    // instruction.
+    auto *NewRetVal = dyn_cast_or_null<CallBase>(VMap.lookup(RetVal));
+    if (!NewRetVal)
+      continue;
+    // Backward propagation of attributes to the returned value may be incorrect
+    // if it is control flow dependent.
+    // Consider:
+    // @callee {
+    //  %rv = call @foo()
+    //  %rv2 = call @bar()
+    //  if (%rv2 != null)
+    //    return %rv2
+    //  if (%rv == null)
+    //    exit()
+    //  return %rv
+    // }
+    // caller() {
+    //   %val = call nonnull @callee()
+    // }
+    // Here we cannot add the nonnull attribute on either foo or bar. So, we
+    // limit the check to both NewRetVal and NewRI are in the same basic block
+    // and there are no throwing/exiting instructions between these
+    // instructions.
+    if (NewRI->getParent() != NewRetVal->getParent() ||
+        MayContainThrowingOrExitingCall(NewRetVal, NewRI))
+      continue;
+    // Add to the existing attributes of NewRetVal.
+    // NB! When we have the same attribute already existing on NewRetVal, but
+    // with a 
diff ering value, the AttributeList's merge API honours the already
+    // existing attribute value (i.e. attributes such as dereferenceable,
+    // dereferenceable_or_null etc). See AttrBuilder::merge for more details.
+    AttributeList AL = NewRetVal->getAttributes();
+    AttributeList NewAL =
+        AL.addAttributes(Context, AttributeList::ReturnIndex, AB);
+    NewRetVal->setAttributes(NewAL);
+  }
+}
+
 /// If the inlined function has non-byval align arguments, then
 /// add @llvm.assume-based alignment assumptions to preserve this information.
 static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
@@ -1801,6 +1886,10 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
     // Add noalias metadata if necessary.
     AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR);
 
+    // Clone return attributes on the callsite into the calls within the inlined
+    // function which feed into its return value.
+    AddReturnAttributes(CS, VMap);
+
     // Propagate llvm.mem.parallel_loop_access if necessary.
     PropagateParallelLoopAccessMetadata(CS, VMap);
 

diff  --git a/llvm/test/Transforms/Inline/ret_attr_update.ll b/llvm/test/Transforms/Inline/ret_attr_update.ll
new file mode 100644
index 000000000000..2e53540c3fe2
--- /dev/null
+++ b/llvm/test/Transforms/Inline/ret_attr_update.ll
@@ -0,0 +1,159 @@
+; RUN: opt < %s -inline-threshold=0 -always-inline -S | FileCheck %s
+; RUN: opt < %s -passes=always-inline -S | FileCheck %s
+
+declare i8* @foo(i8*) argmemonly nounwind
+
+define i8* @callee(i8 *%p) alwaysinline {
+; CHECK: @callee(
+; CHECK: call i8* @foo(i8* noalias %p)
+  %r = call i8* @foo(i8* noalias %p)
+  ret i8* %r
+}
+
+define i8* @caller(i8* %ptr, i64 %x) {
+; CHECK-LABEL: @caller
+; CHECK: call nonnull i8* @foo(i8* noalias
+  %gep = getelementptr inbounds i8, i8* %ptr, i64 %x
+  %p = call nonnull i8* @callee(i8* %gep)
+  ret i8* %p
+}
+
+declare void @llvm.experimental.guard(i1,...)
+; Cannot add nonnull attribute to foo
+; because the guard is a throwing call
+define internal i8* @callee_with_throwable(i8* %p) alwaysinline {
+; CHECK-NOT: callee_with_throwable
+  %r = call i8* @foo(i8* %p)
+  %cond = icmp ne i8* %r, null
+  call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+  ret i8* %r
+}
+
+declare i8* @bar(i8*) readonly nounwind
+; Here also we cannot add nonnull attribute to the call bar.
+define internal i8* @callee_with_explicit_control_flow(i8* %p) alwaysinline {
+; CHECK-NOT: callee_with_explicit_control_flow
+  %r = call i8* @bar(i8* %p)
+  %cond = icmp ne i8* %r, null
+  br i1 %cond, label %ret, label %orig
+
+ret:
+  ret i8* %r
+
+orig:
+  ret i8* %p
+}
+
+define i8* @caller2(i8* %ptr, i64 %x, i1 %cond) {
+; CHECK-LABEL: @caller2
+; CHECK: call i8* @foo
+; CHECK: call i8* @bar
+  %gep = getelementptr inbounds i8, i8* %ptr, i64 %x
+  %p = call nonnull i8* @callee_with_throwable(i8* %gep)
+  %q = call nonnull i8* @callee_with_explicit_control_flow(i8* %gep)
+  br i1 %cond, label %pret, label %qret
+
+pret:
+  ret i8* %p
+
+qret:
+  ret i8* %q
+}
+
+define internal i8* @callee3(i8 *%p) alwaysinline {
+; CHECK-NOT: callee3
+  %r = call noalias i8* @foo(i8* %p)
+  ret i8* %r
+}
+
+; add the deref attribute to the existing attributes on foo.
+define i8* @caller3(i8* %ptr, i64 %x) {
+; CHECK-LABEL: caller3
+; CHECK: call noalias dereferenceable_or_null(12) i8* @foo
+  %gep = getelementptr inbounds i8, i8* %ptr, i64 %x
+  %p = call dereferenceable_or_null(12) i8* @callee3(i8* %gep)
+  ret i8* %p
+}
+
+declare i8* @inf_loop_call(i8*) nounwind
+; We cannot propagate attributes to foo because we do not know whether inf_loop_call
+; will return execution.
+define internal i8* @callee_with_sideeffect_callsite(i8* %p) alwaysinline {
+; CHECK-NOT: callee_with_sideeffect_callsite
+  %r = call i8* @foo(i8* %p)
+  %v = call i8* @inf_loop_call(i8* %p)
+  ret i8* %r
+}
+
+; do not add deref attribute to foo
+define i8* @test4(i8* %ptr, i64 %x) {
+; CHECK-LABEL: test4
+; CHECK: call i8* @foo
+  %gep = getelementptr inbounds i8, i8* %ptr, i64 %x
+  %p = call dereferenceable_or_null(12) i8* @callee_with_sideeffect_callsite(i8* %gep)
+  ret i8* %p
+}
+
+declare i8* @baz(i8*) nounwind readonly
+define internal i8* @callee5(i8* %p) alwaysinline {
+; CHECK-NOT: callee5
+  %r = call i8* @foo(i8* %p)
+  %v = call i8* @baz(i8* %p)
+  ret i8* %r
+}
+
+; add the deref attribute to foo.
+define i8* @test5(i8* %ptr, i64 %x) {
+; CHECK-LABEL: test5
+; CHECK: call dereferenceable_or_null(12) i8* @foo
+  %gep = getelementptr inbounds i8, i8* %ptr, i64 %x
+  %s = call dereferenceable_or_null(12) i8* @callee5(i8* %gep)
+  ret i8* %s
+}
+
+; deref attributes have 
diff erent values on the callee and the call feeding into
+; the return.
+; AttrBuilder chooses the already existing value and does not overwrite it.
+define internal i8* @callee6(i8* %p) alwaysinline {
+; CHECK-NOT: callee6
+  %r = call dereferenceable_or_null(16) i8* @foo(i8* %p)
+  %v = call i8* @baz(i8* %p)
+  ret i8* %r
+}
+
+
+define i8* @test6(i8* %ptr, i64 %x) {
+; CHECK-LABEL: test6
+; CHECK: call dereferenceable_or_null(16) i8* @foo
+  %gep = getelementptr inbounds i8, i8* %ptr, i64 %x
+  %s = call dereferenceable_or_null(12) i8* @callee6(i8* %gep)
+  ret i8* %s
+}
+
+; We add the attributes from the callee to both the calls below.
+define internal i8* @callee7(i8 *%ptr, i1 %cond) alwaysinline {
+; CHECK-NOT: @callee7(
+  br i1 %cond, label %pass, label %fail
+
+pass:
+  %r = call i8* @foo(i8* noalias %ptr)
+  ret i8* %r
+
+fail:
+  %s = call i8* @baz(i8* %ptr)
+  ret i8* %s
+}
+
+define void @test7(i8* %ptr, i64 %x, i1 %cond) {
+; CHECK-LABEL: @test7
+; CHECK: call nonnull i8* @foo(i8* noalias
+; CHECK: call nonnull i8* @baz
+; CHECK: phi i8*
+; CHECK: call void @snort
+
+  %gep = getelementptr inbounds i8, i8* %ptr, i64 %x
+  %t = call nonnull i8* @callee7(i8* %gep, i1 %cond)
+  call void @snort(i8* %t)
+  ret void
+}
+declare void @snort(i8*)


        


More information about the cfe-commits mailing list