[llvm] 3d39709 - AMDGPU: Remove wrapper only call limitation

Tue Jan 12 14:13:18 PST 2021

Author: Matt Arsenault
Date: 2021-01-12T17:12:49-05:00
New Revision: 3d397091591fca4aa16153bba22f031218bee47d

URL: https://github.com/llvm/llvm-project/commit/3d397091591fca4aa16153bba22f031218bee47d
DIFF: https://github.com/llvm/llvm-project/commit/3d397091591fca4aa16153bba22f031218bee47d.diff

LOG: AMDGPU: Remove wrapper only call limitation

The wrapper-only call check seems to have only overridden the cold
handling, which we probably shouldn't do. As far as I can tell, the
wrapper library functions are still inlined as appropriate.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
    llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
index 3b96a6a85879..4e689b392802 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
@@ -145,26 +145,6 @@ unsigned AMDGPUInliner::getInlineThreshold(CallBase &CB) const {
   return (unsigned)Thres;
 }
 
-// Check if call is just a wrapper around another call.
-// In this case we only have call and ret instructions.
-static bool isWrapperOnlyCall(CallBase &CB) {
-  Function *Callee = CB.getCalledFunction();
-  if (!Callee || Callee->size() != 1)
-    return false;
-  const BasicBlock &BB = Callee->getEntryBlock();
-  if (const Instruction *I = BB.getFirstNonPHI()) {
-    if (!isa<CallInst>(I)) {
-      return false;
-    }
-    if (isa<ReturnInst>(*std::next(I->getIterator()))) {
-      LLVM_DEBUG(dbgs() << "    Wrapper only call detected: "
-                        << Callee->getName() << '\n');
-      return true;
-    }
-  }
-  return false;
-}
-
 InlineCost AMDGPUInliner::getInlineCost(CallBase &CB) {
   Function *Callee = CB.getCalledFunction();
   Function *Caller = CB.getCaller();
@@ -186,9 +166,6 @@ InlineCost AMDGPUInliner::getInlineCost(CallBase &CB) {
     return llvm::InlineCost::getNever(IsViable.getFailureReason());
   }
 
-  if (isWrapperOnlyCall(CB))
-    return llvm::InlineCost::getAlways("wrapper-only call");
-
   InlineParams LocalParams = Params;
   LocalParams.DefaultThreshold = (int)getInlineThreshold(CB);
   bool RemarksEnabled = false;

diff  --git a/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
index 243522e28dd7..dd06fc17e8ed 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
@@ -40,7 +40,7 @@ if.end:
   ret void
 }
 
-define coldcc float @sin_wrapper(float %x) {
+define float @sin_wrapper(float %x) {
 bb:
   %call = tail call float @_Z3sinf(float %x)
   ret float %call
@@ -83,7 +83,7 @@ entry:
   %and = and i32 %tid, %n
   %arrayidx11 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 %and
   %tmp12 = load float, float addrspace(5)* %arrayidx11, align 4
-  %c2 = call coldcc float @sin_wrapper(float %tmp12)
+  %c2 = call float @sin_wrapper(float %tmp12)
   store float %c2, float addrspace(5)* %arrayidx7, align 4
   %xor = xor i32 %tid, %n
   %arrayidx16 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 %xor