[PATCH] D70391: [AMDGPU] Tune inlining parameters for AMDGPU target (part 2)

Tue Nov 19 05:38:44 PST 2019

This revision was automatically updated to reflect the committed changes.
Closed by commit rG6fd11b14f604: [AMDGPU] Tune inlining parameters for AMDGPU target (part 2) (authored by dfukalov).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70391/new/

https://reviews.llvm.org/D70391

Files:
  llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
  llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
  llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
  llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
  llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll


Index: llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll
===================================================================

--- llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll
+++ llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll
@@ -1,4 +1,4 @@
-; RUN: opt -data-layout=A5 -mtriple=amdgcn-unknown-amdhsa -loop-unroll -S -amdgpu-unroll-threshold-private=12000 %s | FileCheck %s
+; RUN: opt -data-layout=A5 -mtriple=amdgcn-unknown-amdhsa -loop-unroll -S %s | FileCheck %s
 
 ; Check that we full unroll loop to be able to eliminate alloca
 ; CHECK-LABEL: @non_invariant_ind
Index: llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
+++ llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
@@ -28,8 +28,15 @@
 define coldcc void @foo_private_ptr2(float addrspace(5)* nocapture %p1, float addrspace(5)* nocapture %p2) {
 entry:
   %tmp1 = load float, float addrspace(5)* %p1, align 4
+  %cmp = fcmp ogt float %tmp1, 1.000000e+00
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
   %div = fdiv float 2.000000e+00, %tmp1
   store float %div, float addrspace(5)* %p2, align 4
+  br label %if.end
+
+if.end:
   ret void
 }
 
Index: llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -204,7 +204,7 @@
   bool areInlineCompatible(const Function *Caller,
                            const Function *Callee) const;
 
-  unsigned getInliningThresholdMultiplier() { return 9; }
+  unsigned getInliningThresholdMultiplier() { return 11; }
 
   int getInlinerVectorBonusPercent() { return 0; }
 
Index: llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -57,7 +57,7 @@
 static cl::opt<unsigned> UnrollThresholdPrivate(
   "amdgpu-unroll-threshold-private",
   cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"),
-  cl::init(2000), cl::Hidden);
+  cl::init(2700), cl::Hidden);
 
 static cl::opt<unsigned> UnrollThresholdLocal(
   "amdgpu-unroll-threshold-local",
Index: llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
@@ -39,7 +39,7 @@
 #define DEBUG_TYPE "inline"
 
 static cl::opt<int>
-ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(1500),
+ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(4000),
               cl::desc("Cost of alloca argument"));
 
 // If the amount of scratch memory to eliminate exceeds our ability to allocate


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D70391.230052.patch
Type: text/x-patch
Size: 2936 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20191119/397b15ac/attachment.bin>