[PATCH] D70391: [AMDGPU] Tune inlining parameters for AMDGPU target (part 2)
Daniil Fukalov via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 19 05:38:44 PST 2019
This revision was automatically updated to reflect the committed changes.
Closed by commit rG6fd11b14f604: [AMDGPU] Tune inlining parameters for AMDGPU target (part 2) (authored by dfukalov).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D70391/new/
https://reviews.llvm.org/D70391
Files:
llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll
Index: llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll
===================================================================
--- llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll
+++ llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll
@@ -1,4 +1,4 @@
-; RUN: opt -data-layout=A5 -mtriple=amdgcn-unknown-amdhsa -loop-unroll -S -amdgpu-unroll-threshold-private=12000 %s | FileCheck %s
+; RUN: opt -data-layout=A5 -mtriple=amdgcn-unknown-amdhsa -loop-unroll -S %s | FileCheck %s
; Check that we full unroll loop to be able to eliminate alloca
; CHECK-LABEL: @non_invariant_ind
Index: llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
+++ llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
@@ -28,8 +28,15 @@
define coldcc void @foo_private_ptr2(float addrspace(5)* nocapture %p1, float addrspace(5)* nocapture %p2) {
entry:
%tmp1 = load float, float addrspace(5)* %p1, align 4
+ %cmp = fcmp ogt float %tmp1, 1.000000e+00
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
%div = fdiv float 2.000000e+00, %tmp1
store float %div, float addrspace(5)* %p2, align 4
+ br label %if.end
+
+if.end:
ret void
}
Index: llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -204,7 +204,7 @@
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
- unsigned getInliningThresholdMultiplier() { return 9; }
+ unsigned getInliningThresholdMultiplier() { return 11; }
int getInlinerVectorBonusPercent() { return 0; }
Index: llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -57,7 +57,7 @@
static cl::opt<unsigned> UnrollThresholdPrivate(
"amdgpu-unroll-threshold-private",
cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"),
- cl::init(2000), cl::Hidden);
+ cl::init(2700), cl::Hidden);
static cl::opt<unsigned> UnrollThresholdLocal(
"amdgpu-unroll-threshold-local",
Index: llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
@@ -39,7 +39,7 @@
#define DEBUG_TYPE "inline"
static cl::opt<int>
-ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(1500),
+ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(4000),
cl::desc("Cost of alloca argument"));
// If the amount of scratch memory to eliminate exceeds our ability to allocate
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D70391.230052.patch
Type: text/x-patch
Size: 2936 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20191119/397b15ac/attachment.bin>
More information about the llvm-commits mailing list