[llvm] r366348 - [AMDGPU] Tune inlining parameters for AMDGPU target
Daniil Fukalov via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 17 09:51:29 PDT 2019
Author: dfukalov
Date: Wed Jul 17 09:51:29 2019
New Revision: 366348
URL: http://llvm.org/viewvc/llvm-project?rev=366348&view=rev
Log:
[AMDGPU] Tune inlining parameters for AMDGPU target
Summary:
Since the target gains no significant advantage from vectorization,
the vector instructions threshold bonus should be optional.
The amdgpu-inline-arg-alloca-cost parameter default value and the target
InliningThresholdMultiplier value are tuned accordingly.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, eraman, hiraditya, haicheng, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64642
Added:
llvm/trunk/test/Transforms/Inline/AMDGPU/inline-amdgpu-vecbonus.ll
Modified:
llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
llvm/trunk/lib/Analysis/InlineCost.cpp
llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUInline.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
llvm/trunk/test/CodeGen/AMDGPU/amdgpu-inline.ll
Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h?rev=366348&r1=366347&r2=366348&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h Wed Jul 17 09:51:29 2019
@@ -263,6 +263,18 @@ public:
/// individual classes of instructions would be better.
unsigned getInliningThresholdMultiplier() const;
+ /// \returns Vector bonus in percent.
+ ///
+ /// Vector bonuses: We want to more aggressively inline vector-dense kernels
+ /// and apply this bonus based on the percentage of vector instructions. A
+ /// bonus is applied if the vector instructions exceed 50% and half that amount
+ /// is applied if it exceeds 10%. Note that these bonuses are some what
+ /// arbitrary and evolved over time by accident as much as because they are
+ /// principled bonuses.
+ /// FIXME: It would be nice to base the bonus values on something more
+ /// scientific. A target may have no bonus on vector instructions.
+ int getInlinerVectorBonusPercent() const;
+
/// Estimate the cost of an intrinsic when lowered.
///
/// Mirrors the \c getCallCost method but uses an intrinsic identifier.
@@ -1128,6 +1140,7 @@ public:
virtual int getCallCost(const Function *F,
ArrayRef<const Value *> Arguments, const User *U) = 0;
virtual unsigned getInliningThresholdMultiplier() = 0;
+ virtual int getInlinerVectorBonusPercent() = 0;
virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> ParamTys, const User *U) = 0;
virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
@@ -1351,6 +1364,9 @@ public:
unsigned getInliningThresholdMultiplier() override {
return Impl.getInliningThresholdMultiplier();
}
+ int getInlinerVectorBonusPercent() override {
+ return Impl.getInlinerVectorBonusPercent();
+ }
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> ParamTys, const User *U = nullptr) override {
return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U);
Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h?rev=366348&r1=366347&r2=366348&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h Wed Jul 17 09:51:29 2019
@@ -140,6 +140,8 @@ public:
unsigned getInliningThresholdMultiplier() { return 1; }
+ int getInlinerVectorBonusPercent() { return 150; }
+
unsigned getMemcpyCost(const Instruction *I) {
return TTI::TCC_Expensive;
}
Modified: llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h?rev=366348&r1=366347&r2=366348&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h (original)
+++ llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h Wed Jul 17 09:51:29 2019
@@ -427,6 +427,8 @@ public:
unsigned getInliningThresholdMultiplier() { return 1; }
+ int getInlinerVectorBonusPercent() { return 150; }
+
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
// This unrolling functionality is target independent, but to provide some
Modified: llvm/trunk/lib/Analysis/InlineCost.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/InlineCost.cpp?rev=366348&r1=366347&r2=366348&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/InlineCost.cpp (original)
+++ llvm/trunk/lib/Analysis/InlineCost.cpp Wed Jul 17 09:51:29 2019
@@ -880,15 +880,6 @@ void CallAnalyzer::updateThreshold(CallB
// basic block at the given callsite context. This is speculatively applied
// and withdrawn if more than one basic block is seen.
//
- // Vector bonuses: We want to more aggressively inline vector-dense kernels
- // and apply this bonus based on the percentage of vector instructions. A
- // bonus is applied if the vector instructions exceed 50% and half that amount
- // is applied if it exceeds 10%. Note that these bonuses are some what
- // arbitrary and evolved over time by accident as much as because they are
- // principled bonuses.
- // FIXME: It would be nice to base the bonus values on something more
- // scientific.
- //
// LstCallToStaticBonus: This large bonus is applied to ensure the inlining
// of the last call to a static function as inlining such functions is
// guaranteed to reduce code size.
@@ -896,7 +887,7 @@ void CallAnalyzer::updateThreshold(CallB
// These bonus percentages may be set to 0 based on properties of the caller
// and the callsite.
int SingleBBBonusPercent = 50;
- int VectorBonusPercent = 150;
+ int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
// Lambda to set all the above bonus and bonus percentages to 0.
Modified: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/TargetTransformInfo.cpp?rev=366348&r1=366347&r2=366348&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp Wed Jul 17 09:51:29 2019
@@ -176,6 +176,10 @@ unsigned TargetTransformInfo::getInlinin
return TTIImpl->getInliningThresholdMultiplier();
}
+int TargetTransformInfo::getInlinerVectorBonusPercent() const {
+ return TTIImpl->getInlinerVectorBonusPercent();
+}
+
int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands) const {
return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInline.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInline.cpp?rev=366348&r1=366347&r2=366348&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInline.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInline.cpp Wed Jul 17 09:51:29 2019
@@ -39,7 +39,7 @@ using namespace llvm;
#define DEBUG_TYPE "inline"
static cl::opt<int>
-ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(2200),
+ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(1500),
cl::desc("Cost of alloca argument"));
// If the amount of scratch memory to eliminate exceeds our ability to allocate
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h?rev=366348&r1=366347&r2=366348&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h Wed Jul 17 09:51:29 2019
@@ -191,7 +191,9 @@ public:
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
- unsigned getInliningThresholdMultiplier() { return 9; }
+ unsigned getInliningThresholdMultiplier() { return 7; }
+
+ int getInlinerVectorBonusPercent() { return 0; }
int getArithmeticReductionCost(unsigned Opcode,
Type *Ty,
Modified: llvm/trunk/test/CodeGen/AMDGPU/amdgpu-inline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/amdgpu-inline.ll?rev=366348&r1=366347&r2=366348&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/amdgpu-inline.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/amdgpu-inline.ll Wed Jul 17 09:51:29 2019
@@ -28,15 +28,8 @@ if.end:
define coldcc void @foo_private_ptr2(float addrspace(5)* nocapture %p1, float addrspace(5)* nocapture %p2) {
entry:
%tmp1 = load float, float addrspace(5)* %p1, align 4
- %cmp = fcmp ogt float %tmp1, 1.000000e+00
- br i1 %cmp, label %if.then, label %if.end
-
-if.then: ; preds = %entry
%div = fdiv float 2.000000e+00, %tmp1
store float %div, float addrspace(5)* %p2, align 4
- br label %if.end
-
-if.end: ; preds = %if.then, %entry
ret void
}
Added: llvm/trunk/test/Transforms/Inline/AMDGPU/inline-amdgpu-vecbonus.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/AMDGPU/inline-amdgpu-vecbonus.ll?rev=366348&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Inline/AMDGPU/inline-amdgpu-vecbonus.ll (added)
+++ llvm/trunk/test/Transforms/Inline/AMDGPU/inline-amdgpu-vecbonus.ll Wed Jul 17 09:51:29 2019
@@ -0,0 +1,31 @@
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-inline --inline-threshold=1 < %s | FileCheck %s
+
+define hidden <16 x i32> @div_vecbonus(<16 x i32> %x, <16 x i32> %y) {
+entry:
+ %div.1 = udiv <16 x i32> %x, %y
+ %div.2 = udiv <16 x i32> %div.1, %y
+ %div.3 = udiv <16 x i32> %div.2, %y
+ %div.4 = udiv <16 x i32> %div.3, %y
+ %div.5 = udiv <16 x i32> %div.4, %y
+ %div.6 = udiv <16 x i32> %div.5, %y
+ %div.7 = udiv <16 x i32> %div.6, %y
+ %div.8 = udiv <16 x i32> %div.7, %y
+ %div.9 = udiv <16 x i32> %div.8, %y
+ %div.10 = udiv <16 x i32> %div.9, %y
+ %div.11 = udiv <16 x i32> %div.10, %y
+ %div.12 = udiv <16 x i32> %div.11, %y
+ ret <16 x i32> %div.12
+}
+
+; CHECK-LABEL: define amdgpu_kernel void @caller_vecbonus
+; CHECK-NOT: udiv
+; CHECK: tail call <16 x i32> @div_vecbonus
+; CHECK: ret void
+define amdgpu_kernel void @caller_vecbonus(<16 x i32> addrspace(1)* nocapture %x, <16 x i32> addrspace(1)* nocapture readonly %y) {
+entry:
+ %tmp = load <16 x i32>, <16 x i32> addrspace(1)* %x
+ %tmp1 = load <16 x i32>, <16 x i32> addrspace(1)* %y
+ %div.i = tail call <16 x i32> @div_vecbonus(<16 x i32> %tmp, <16 x i32> %tmp1)
+ store <16 x i32> %div.i, <16 x i32> addrspace(1)* %x
+ ret void
+}
More information about the llvm-commits
mailing list