[llvm] 8f4b7e9 - [AMDGPU][CostModel] Refine cost model for control-flow instructions.
via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 9 23:20:40 PDT 2021
Author: dfukalov
Date: 2021-04-10T09:20:24+03:00
New Revision: 8f4b7e94a2b4465b7dac889da286cbd26309457b
URL: https://github.com/llvm/llvm-project/commit/8f4b7e94a2b4465b7dac889da286cbd26309457b
DIFF: https://github.com/llvm/llvm-project/commit/8f4b7e94a2b4465b7dac889da286cbd26309457b.diff
LOG: [AMDGPU][CostModel] Refine cost model for control-flow instructions.
Added cost estimation for the switch instruction, updated the costs of
branches, and fixed the phi cost.
Had to increase the `-amdgpu-unroll-threshold-if` default value since the
conditional branch cost (size) was corrected to a higher value.
Renamed the test from "br.ll" to "control-flow.ll".
Removed redundant code in `X86TTIImpl::getCFInstrCost()` and
`PPCTTIImpl::getCFInstrCost()`.
Reviewed By: rampitec
Differential Revision: https://reviews.llvm.org/D96805
Added:
llvm/test/Analysis/CostModel/AMDGPU/control-flow.ll
Modified:
llvm/include/llvm/Analysis/TargetTransformInfo.h
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/include/llvm/CodeGen/BasicTTIImpl.h
llvm/lib/Analysis/TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
llvm/lib/Target/ARM/ARMTargetTransformInfo.h
llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/lib/Target/X86/X86TargetTransformInfo.h
llvm/test/CodeGen/AMDGPU/unroll.ll
llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-cost-addrspacecast.ll
Removed:
llvm/test/Analysis/CostModel/AMDGPU/br.ll
################################################################################
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 691e0086b559a..8773da7873315 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1103,9 +1103,10 @@ class TargetTransformInfo {
unsigned Index = -1) const;
/// \return The expected cost of control-flow related instructions such as
- /// Phi, Ret, Br.
+ /// Phi, Ret, Br, Switch.
int getCFInstrCost(unsigned Opcode,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ const Instruction *I = nullptr) const;
/// \returns The expected cost of compare and select instructions. If there
/// is an existing instruction that holds Opcode, it may be passed in the
@@ -1573,8 +1574,8 @@ class TargetTransformInfo::Concept {
const Instruction *I) = 0;
virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
VectorType *VecTy, unsigned Index) = 0;
- virtual int getCFInstrCost(unsigned Opcode,
- TTI::TargetCostKind CostKind) = 0;
+ virtual int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr) = 0;
virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
@@ -2040,8 +2041,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
unsigned Index) override {
return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
}
- int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) override {
- return Impl.getCFInstrCost(Opcode, CostKind);
+ int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr) override {
+ return Impl.getCFInstrCost(Opcode, CostKind, I);
}
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
CmpInst::Predicate VecPred,
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 793318014348f..dcf82db007e36 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -512,7 +512,8 @@ class TargetTransformInfoImplBase {
return 1;
}
- unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) const {
+ unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr) const {
// A phi would be free, unless we're costing the throughput because it
// will require a register.
if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
@@ -933,7 +934,8 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
case Instruction::Br:
case Instruction::Ret:
case Instruction::PHI:
- return TargetTTI->getCFInstrCost(Opcode, CostKind);
+ case Instruction::Switch:
+ return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
case Instruction::ExtractValue:
case Instruction::Freeze:
return TTI::TCC_Free;
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 70dda7caf7c82..7732501be2b26 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -897,8 +897,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
TTI::CastContextHint::None, TTI::TCK_RecipThroughput);
}
- unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
- return BaseT::getCFInstrCost(Opcode, CostKind);
+ unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr) {
+ return BaseT::getCFInstrCost(Opcode, CostKind, I);
}
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 9e6ff37beba6e..2be0a0cdd0b57 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -783,8 +783,11 @@ int TargetTransformInfo::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
}
int TargetTransformInfo::getCFInstrCost(unsigned Opcode,
- TTI::TargetCostKind CostKind) const {
- int Cost = TTIImpl->getCFInstrCost(Opcode, CostKind);
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) const {
+ assert((I == nullptr || I->getOpcode() == Opcode) &&
+ "Opcode should reflect passed instruction.");
+ int Cost = TTIImpl->getCFInstrCost(Opcode, CostKind, I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -1374,6 +1377,7 @@ TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
case Instruction::ExtractValue:
case Instruction::ShuffleVector:
case Instruction::Call:
+ case Instruction::Switch:
return getUserCost(I, CostKind);
default:
// We don't have any information on this instruction.
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index edcb667c3d4eb..148239b3d7899 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -653,7 +653,8 @@ int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
}
unsigned AArch64TTIImpl::getCFInstrCost(unsigned Opcode,
- TTI::TargetCostKind CostKind) {
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
if (CostKind != TTI::TCK_RecipThroughput)
return Opcode == Instruction::PHI ? 0 : 1;
assert(CostKind == TTI::TCK_RecipThroughput && "unexpected CostKind");
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 39be2645bfc6c..7a6cfd36fcc31 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -139,7 +139,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
unsigned Index);
- unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
+ unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 7fa5d23bdaff1..ac5df7198825b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -39,7 +39,7 @@ static cl::opt<unsigned> UnrollThresholdLocal(
static cl::opt<unsigned> UnrollThresholdIf(
"amdgpu-unroll-threshold-if",
cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop"),
- cl::init(150), cl::Hidden);
+ cl::init(200), cl::Hidden);
static cl::opt<bool> UnrollRuntimeLocal(
"amdgpu-unroll-runtime-local",
@@ -106,6 +106,10 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
UP.MaxCount = std::numeric_limits<unsigned>::max();
UP.Partial = true;
+ // Conditional branch in a loop back edge needs 3 additional exec
+ // manipulations in average.
+ UP.BEInsns += 3;
+
// TODO: Do we want runtime unrolling?
// Maximum alloca size than can fit registers. Reserve 16 registers.
@@ -809,18 +813,37 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
}
unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode,
- TTI::TargetCostKind CostKind) {
- if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
- return Opcode == Instruction::PHI ? 0 : 1;
-
- // XXX - For some reason this isn't called for switch.
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
+ assert((I == nullptr || I->getOpcode() == Opcode) &&
+ "Opcode should reflect passed instruction.");
+ const bool SCost =
+ (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency);
+ const int CBrCost = SCost ? 5 : 7;
switch (Opcode) {
- case Instruction::Br:
+ case Instruction::Br: {
+ // Branch instruction takes about 4 slots on gfx900.
+ auto BI = dyn_cast_or_null<BranchInst>(I);
+ if (BI && BI->isUnconditional())
+ return SCost ? 1 : 4;
+ // Suppose conditional branch takes additional 3 exec manipulations
+ // instructions in average.
+ return CBrCost;
+ }
+ case Instruction::Switch: {
+ auto SI = dyn_cast_or_null<SwitchInst>(I);
+ // Each case (including default) takes 1 cmp + 1 cbr instructions in
+ // average.
+ return (SI ? (SI->getNumCases() + 1) : 4) * (CBrCost + 1);
+ }
case Instruction::Ret:
- return 10;
- default:
- return BaseT::getCFInstrCost(Opcode, CostKind);
+ return SCost ? 1 : 10;
+ case Instruction::PHI:
+ // TODO: 1. A prediction phi won't be eliminated?
+ // 2. Estimate data copy instructions in this case.
+ return 1;
}
+ return BaseT::getCFInstrCost(Opcode, CostKind, I);
}
int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
@@ -1292,7 +1315,8 @@ unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
}
unsigned R600TTIImpl::getCFInstrCost(unsigned Opcode,
- TTI::TargetCostKind CostKind) {
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
return Opcode == Instruction::PHI ? 0 : 1;
@@ -1302,7 +1326,7 @@ unsigned R600TTIImpl::getCFInstrCost(unsigned Opcode,
case Instruction::Ret:
return 10;
default:
- return BaseT::getCFInstrCost(Opcode, CostKind);
+ return BaseT::getCFInstrCost(Opcode, CostKind, I);
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 56282af75abf8..a462dd9f02ca2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -163,7 +163,8 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr);
- unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
+ unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
bool isInlineAsmSourceOfDivergence(const CallInst *CI,
ArrayRef<unsigned> Indices = {}) const;
@@ -253,7 +254,8 @@ class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const;
unsigned getMaxInterleaveFactor(unsigned VF);
- unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
+ unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
};
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index f72adfa90bc5d..7a8ae943d124c 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -379,7 +379,8 @@ int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
return getIntImmCost(Imm, Ty, CostKind);
}
-int ARMTTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
+int ARMTTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+ const Instruction *I) {
if (CostKind == TTI::TCK_RecipThroughput &&
(ST->hasNEON() || ST->hasMVEIntegerOps())) {
// FIXME: The vectorizer is highly sensistive to the cost of these
@@ -388,7 +389,7 @@ int ARMTTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
// vector targets.
return 0;
}
- return BaseT::getCFInstrCost(Opcode, CostKind);
+ return BaseT::getCFInstrCost(Opcode, CostKind, I);
}
int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index b0aafce5d30c5..dda1b6dbef094 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -198,8 +198,8 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }
- int getCFInstrCost(unsigned Opcode,
- TTI::TargetCostKind CostKind);
+ int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index 9281217ad0268..e4c1facf5552e 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -153,7 +153,8 @@ class HexagonTTIImpl : public BasicTTIImplBase<HexagonTTIImpl> {
const Instruction *I = nullptr);
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
- unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
+ unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr) {
return 1;
}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 398254f9d50ec..8be9117104b9f 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1000,11 +1000,12 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
nullptr);
}
-int PPCTTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
+int PPCTTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+ const Instruction *I) {
if (CostKind != TTI::TCK_RecipThroughput)
return Opcode == Instruction::PHI ? 0 : 1;
// Branches are assumed to be predicted.
- return CostKind == TTI::TCK_RecipThroughput ? 0 : 1;
+ return 0;
}
int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index db618d0fbcf4e..acd453fca45cd 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -112,7 +112,8 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
- int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
+ int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index a7479330a9a9f..9b3ad4538ce30 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -4076,12 +4076,13 @@ int X86TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
return X86TTIImpl::getIntImmCost(Imm, Ty, CostKind);
}
-unsigned
-X86TTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
+unsigned X86TTIImpl::getCFInstrCost(unsigned Opcode,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
if (CostKind != TTI::TCK_RecipThroughput)
return Opcode == Instruction::PHI ? 0 : 1;
// Branches are assumed to be predicted.
- return CostKind == TTI::TCK_RecipThroughput ? 0 : 1;
+ return 0;
}
int X86TTIImpl::getGatherOverhead() const {
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 0e6bfc02aee63..3fba7feab1398 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -203,7 +203,8 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
- unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
+ unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
Type *Ty, TTI::TargetCostKind CostKind,
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/br.ll b/llvm/test/Analysis/CostModel/AMDGPU/br.ll
deleted file mode 100644
index 9c85ccabb0a2a..0000000000000
--- a/llvm/test/Analysis/CostModel/AMDGPU/br.ll
+++ /dev/null
@@ -1,45 +0,0 @@
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s
-
-; CHECK: 'test_br_cost'
-; CHECK: estimated cost of 10 for instruction: br i1
-; CHECK: estimated cost of 10 for instruction: br label
-; CHECK: estimated cost of 10 for instruction: ret void
-define amdgpu_kernel void @test_br_cost(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
-bb0:
- br i1 undef, label %bb1, label %bb2
-
-bb1:
- %vec = load i32, i32 addrspace(1)* %vaddr
- %add = add i32 %vec, %b
- store i32 %add, i32 addrspace(1)* %out
- br label %bb2
-
-bb2:
- ret void
-
-}
-
-; CHECK: 'test_switch_cost'
-; CHECK: estimated cost of -1 for instruction: switch
-define amdgpu_kernel void @test_switch_cost(i32 %a) #0 {
-entry:
- switch i32 %a, label %default [
- i32 0, label %case0
- i32 1, label %case1
- ]
-
-case0:
- store volatile i32 undef, i32 addrspace(1)* undef
- ret void
-
-case1:
- store volatile i32 undef, i32 addrspace(1)* undef
- ret void
-
-default:
- store volatile i32 undef, i32 addrspace(1)* undef
- ret void
-
-end:
- ret void
-}
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/control-flow.ll b/llvm/test/Analysis/CostModel/AMDGPU/control-flow.ll
new file mode 100644
index 0000000000000..ef469ccb48278
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AMDGPU/control-flow.ll
@@ -0,0 +1,52 @@
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck --check-prefixes=ALL,SPEED %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck --check-prefixes=ALL,SIZE %s
+
+; ALL-LABEL: 'test_br_cost'
+; SPEED: estimated cost of 7 for instruction: br i1
+; SPEED: estimated cost of 4 for instruction: br label
+; SPEED: estimated cost of 1 for instruction: %phi = phi i32 [
+; SPEED: estimated cost of 10 for instruction: ret void
+; SIZE: estimated cost of 5 for instruction: br i1
+; SIZE: estimated cost of 1 for instruction: br label
+; SIZE: estimated cost of 1 for instruction: %phi = phi i32 [
+; SIZE: estimated cost of 1 for instruction: ret void
+define amdgpu_kernel void @test_br_cost(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
+bb0:
+ br i1 undef, label %bb1, label %bb2
+
+bb1:
+ %vec = load i32, i32 addrspace(1)* %vaddr
+ %add = add i32 %vec, %b
+ store i32 %add, i32 addrspace(1)* %out
+ br label %bb2
+
+bb2:
+ %phi = phi i32 [ %b, %bb0 ], [ %add, %bb1 ]
+ ret void
+}
+
+; ALL-LABEL: 'test_switch_cost'
+; SPEED: estimated cost of 24 for instruction: switch
+; SIZE: estimated cost of 18 for instruction: switch
+define amdgpu_kernel void @test_switch_cost(i32 %a) #0 {
+entry:
+ switch i32 %a, label %default [
+ i32 0, label %case0
+ i32 1, label %case1
+ ]
+
+case0:
+ store volatile i32 undef, i32 addrspace(1)* undef
+ ret void
+
+case1:
+ store volatile i32 undef, i32 addrspace(1)* undef
+ ret void
+
+default:
+ store volatile i32 undef, i32 addrspace(1)* undef
+ ret void
+
+end:
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/unroll.ll b/llvm/test/CodeGen/AMDGPU/unroll.ll
index 10f77c795378a..e05021d53e429 100644
--- a/llvm/test/CodeGen/AMDGPU/unroll.ll
+++ b/llvm/test/CodeGen/AMDGPU/unroll.ll
@@ -81,8 +81,7 @@ entry:
for.body: ; preds = %entry, %for.inc
%i1 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
- %and = and i32 %i1, 1
- %tobool = icmp eq i32 %and, 0
+ %tobool = icmp eq i32 %i1, 0
br i1 %tobool, label %for.inc, label %if.then
if.then: ; preds = %for.body
@@ -93,7 +92,7 @@ if.then: ; preds = %for.body
for.inc: ; preds = %for.body, %if.then
%inc = add nuw nsw i32 %i1, 1
- %cmp = icmp ult i32 %inc, 48
+ %cmp = icmp ult i32 %inc, 38
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.cond
diff --git a/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-cost-addrspacecast.ll b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-cost-addrspacecast.ll
index 761aa077606b4..a31af401a72ab 100644
--- a/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-cost-addrspacecast.ll
+++ b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-cost-addrspacecast.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -loop-unroll -unroll-threshold=75 -unroll-peel-count=0 -unroll-allow-partial=false -unroll-max-iteration-count-to-analyze=16 < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -loop-unroll -unroll-threshold=49 -unroll-peel-count=0 -unroll-allow-partial=false -unroll-max-iteration-count-to-analyze=16 < %s | FileCheck %s
; CHECK-LABEL: @test_func_addrspacecast_cost_noop(
; CHECK-NOT: br i1
More information about the llvm-commits mailing list