[llvm] 4ccc388 - [AMDGPU][CostModel] Add f16, f64 and contract cases to fused costs estimation.
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 6 11:43:41 PDT 2020
Author: dfukalov
Date: 2020-08-06T21:43:27+03:00
New Revision: 4ccc38813eb76c7984f2700df527c643abeb9a58
URL: https://github.com/llvm/llvm-project/commit/4ccc38813eb76c7984f2700df527c643abeb9a58
DIFF: https://github.com/llvm/llvm-project/commit/4ccc38813eb76c7984f2700df527c643abeb9a58.diff
LOG: [AMDGPU][CostModel] Add f16, f64 and contract cases to fused costs estimation.
Add cases of fused fmul+fadd/fsub with f16 and f64 operands to the cost model.
Also cover operations that carry the contract fast-math flag.
Fix line endings in the test.
Reviewed By: rampitec
Differential Revision: https://reviews.llvm.org/D84995
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index da00a993bd64..3321187fa65a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -510,11 +510,21 @@ int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
// Check possible fuse {fadd|fsub}(a,fmul(b,c)) and return zero cost for
// fmul(b,c) supposing the fadd|fsub will get estimated cost for the whole
// fused operation.
- if (!HasFP32Denormals && SLT == MVT::f32 && CxtI && CxtI->hasOneUse())
+ if (CxtI && CxtI->hasOneUse())
if (const auto *FAdd = dyn_cast<BinaryOperator>(*CxtI->user_begin())) {
const int OPC = TLI->InstructionOpcodeToISD(FAdd->getOpcode());
if (OPC == ISD::FADD || OPC == ISD::FSUB) {
- return TargetTransformInfo::TCC_Free;
+ if (ST->hasMadMacF32Insts() && SLT == MVT::f32 && !HasFP32Denormals)
+ return TargetTransformInfo::TCC_Free;
+ if (ST->has16BitInsts() && SLT == MVT::f16 && !HasFP64FP16Denormals)
+ return TargetTransformInfo::TCC_Free;
+
+ // Estimate all types may be fused with contract/unsafe flags
+ const TargetOptions &Options = TLI->getTargetMachine().Options;
+ if (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath ||
+ (FAdd->hasAllowContract() && CxtI->hasAllowContract()))
+ return TargetTransformInfo::TCC_Free;
}
}
LLVM_FALLTHROUGH;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index b060221330cb..e6276e87ecaf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -78,6 +78,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
AMDGPUTTIImpl CommonTTI;
bool IsGraphicsShader;
bool HasFP32Denormals;
+ bool HasFP64FP16Denormals;
unsigned MaxVGPRs;
const FeatureBitset InlineFeatureIgnoreList = {
@@ -133,16 +134,18 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
public:
explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
- : BaseT(TM, F.getParent()->getDataLayout()),
- ST(static_cast<const GCNSubtarget*>(TM->getSubtargetImpl(F))),
- TLI(ST->getTargetLowering()),
- CommonTTI(TM, F),
- IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())),
- HasFP32Denormals(AMDGPU::SIModeRegisterDefaults(F).allFP32Denormals()),
- MaxVGPRs(ST->getMaxNumVGPRs(
- std::max(ST->getWavesPerEU(F).first,
- ST->getWavesPerEUForWorkGroup(
- ST->getFlatWorkGroupSizes(F).second)))) {}
+ : BaseT(TM, F.getParent()->getDataLayout()),
+ ST(static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F))),
+ TLI(ST->getTargetLowering()), CommonTTI(TM, F),
+ IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())),
+ MaxVGPRs(ST->getMaxNumVGPRs(
+ std::max(ST->getWavesPerEU(F).first,
+ ST->getWavesPerEUForWorkGroup(
+ ST->getFlatWorkGroupSizes(F).second)))) {
+ AMDGPU::SIModeRegisterDefaults Mode(F);
+ HasFP32Denormals = Mode.allFP32Denormals();
+ HasFP64FP16Denormals = Mode.allFP64FP16Denormals();
+ }
bool hasBranchDivergence() { return true; }
bool useGPUDivergenceAnalysis() const;
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll b/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll
index 7af1e48125d7..21067738af23 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll
@@ -1,48 +1,163 @@
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=FUSED,ALL %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=SLOW,ALL %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=FUSED,ALL %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=SLOW,ALL %s
-
-target triple = "amdgcn--"
-
-; ALL-LABEL: 'fmul_fadd_f32':
-; FUSED: estimated cost of 0 for instruction: %mul = fmul float
-; SLOW: estimated cost of 1 for instruction: %mul = fmul float
-; ALL: estimated cost of 1 for instruction: %add = fadd float
-define float @fmul_fadd_f32(float %r0, float %r1, float %r2) #0 {
- %mul = fmul float %r0, %r1
- %add = fadd float %mul, %r2
- ret float %add
-}
-
-; ALL-LABEL: 'fmul_fadd_v2f32':
-; FUSED: estimated cost of 0 for instruction: %mul = fmul <2 x float>
-; SLOW: estimated cost of 2 for instruction: %mul = fmul <2 x float>
-; ALL: estimated cost of 2 for instruction: %add = fadd <2 x float>
-define <2 x float> @fmul_fadd_v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2) #0 {
- %mul = fmul <2 x float> %r0, %r1
- %add = fadd <2 x float> %mul, %r2
- ret <2 x float> %add
-}
-
-; ALL-LABEL: 'fmul_fsub_f32':
-; FUSED: estimated cost of 0 for instruction: %mul = fmul float
-; SLOW: estimated cost of 1 for instruction: %mul = fmul float
-; ALL: estimated cost of 1 for instruction: %sub = fsub float
-define float @fmul_fsub_f32(float %r0, float %r1, float %r2) #0 {
- %mul = fmul float %r0, %r1
- %sub = fsub float %mul, %r2
- ret float %sub
-}
-
-; ALL-LABEL: 'fmul_fsub_v2f32':
-; FUSED: estimated cost of 0 for instruction: %mul = fmul <2 x float>
-; SLOW: estimated cost of 2 for instruction: %mul = fmul <2 x float>
-; ALL: estimated cost of 2 for instruction: %sub = fsub <2 x float>
-define <2 x float> @fmul_fsub_v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2) #0 {
- %mul = fmul <2 x float> %r0, %r1
- %sub = fsub <2 x float> %mul, %r2
- ret <2 x float> %sub
-}
-
-attributes #0 = { nounwind }
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -denormal-fp-math=preserve-sign -fp-contract=on < %s | FileCheck -check-prefixes=FUSED,NOCONTRACT,ALL %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=ieee -denormal-fp-math=ieee -fp-contract=on < %s | FileCheck -check-prefixes=SLOW,NOCONTRACT,ALL %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=ieee -denormal-fp-math=ieee -fp-contract=fast < %s | FileCheck -check-prefixes=FUSED,CONTRACT,ALL %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1030 -denormal-fp-math-f32=preserve-sign -denormal-fp-math=preserve-sign -fp-contract=on < %s | FileCheck -check-prefixes=GFX1030,NOCONTRACT,ALL %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -denormal-fp-math=preserve-sign -fp-contract=on < %s | FileCheck -check-prefixes=FUSED32,FUSED16,NOCONTRACT,ALL %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=ieee -denormal-fp-math=ieee -fp-contract=on < %s | FileCheck -check-prefixes=SLOW,NOCONTRACT,ALL %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=ieee -denormal-fp-math=ieee -fp-contract=fast < %s | FileCheck -check-prefixes=FUSED32,FUSED16,CONTRACT,ALL %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1030 -denormal-fp-math-f32=preserve-sign -denormal-fp-math=preserve-sign -fp-contract=on < %s | FileCheck -check-prefixes=GFX1030,NOCONTRACT,ALL %s
+
+target triple = "amdgcn--"
+
+; ALL-LABEL: 'fmul_fadd_f32':
+; FUSED: estimated cost of 0 for instruction: %mul = fmul float
+; SLOW: estimated cost of 1 for instruction: %mul = fmul float
+; GFX1030: estimated cost of 1 for instruction: %mul = fmul float
+; ALL: estimated cost of 1 for instruction: %add = fadd float
+define float @fmul_fadd_f32(float %r0, float %r1, float %r2) #0 {
+ %mul = fmul float %r0, %r1
+ %add = fadd float %mul, %r2
+ ret float %add
+}
+
+; ALL-LABEL: 'fmul_fadd_contract_f32':
+; ALL: estimated cost of 0 for instruction: %mul = fmul contract float
+; ALL: estimated cost of 1 for instruction: %add = fadd contract float
+define float @fmul_fadd_contract_f32(float %r0, float %r1, float %r2) #0 {
+ %mul = fmul contract float %r0, %r1
+ %add = fadd contract float %mul, %r2
+ ret float %add
+}
+
+; ALL-LABEL: 'fmul_fadd_v2f32':
+; FUSED: estimated cost of 0 for instruction: %mul = fmul <2 x float>
+; SLOW: estimated cost of 2 for instruction: %mul = fmul <2 x float>
+; GFX1030: estimated cost of 2 for instruction: %mul = fmul <2 x float>
+; ALL: estimated cost of 2 for instruction: %add = fadd <2 x float>
+define <2 x float> @fmul_fadd_v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2) #0 {
+ %mul = fmul <2 x float> %r0, %r1
+ %add = fadd <2 x float> %mul, %r2
+ ret <2 x float> %add
+}
+
+; ALL-LABEL: 'fmul_fsub_f32':
+; FUSED: estimated cost of 0 for instruction: %mul = fmul float
+; SLOW: estimated cost of 1 for instruction: %mul = fmul float
+; GFX1030: estimated cost of 1 for instruction: %mul = fmul float
+; ALL: estimated cost of 1 for instruction: %sub = fsub float
+define float @fmul_fsub_f32(float %r0, float %r1, float %r2) #0 {
+ %mul = fmul float %r0, %r1
+ %sub = fsub float %mul, %r2
+ ret float %sub
+}
+
+; ALL-LABEL: 'fmul_fsub_v2f32':
+; FUSED: estimated cost of 0 for instruction: %mul = fmul <2 x float>
+; SLOW: estimated cost of 2 for instruction: %mul = fmul <2 x float>
+; GFX1030: estimated cost of 2 for instruction: %mul = fmul <2 x float>
+; ALL: estimated cost of 2 for instruction: %sub = fsub <2 x float>
+define <2 x float> @fmul_fsub_v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2) #0 {
+ %mul = fmul <2 x float> %r0, %r1
+ %sub = fsub <2 x float> %mul, %r2
+ ret <2 x float> %sub
+}
+
+; ALL-LABEL: 'fmul_fadd_f16':
+; FUSED: estimated cost of 0 for instruction: %mul = fmul half
+; SLOW: estimated cost of 1 for instruction: %mul = fmul half
+; ALL: estimated cost of 1 for instruction: %add = fadd half
+define half @fmul_fadd_f16(half %r0, half %r1, half %r2) #0 {
+ %mul = fmul half %r0, %r1
+ %add = fadd half %mul, %r2
+ ret half %add
+}
+
+; ALL-LABEL: 'fmul_fadd_contract_f16':
+; ALL: estimated cost of 0 for instruction: %mul = fmul contract half
+; ALL: estimated cost of 1 for instruction: %add = fadd contract half
+define half @fmul_fadd_contract_f16(half %r0, half %r1, half %r2) #0 {
+ %mul = fmul contract half %r0, %r1
+ %add = fadd contract half %mul, %r2
+ ret half %add
+}
+
+; ALL-LABEL: 'fmul_fadd_v2f16':
+; FUSED: estimated cost of 0 for instruction: %mul = fmul <2 x half>
+; SLOW: estimated cost of 1 for instruction: %mul = fmul <2 x half>
+; ALL: estimated cost of 1 for instruction: %add = fadd <2 x half>
+define <2 x half> @fmul_fadd_v2f16(<2 x half> %r0, <2 x half> %r1, <2 x half> %r2) #0 {
+ %mul = fmul <2 x half> %r0, %r1
+ %add = fadd <2 x half> %mul, %r2
+ ret <2 x half> %add
+}
+
+; ALL-LABEL: 'fmul_fsub_f16':
+; FUSED: estimated cost of 0 for instruction: %mul = fmul half
+; SLOW: estimated cost of 1 for instruction: %mul = fmul half
+; ALL: estimated cost of 1 for instruction: %sub = fsub half
+define half @fmul_fsub_f16(half %r0, half %r1, half %r2) #0 {
+ %mul = fmul half %r0, %r1
+ %sub = fsub half %mul, %r2
+ ret half %sub
+}
+
+; ALL-LABEL: 'fmul_fsub_v2f16':
+; FUSED: estimated cost of 0 for instruction: %mul = fmul <2 x half>
+; SLOW: estimated cost of 1 for instruction: %mul = fmul <2 x half>
+; ALL: estimated cost of 1 for instruction: %sub = fsub <2 x half>
+define <2 x half> @fmul_fsub_v2f16(<2 x half> %r0, <2 x half> %r1, <2 x half> %r2) #0 {
+ %mul = fmul <2 x half> %r0, %r1
+ %sub = fsub <2 x half> %mul, %r2
+ ret <2 x half> %sub
+}
+
+; ALL-LABEL: 'fmul_fadd_f64':
+; CONTRACT: estimated cost of 0 for instruction: %mul = fmul double
+; NOCONTRACT: estimated cost of 3 for instruction: %mul = fmul double
+; ALL: estimated cost of 3 for instruction: %add = fadd double
+define double @fmul_fadd_f64(double %r0, double %r1, double %r2) #0 {
+ %mul = fmul double %r0, %r1
+ %add = fadd double %mul, %r2
+ ret double %add
+}
+
+; ALL-LABEL: 'fmul_fadd_contract_f64':
+; ALL: estimated cost of 0 for instruction: %mul = fmul contract double
+; ALL: estimated cost of 3 for instruction: %add = fadd contract double
+define double @fmul_fadd_contract_f64(double %r0, double %r1, double %r2) #0 {
+ %mul = fmul contract double %r0, %r1
+ %add = fadd contract double %mul, %r2
+ ret double %add
+}
+
+; ALL-LABEL: 'fmul_fadd_v2f64':
+; CONTRACT: estimated cost of 0 for instruction: %mul = fmul <2 x double>
+; NOCONTRACT: estimated cost of 6 for instruction: %mul = fmul <2 x double>
+; ALL: estimated cost of 6 for instruction: %add = fadd <2 x double>
+define <2 x double> @fmul_fadd_v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2) #0 {
+ %mul = fmul <2 x double> %r0, %r1
+ %add = fadd <2 x double> %mul, %r2
+ ret <2 x double> %add
+}
+
+; ALL-LABEL: 'fmul_fsub_f64':
+; CONTRACT: estimated cost of 0 for instruction: %mul = fmul double
+; NOCONTRACT: estimated cost of 3 for instruction: %mul = fmul double
+; ALL: estimated cost of 3 for instruction: %sub = fsub double
+define double @fmul_fsub_f64(double %r0, double %r1, double %r2) #0 {
+ %mul = fmul double %r0, %r1
+ %sub = fsub double %mul, %r2
+ ret double %sub
+}
+
+; ALL-LABEL: 'fmul_fsub_v2f64':
+; CONTRACT: estimated cost of 0 for instruction: %mul = fmul <2 x double>
+; NOCONTRACT: estimated cost of 6 for instruction: %mul = fmul <2 x double>
+; ALL: estimated cost of 6 for instruction: %sub = fsub <2 x double>
+define <2 x double> @fmul_fsub_v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2) #0 {
+ %mul = fmul <2 x double> %r0, %r1
+ %sub = fsub <2 x double> %mul, %r2
+ ret <2 x double> %sub
+}
+
+attributes #0 = { nounwind }
More information about the llvm-commits mailing list