r339278 - AMDGPU: Fix enabling denormals by default on pre-VI targets
Matt Arsenault via cfe-commits
cfe-commits at lists.llvm.org
Wed Aug 8 10:48:38 PDT 2018
Author: arsenm
Date: Wed Aug 8 10:48:37 2018
New Revision: 339278
URL: http://llvm.org/viewvc/llvm-project?rev=339278&view=rev
Log:
AMDGPU: Fix enabling denormals by default on pre-VI targets
Fast FMAF is not a sufficient condition to enable denormals.
Before VI, enabling denormals caused F32 instructions to
run at F64 speeds.
Modified:
cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
cfe/trunk/lib/Basic/Targets/AMDGPU.h
cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl
cfe/trunk/test/CodeGenOpenCL/denorms-are-zero.cl
Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=339278&r1=339277&r2=339278&view=diff
==============================================================================
--- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Wed Aug 8 10:48:37 2018
@@ -210,7 +210,8 @@ void AMDGPUTargetInfo::adjustTargetOptio
}
if (!hasFP32Denormals)
TargetOpts.Features.push_back(
- (Twine(CGOptsGPU.HasFastFMAF && !CGOpts.FlushDenorm
+ (Twine(CGOptsGPU.HasFastFMAF && CGOptsGPU.HasFullRateF32Denorms &&
+ !CGOpts.FlushDenorm
? '+'
: '-') +
Twine("fp32-denormals"))
Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.h?rev=339278&r1=339277&r2=339278&view=diff
==============================================================================
--- cfe/trunk/lib/Basic/Targets/AMDGPU.h (original)
+++ cfe/trunk/lib/Basic/Targets/AMDGPU.h Wed Aug 8 10:48:37 2018
@@ -94,77 +94,78 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTarg
bool HasLDEXPF;
bool HasFP64;
bool HasFastFMA;
+ bool HasFullRateF32Denorms;
};
static constexpr GPUInfo InvalidGPU =
- {{""}, {""}, GK_NONE, false, false, false, false, false};
+ {{""}, {""}, GK_NONE, false, false, false, false, false, false};
static constexpr GPUInfo R600GPUs[26] = {
- // Name Canonical Kind Has Has Has Has Has
- // Name FMAF Fast LDEXPF FP64 Fast
- // FMAF FMA
- {{"r600"}, {"r600"}, GK_R600, false, false, false, false, false},
- {{"rv630"}, {"r600"}, GK_R600, false, false, false, false, false},
- {{"rv635"}, {"r600"}, GK_R600, false, false, false, false, false},
- {{"r630"}, {"r630"}, GK_R630, false, false, false, false, false},
- {{"rs780"}, {"rs880"}, GK_RS880, false, false, false, false, false},
- {{"rs880"}, {"rs880"}, GK_RS880, false, false, false, false, false},
- {{"rv610"}, {"rs880"}, GK_RS880, false, false, false, false, false},
- {{"rv620"}, {"rs880"}, GK_RS880, false, false, false, false, false},
- {{"rv670"}, {"rv670"}, GK_RV670, false, false, false, false, false},
- {{"rv710"}, {"rv710"}, GK_RV710, false, false, false, false, false},
- {{"rv730"}, {"rv730"}, GK_RV730, false, false, false, false, false},
- {{"rv740"}, {"rv770"}, GK_RV770, false, false, false, false, false},
- {{"rv770"}, {"rv770"}, GK_RV770, false, false, false, false, false},
- {{"cedar"}, {"cedar"}, GK_CEDAR, false, false, false, false, false},
- {{"palm"}, {"cedar"}, GK_CEDAR, false, false, false, false, false},
- {{"cypress"}, {"cypress"}, GK_CYPRESS, true, false, false, false, false},
- {{"hemlock"}, {"cypress"}, GK_CYPRESS, true, false, false, false, false},
- {{"juniper"}, {"juniper"}, GK_JUNIPER, false, false, false, false, false},
- {{"redwood"}, {"redwood"}, GK_REDWOOD, false, false, false, false, false},
- {{"sumo"}, {"sumo"}, GK_SUMO, false, false, false, false, false},
- {{"sumo2"}, {"sumo"}, GK_SUMO, false, false, false, false, false},
- {{"barts"}, {"barts"}, GK_BARTS, false, false, false, false, false},
- {{"caicos"}, {"caicos"}, GK_BARTS, false, false, false, false, false},
- {{"aruba"}, {"cayman"}, GK_CAYMAN, true, false, false, false, false},
- {{"cayman"}, {"cayman"}, GK_CAYMAN, true, false, false, false, false},
- {{"turks"}, {"turks"}, GK_TURKS, false, false, false, false, false},
+ // Name Canonical Kind Has Has Has Has Has Has
+ // Name FMAF Fast LDEXPF FP64 Fast Fast
+ // FMAF FMA Denorm
+ {{"r600"}, {"r600"}, GK_R600, false, false, false, false, false, false},
+ {{"rv630"}, {"r600"}, GK_R600, false, false, false, false, false, false},
+ {{"rv635"}, {"r600"}, GK_R600, false, false, false, false, false, false},
+ {{"r630"}, {"r630"}, GK_R630, false, false, false, false, false, false},
+ {{"rs780"}, {"rs880"}, GK_RS880, false, false, false, false, false, false},
+ {{"rs880"}, {"rs880"}, GK_RS880, false, false, false, false, false, false},
+ {{"rv610"}, {"rs880"}, GK_RS880, false, false, false, false, false, false},
+ {{"rv620"}, {"rs880"}, GK_RS880, false, false, false, false, false, false},
+ {{"rv670"}, {"rv670"}, GK_RV670, false, false, false, false, false, false},
+ {{"rv710"}, {"rv710"}, GK_RV710, false, false, false, false, false, false},
+ {{"rv730"}, {"rv730"}, GK_RV730, false, false, false, false, false, false},
+ {{"rv740"}, {"rv770"}, GK_RV770, false, false, false, false, false, false},
+ {{"rv770"}, {"rv770"}, GK_RV770, false, false, false, false, false, false},
+ {{"cedar"}, {"cedar"}, GK_CEDAR, false, false, false, false, false, false},
+ {{"palm"}, {"cedar"}, GK_CEDAR, false, false, false, false, false, false},
+ {{"cypress"}, {"cypress"}, GK_CYPRESS, true, false, false, false, false, false},
+ {{"hemlock"}, {"cypress"}, GK_CYPRESS, true, false, false, false, false, false},
+ {{"juniper"}, {"juniper"}, GK_JUNIPER, false, false, false, false, false, false},
+ {{"redwood"}, {"redwood"}, GK_REDWOOD, false, false, false, false, false, false},
+ {{"sumo"}, {"sumo"}, GK_SUMO, false, false, false, false, false, false},
+ {{"sumo2"}, {"sumo"}, GK_SUMO, false, false, false, false, false, false},
+ {{"barts"}, {"barts"}, GK_BARTS, false, false, false, false, false, false},
+ {{"caicos"}, {"caicos"}, GK_BARTS, false, false, false, false, false, false},
+ {{"aruba"}, {"cayman"}, GK_CAYMAN, true, false, false, false, false, false},
+ {{"cayman"}, {"cayman"}, GK_CAYMAN, true, false, false, false, false, false},
+ {{"turks"}, {"turks"}, GK_TURKS, false, false, false, false, false, false},
};
static constexpr GPUInfo AMDGCNGPUs[32] = {
- // Name Canonical Kind Has Has Has Has Has
- // Name FMAF Fast LDEXPF FP64 Fast
- // FMAF FMA
- {{"gfx600"}, {"gfx600"}, GK_GFX600, true, true, true, true, true},
- {{"tahiti"}, {"gfx600"}, GK_GFX600, true, true, true, true, true},
- {{"gfx601"}, {"gfx601"}, GK_GFX601, true, false, true, true, true},
- {{"hainan"}, {"gfx601"}, GK_GFX601, true, false, true, true, true},
- {{"oland"}, {"gfx601"}, GK_GFX601, true, false, true, true, true},
- {{"pitcairn"}, {"gfx601"}, GK_GFX601, true, false, true, true, true},
- {{"verde"}, {"gfx601"}, GK_GFX601, true, false, true, true, true},
- {{"gfx700"}, {"gfx700"}, GK_GFX700, true, false, true, true, true},
- {{"kaveri"}, {"gfx700"}, GK_GFX700, true, false, true, true, true},
- {{"gfx701"}, {"gfx701"}, GK_GFX701, true, true, true, true, true},
- {{"hawaii"}, {"gfx701"}, GK_GFX701, true, true, true, true, true},
- {{"gfx702"}, {"gfx702"}, GK_GFX702, true, true, true, true, true},
- {{"gfx703"}, {"gfx703"}, GK_GFX703, true, false, true, true, true},
- {{"kabini"}, {"gfx703"}, GK_GFX703, true, false, true, true, true},
- {{"mullins"}, {"gfx703"}, GK_GFX703, true, false, true, true, true},
- {{"gfx704"}, {"gfx704"}, GK_GFX704, true, false, true, true, true},
- {{"bonaire"}, {"gfx704"}, GK_GFX704, true, false, true, true, true},
- {{"gfx801"}, {"gfx801"}, GK_GFX801, true, true, true, true, true},
- {{"carrizo"}, {"gfx801"}, GK_GFX801, true, true, true, true, true},
- {{"gfx802"}, {"gfx802"}, GK_GFX802, true, false, true, true, true},
- {{"iceland"}, {"gfx802"}, GK_GFX802, true, false, true, true, true},
- {{"tonga"}, {"gfx802"}, GK_GFX802, true, false, true, true, true},
- {{"gfx803"}, {"gfx803"}, GK_GFX803, true, false, true, true, true},
- {{"fiji"}, {"gfx803"}, GK_GFX803, true, false, true, true, true},
- {{"polaris10"}, {"gfx803"}, GK_GFX803, true, false, true, true, true},
- {{"polaris11"}, {"gfx803"}, GK_GFX803, true, false, true, true, true},
- {{"gfx810"}, {"gfx810"}, GK_GFX810, true, false, true, true, true},
- {{"stoney"}, {"gfx810"}, GK_GFX810, true, false, true, true, true},
- {{"gfx900"}, {"gfx900"}, GK_GFX900, true, true, true, true, true},
- {{"gfx902"}, {"gfx902"}, GK_GFX900, true, true, true, true, true},
- {{"gfx904"}, {"gfx904"}, GK_GFX904, true, true, true, true, true},
- {{"gfx906"}, {"gfx906"}, GK_GFX906, true, true, true, true, true},
+ // Name Canonical Kind Has Has Has Has Has Has
+ // Name FMAF Fast LDEXPF FP64 Fast Fast
+ // FMAF FMA Denorm
+ {{"gfx600"}, {"gfx600"}, GK_GFX600, true, true, true, true, true, false},
+ {{"tahiti"}, {"gfx600"}, GK_GFX600, true, true, true, true, true, false},
+ {{"gfx601"}, {"gfx601"}, GK_GFX601, true, false, true, true, true, false},
+ {{"hainan"}, {"gfx601"}, GK_GFX601, true, false, true, true, true, false},
+ {{"oland"}, {"gfx601"}, GK_GFX601, true, false, true, true, true, false},
+ {{"pitcairn"}, {"gfx601"}, GK_GFX601, true, false, true, true, true, false},
+ {{"verde"}, {"gfx601"}, GK_GFX601, true, false, true, true, true, false},
+ {{"gfx700"}, {"gfx700"}, GK_GFX700, true, false, true, true, true, false},
+ {{"kaveri"}, {"gfx700"}, GK_GFX700, true, false, true, true, true, false},
+ {{"gfx701"}, {"gfx701"}, GK_GFX701, true, true, true, true, true, false},
+ {{"hawaii"}, {"gfx701"}, GK_GFX701, true, true, true, true, true, false},
+ {{"gfx702"}, {"gfx702"}, GK_GFX702, true, true, true, true, true, false},
+ {{"gfx703"}, {"gfx703"}, GK_GFX703, true, false, true, true, true, false},
+ {{"kabini"}, {"gfx703"}, GK_GFX703, true, false, true, true, true, false},
+ {{"mullins"}, {"gfx703"}, GK_GFX703, true, false, true, true, true, false},
+ {{"gfx704"}, {"gfx704"}, GK_GFX704, true, false, true, true, true, false},
+ {{"bonaire"}, {"gfx704"}, GK_GFX704, true, false, true, true, true, false},
+ {{"gfx801"}, {"gfx801"}, GK_GFX801, true, true, true, true, true, true},
+ {{"carrizo"}, {"gfx801"}, GK_GFX801, true, true, true, true, true, true},
+ {{"gfx802"}, {"gfx802"}, GK_GFX802, true, false, true, true, true, true},
+ {{"iceland"}, {"gfx802"}, GK_GFX802, true, false, true, true, true, true},
+ {{"tonga"}, {"gfx802"}, GK_GFX802, true, false, true, true, true, true},
+ {{"gfx803"}, {"gfx803"}, GK_GFX803, true, false, true, true, true, true},
+ {{"fiji"}, {"gfx803"}, GK_GFX803, true, false, true, true, true, true},
+ {{"polaris10"}, {"gfx803"}, GK_GFX803, true, false, true, true, true, true},
+ {{"polaris11"}, {"gfx803"}, GK_GFX803, true, false, true, true, true, true},
+ {{"gfx810"}, {"gfx810"}, GK_GFX810, true, false, true, true, true, true},
+ {{"stoney"}, {"gfx810"}, GK_GFX810, true, false, true, true, true, true},
+ {{"gfx900"}, {"gfx900"}, GK_GFX900, true, true, true, true, true, true},
+ {{"gfx902"}, {"gfx902"}, GK_GFX900, true, true, true, true, true, true},
+ {{"gfx904"}, {"gfx904"}, GK_GFX904, true, true, true, true, true, true},
+ {{"gfx906"}, {"gfx906"}, GK_GFX906, true, true, true, true, true, true},
};
static GPUInfo parseR600Name(StringRef Name);
Modified: cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl?rev=339278&r1=339277&r2=339278&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl Wed Aug 8 10:48:37 2018
@@ -14,7 +14,7 @@
// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime,+vi-insts"
// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+s-memrealtime,+vi-insts"
// GFX700: "target-features"="+ci-insts,+fp64-fp16-denormals,-fp32-denormals"
-// GFX600: "target-features"="+fp32-denormals,+fp64-fp16-denormals"
+// GFX600: "target-features"="+fp64-fp16-denormals,-fp32-denormals"
// GFX601: "target-features"="+fp64-fp16-denormals,-fp32-denormals"
kernel void test() {}
Modified: cfe/trunk/test/CodeGenOpenCL/denorms-are-zero.cl
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/denorms-are-zero.cl?rev=339278&r1=339277&r2=339278&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenOpenCL/denorms-are-zero.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/denorms-are-zero.cl Wed Aug 8 10:48:37 2018
@@ -1,8 +1,26 @@
// RUN: %clang_cc1 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - %s | FileCheck %s --check-prefix=DENORM-ZERO
-// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck %s --check-prefix=AMDGCN
-// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck %s --check-prefix=AMDGCN-DENORM
-// RUN: %clang_cc1 -emit-llvm -target-feature +fp32-denormals -target-feature -fp64-fp16-denormals -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck --check-prefix=AMDGCN-FEATURE %s
+// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - %s | FileCheck -check-prefix=DENORM-ZERO %s
+
+// Slow FMAF and slow f32 denormals
+// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa -target-cpu pitcairn %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s
+// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu pitcairn %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH-OPT %s
+
+// Fast FMAF, but slow f32 denormals
+// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa -target-cpu tahiti %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s
+// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu tahiti %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH-OPT %s
+
+// Fast F32 denormals, but slow FMAF
+// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s
+// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH-OPT %s
+
+// Fast F32 denormals and fast FMAF
+// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa -target-cpu gfx900 %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-DENORM %s
+// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu gfx900 %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH-OPT %s
+
+// RUN: %clang_cc1 -emit-llvm -target-feature +fp32-denormals -target-feature -fp64-fp16-denormals -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FEATURE %s
+// RUN: %clang_cc1 -emit-llvm -target-feature +fp32-denormals -target-feature -fp64-fp16-denormals -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu pitcairn %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FEATURE %s
+
+
// For all targets 'denorms-are-zero' attribute is set to 'true'
// if '-cl-denorms-are-zero' was specified and to 'false' otherwise.
@@ -17,9 +35,11 @@
// explicitly set. The amdgcn target never flushes fp64 denormals. The control for fp64 and fp16 denormals is the same.
// AMDGCN-LABEL: define void @f()
-// AMDGCN: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="true" {{.*}} "target-features"="{{[^"]*}}+fp64-fp16-denormals,{{[^"]*}}-fp32-denormals{{[^"]*}}"
-// AMDGCN-DENORM-LABEL: define void @f()
-// AMDGCN-DENORM: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="false" {{.*}} "target-features"="{{[^"]*}}+fp64-fp16-denormals,{{[^"]*}}-fp32-denormals{{[^"]*}}"
-// AMDGCN-FEATURE-LABEL: define void @f()
+
+// AMDGCN-FLUSH: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="false" {{.*}} "target-features"="{{[^"]*}}+fp64-fp16-denormals,{{[^"]*}}-fp32-denormals{{[^"]*}}"
+// AMDGCN-FLUSH-OPT: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="true" {{.*}} "target-features"="{{[^"]*}}+fp64-fp16-denormals,{{[^"]*}}-fp32-denormals{{[^"]*}}"
+
+// AMDGCN-DENORM: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="false" {{.*}} "target-features"="{{[^"]*}}+fp32-denormals,{{[^"]*}}+fp64-fp16-denormals{{[^"]*}}"
+
// AMDGCN-FEATURE: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="true" {{.*}} "target-features"="{{[^"]*}}+fp32-denormals,{{[^"]*}}-fp64-fp16-denormals{{[^"]*}}"
void f() {}
More information about the cfe-commits mailing list