[llvm] 221890d - AMDGPU: Add feature for fast f32 denormals
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 4 17:01:41 PDT 2020
Author: Matt Arsenault
Date: 2020-04-04T20:01:24-04:00
New Revision: 221890d709276a7315222f470f9f9a2d908b5327
URL: https://github.com/llvm/llvm-project/commit/221890d709276a7315222f470f9f9a2d908b5327
DIFF: https://github.com/llvm/llvm-project/commit/221890d709276a7315222f470f9f9a2d908b5327.diff
LOG: AMDGPU: Add feature for fast f32 denormals
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPU.td
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 1f1065749c35..97852523033d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -33,6 +33,12 @@ def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
"Assuming f32 fma is at least as fast as mul + add"
>;
+def FeatureFastDenormalF32 : SubtargetFeature<"fast-denormal-f32",
+ "FastDenormalF32",
+ "true",
+ "Enabling denormals does not cause f32 instructions to run at f64 rates"
+>;
+
def FeatureMIMG_R128 : SubtargetFeature<"mimg-r128",
"MIMG_R128",
"true",
@@ -632,7 +638,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureScalarStores, FeatureInv2PiInlineImm,
FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
FeatureIntClamp, FeatureTrigReducedRange, FeatureDoesNotSupportSRAMECC,
- FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts
+ FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts, FeatureFastDenormalF32
]
>;
@@ -647,8 +653,8 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
- FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16
- ]
+ FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16,
+ FeatureFastDenormalF32]
>;
def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
@@ -665,7 +671,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
FeatureVOP3Literal, FeatureDPP8,
FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC,
- FeatureGFX10A16
+ FeatureGFX10A16, FeatureFastDenormalF32
]
>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index df4c6308fee3..91c1bb4fb130 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -178,6 +178,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
MaxPrivateElementSize(0),
FastFMAF32(false),
+ FastDenormalF32(false),
HalfRate64Ops(false),
FlatForGlobal(false),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index c565c17ff03d..cadb328cc498 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -291,6 +291,7 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
// Possibly statically set by tablegen, but may want to be overridden.
bool FastFMAF32;
+ bool FastDenormalF32;
bool HalfRate64Ops;
// Dynamically set bits that enable features.
More information about the llvm-commits mailing list