[llvm] 19e7f8a - AMDGPU: Add default denormal mode to MachineFunctionInfo
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 1 00:03:53 PDT 2019
Author: Matt Arsenault
Date: 2019-11-01T00:03:39-07:00
New Revision: 19e7f8a21d62d0a6ae8a1bbecb232bd9d520555b
URL: https://github.com/llvm/llvm-project/commit/19e7f8a21d62d0a6ae8a1bbecb232bd9d520555b
DIFF: https://github.com/llvm/llvm-project/commit/19e7f8a21d62d0a6ae8a1bbecb232bd9d520555b.diff
LOG: AMDGPU: Add default denormal mode to MachineFunctionInfo
The default FP mode should really be a property of a specific
function, and not a subtarget. Introduce the necessary fields to the
SIMachineFunctionInfo to help move towards this goal.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index e8cf77161a14..64739cd15c84 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1151,6 +1151,8 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
+ MFI->Mode.FP32Denormals = YamlMFI.Mode.FP32Denormals;
+ MFI->Mode.FP64FP16Denormals = YamlMFI.Mode.FP64FP16Denormals;
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 7d70c786b594..0d6153d06ce2 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -236,17 +236,23 @@ template <> struct MappingTraits<SIArgumentInfo> {
struct SIMode {
bool IEEE = true;
bool DX10Clamp = true;
+ bool FP32Denormals = true;
+ bool FP64FP16Denormals = true;
SIMode() = default;
-
SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
IEEE = Mode.IEEE;
DX10Clamp = Mode.DX10Clamp;
+ FP32Denormals = Mode.FP32Denormals;
+ FP64FP16Denormals = Mode.FP64FP16Denormals;
}
bool operator ==(const SIMode Other) const {
- return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp;
+ return IEEE == Other.IEEE &&
+ DX10Clamp == Other.DX10Clamp &&
+ FP32Denormals == Other.FP32Denormals &&
+ FP64FP16Denormals == Other.FP64FP16Denormals;
}
};
@@ -254,6 +260,8 @@ template <> struct MappingTraits<SIMode> {
static void mapping(IO &YamlIO, SIMode &Mode) {
YamlIO.mapOptional("ieee", Mode.IEEE, true);
YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
+ YamlIO.mapOptional("fp32-denormals", Mode.FP32Denormals, true);
+ YamlIO.mapOptional("fp64-fp16-denormals", Mode.FP64FP16Denormals, true);
}
};
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index f78dadd447ff..f8c082060ff5 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -659,23 +659,40 @@ struct SIModeRegisterDefaults {
/// clamp NaN to zero; otherwise, pass NaN through.
bool DX10Clamp : 1;
- // TODO: FP mode fields
+ /// If this is set, neither input nor output denormals are flushed for most
+ /// f32 instructions.
+ ///
+ /// TODO: Split into separate input and output fields if necessary like the
+ /// control bits really provide?
+ bool FP32Denormals : 1;
+
+ /// If this is set, neither input nor output denormals are flushed for both
+ /// f64 and f16/v2f16 instructions.
+ bool FP64FP16Denormals : 1;
SIModeRegisterDefaults() :
IEEE(true),
- DX10Clamp(true) {}
+ DX10Clamp(true),
+ FP32Denormals(true),
+ FP64FP16Denormals(true) {}
SIModeRegisterDefaults(const Function &F);
static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
+ const bool IsCompute = AMDGPU::isCompute(CC);
+
SIModeRegisterDefaults Mode;
Mode.DX10Clamp = true;
- Mode.IEEE = AMDGPU::isCompute(CC);
+ Mode.IEEE = IsCompute;
+ Mode.FP32Denormals = false; // FIXME: Should be on by default.
+ Mode.FP64FP16Denormals = true;
return Mode;
}
bool operator ==(const SIModeRegisterDefaults Other) const {
- return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp;
+ return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
+ FP32Denormals == Other.FP32Denormals &&
+ FP64FP16Denormals == Other.FP64FP16Denormals;
}
// FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
index 8334ef5ef6c0..0b23ded5d9d7 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
@@ -25,6 +25,8 @@
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
+# FULL-NEXT: fp32-denormals: true
+# FULL-NEXT: fp64-fp16-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: body:
@@ -92,6 +94,8 @@ body: |
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
+# FULL-NEXT: fp32-denormals: true
+# FULL-NEXT: fp64-fp16-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: body:
@@ -129,6 +133,8 @@ body: |
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
+# FULL-NEXT: fp32-denormals: true
+# FULL-NEXT: fp64-fp16-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: body:
@@ -167,6 +173,8 @@ body: |
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
+# FULL-NEXT: fp32-denormals: true
+# FULL-NEXT: fp64-fp16-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: body:
@@ -239,11 +247,16 @@ body: |
# ALL: mode:
# ALL-NEXT: ieee: false
# ALL-NEXT: dx10-clamp: false
+# ALL-NEXT: fp32-denormals: false
+# ALL-NEXT: fp64-fp16-denormals: false
+
name: parse_mode
machineFunctionInfo:
mode:
ieee: false
dx10-clamp: false
+ fp32-denormals: false
+ fp64-fp16-denormals: false
body: |
bb.0:
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
index bc354f2a0d87..f9de722c5671 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
@@ -28,6 +28,8 @@
; CHECK-NEXT: mode:
; CHECK-NEXT: ieee: true
; CHECK-NEXT: dx10-clamp: true
+; CHECK-NEXT: fp32-denormals: false
+; CHECK-NEXT: fp64-fp16-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: body:
define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
@@ -55,6 +57,8 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
; CHECK-NEXT: mode:
; CHECK-NEXT: ieee: false
; CHECK-NEXT: dx10-clamp: true
+; CHECK-NEXT: fp32-denormals: false
+; CHECK-NEXT: fp64-fp16-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: body:
define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
@@ -80,6 +84,8 @@ define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
; CHECK-NEXT: mode:
; CHECK-NEXT: ieee: true
; CHECK-NEXT: dx10-clamp: true
+; CHECK-NEXT: fp32-denormals: false
+; CHECK-NEXT: fp64-fp16-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: body:
define void @function() {
@@ -105,6 +111,8 @@ define void @function() {
; CHECK-NEXT: mode:
; CHECK-NEXT: ieee: true
; CHECK-NEXT: dx10-clamp: true
+; CHECK-NEXT: fp32-denormals: false
+; CHECK-NEXT: fp64-fp16-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: body:
define void @function_nsz() #0 {
@@ -115,6 +123,8 @@ define void @function_nsz() #0 {
; CHECK: mode:
; CHECK-NEXT: ieee: true
; CHECK-NEXT: dx10-clamp: false
+; CHECK-NEXT: fp32-denormals: false
+; CHECK-NEXT: fp64-fp16-denormals: true
define void @function_dx10_clamp_off() #1 {
ret void
}
@@ -123,6 +133,8 @@ define void @function_dx10_clamp_off() #1 {
; CHECK: mode:
; CHECK-NEXT: ieee: false
; CHECK-NEXT: dx10-clamp: true
+; CHECK-NEXT: fp32-denormals: false
+; CHECK-NEXT: fp64-fp16-denormals: true
define void @function_ieee_off() #2 {
ret void
}
@@ -131,6 +143,8 @@ define void @function_ieee_off() #2 {
; CHECK: mode:
; CHECK-NEXT: ieee: false
; CHECK-NEXT: dx10-clamp: false
+; CHECK-NEXT: fp32-denormals: false
+; CHECK-NEXT: fp64-fp16-denormals: true
define void @function_ieee_off_dx10_clamp_off() #3 {
ret void
}
More information about the llvm-commits
mailing list