[llvm] r367969 - Re-commit: [AMDGPU] Use S_DENORM_MODE for gfx10
Austin Kerbow via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 5 19:16:11 PDT 2019
Author: kerbowa
Date: Mon Aug 5 19:16:11 2019
New Revision: 367969
URL: http://llvm.org/viewvc/llvm-project?rev=367969&view=rev
Log:
Re-commit: [AMDGPU] Use S_DENORM_MODE for gfx10
Summary: During fdiv32 lowering use S_DENORM_MODE to select denorm mode in gfx10.
Reviewers: arsenm, rampitec
Reviewed By: arsenm, rampitec
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65620
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td
llvm/trunk/test/CodeGen/AMDGPU/fdiv.ll
llvm/trunk/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=367969&r1=367968&r2=367969&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Mon Aug 5 19:16:11 2019
@@ -4221,6 +4221,7 @@ const char* AMDGPUTargetLowering::getTar
NODE_NAME_CASE(FRACT)
NODE_NAME_CASE(SETCC)
NODE_NAME_CASE(SETREG)
+ NODE_NAME_CASE(DENORM_MODE)
NODE_NAME_CASE(FMA_W_CHAIN)
NODE_NAME_CASE(FMUL_W_CHAIN)
NODE_NAME_CASE(CLAMP)
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h?rev=367969&r1=367968&r2=367969&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h Mon Aug 5 19:16:11 2019
@@ -369,6 +369,9 @@ enum NodeType : unsigned {
// result bit per item in the wavefront.
SETCC,
SETREG,
+
+ DENORM_MODE,
+
// FP ops with input and output chain.
FMA_W_CHAIN,
FMUL_W_CHAIN,
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=367969&r1=367968&r2=367969&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Mon Aug 5 19:16:11 2019
@@ -617,6 +617,11 @@ public:
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
+ /// \returns If target supports S_DENORM_MODE.
+ bool hasDenormModeInst() const {
+ return getGeneration() >= AMDGPUSubtarget::GFX10;
+ }
+
bool useFlatForGlobal() const {
return FlatForGlobal;
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=367969&r1=367968&r2=367969&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Mon Aug 5 19:16:11 2019
@@ -7591,6 +7591,19 @@ SDValue SITargetLowering::lowerFDIV_FAST
return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul);
}
+// Returns immediate value for setting the F32 denorm mode when using the
+// S_DENORM_MODE instruction.
+static const SDValue getSPDenormModeValue(int SPDenormMode, SelectionDAG &DAG,
+ const SDLoc &SL, const GCNSubtarget *ST) {
+ assert(ST->hasDenormModeInst() && "Requires S_DENORM_MODE");
+ int DPDenormModeDefault = ST->hasFP64Denormals()
+ ? FP_DENORM_FLUSH_NONE
+ : FP_DENORM_FLUSH_IN_FLUSH_OUT;
+
+ int Mode = SPDenormMode | (DPDenormModeDefault << 2);
+ return DAG.getTargetConstant(Mode, SL, MVT::i32);
+}
+
SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))
return FastLowered;
@@ -7617,16 +7630,26 @@ SDValue SITargetLowering::LowerFDIV32(SD
const unsigned Denorm32Reg = AMDGPU::Hwreg::ID_MODE |
(4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
(1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
-
const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i16);
if (!Subtarget->hasFP32Denormals()) {
SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE,
- SL, MVT::i32);
- SDValue EnableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, BindParamVTs,
- DAG.getEntryNode(),
- EnableDenormValue, BitField);
+
+ SDValue EnableDenorm;
+ if (Subtarget->hasDenormModeInst()) {
+ const SDValue EnableDenormValue =
+ getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, SL, Subtarget);
+
+ EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs,
+ DAG.getEntryNode(), EnableDenormValue);
+ } else {
+ const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE,
+ SL, MVT::i32);
+ EnableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, BindParamVTs,
+ DAG.getEntryNode(), EnableDenormValue,
+ BitField);
+ }
+
SDValue Ops[3] = {
NegDivScale0,
EnableDenorm.getValue(0),
@@ -7648,19 +7671,29 @@ SDValue SITargetLowering::LowerFDIV32(SD
SDValue Fma2 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Mul,
NumeratorScaled, Mul);
- SDValue Fma3 = getFPTernOp(DAG, ISD::FMA,SL, MVT::f32, Fma2, Fma1, Mul, Fma2);
+ SDValue Fma3 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, Fma2, Fma1, Mul, Fma2);
SDValue Fma4 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3,
NumeratorScaled, Fma3);
if (!Subtarget->hasFP32Denormals()) {
- const SDValue DisableDenormValue =
- DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32);
- SDValue DisableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, MVT::Other,
- Fma4.getValue(1),
- DisableDenormValue,
- BitField,
- Fma4.getValue(2));
+
+ SDValue DisableDenorm;
+ if (Subtarget->hasDenormModeInst()) {
+ const SDValue DisableDenormValue =
+ getSPDenormModeValue(FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, SL, Subtarget);
+
+ DisableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, MVT::Other,
+ Fma4.getValue(1), DisableDenormValue,
+ Fma4.getValue(2));
+ } else {
+ const SDValue DisableDenormValue =
+ DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32);
+
+ DisableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, MVT::Other,
+ Fma4.getValue(1), DisableDenormValue,
+ BitField, Fma4.getValue(2));
+ }
SDValue OutputChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
DisableDenorm, DAG.getRoot());
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=367969&r1=367968&r2=367969&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Mon Aug 5 19:16:11 2019
@@ -2671,6 +2671,7 @@ bool SIInstrInfo::isSchedulingBoundary(c
MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
+ MI.getOpcode() == AMDGPU::S_DENORM_MODE ||
changesVGPRIndexingMode(MI);
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=367969&r1=367968&r2=367969&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Mon Aug 5 19:16:11 2019
@@ -266,6 +266,11 @@ def SIload_d16_hi_i8 : SDNode<"AMDGPUISD
[SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;
+def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
+ SDTypeProfile<0 ,1, [SDTCisInt<0>]>,
+ [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]
+>;
+
//===----------------------------------------------------------------------===//
// ValueType helpers
//===----------------------------------------------------------------------===//
@@ -689,7 +694,7 @@ def SIMM16bit : ImmLeaf <i32,
>;
def UIMM16bit : ImmLeaf <i32,
- [{return isUInt<16>(Imm); }]
+ [{return isUInt<16>(Imm);}]
>;
class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
Modified: llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td?rev=367969&r1=367968&r2=367969&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td Mon Aug 5 19:16:11 2019
@@ -1168,7 +1168,10 @@ let SubtargetPredicate = isGFX10Plus in
def S_ROUND_MODE :
SOPP<0x024, (ins s16imm:$simm16), "s_round_mode $simm16">;
def S_DENORM_MODE :
- SOPP<0x025, (ins s16imm:$simm16), "s_denorm_mode $simm16">;
+ SOPP<0x025, (ins i32imm:$simm16), "s_denorm_mode $simm16",
+ [(SIdenorm_mode (i32 timm:$simm16))]> {
+ let hasSideEffects = 1;
+ }
def S_TTRACEDATA_IMM :
SOPP<0x028, (ins s16imm:$simm16), "s_ttracedata_imm $simm16">;
} // End SubtargetPredicate = isGFX10Plus
Modified: llvm/trunk/test/CodeGen/AMDGPU/fdiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fdiv.ll?rev=367969&r1=367968&r2=367969&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fdiv.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fdiv.ll Mon Aug 5 19:16:11 2019
@@ -1,6 +1,7 @@
-; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,PREGFX10,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,PREGFX10,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,PREGFX10,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; These tests check that fdiv is expanded correctly and also test that the
@@ -17,14 +18,16 @@
; GCN-DAG: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
; GCN-DAG: v_rcp_f32_e32 [[NUM_RCP:v[0-9]+]], [[NUM_SCALE]]
-; GCN: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; PREGFX10: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
+; GFX10: s_denorm_mode 15
; GCN: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
; GCN: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]]
; GCN: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
; GCN: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]]
; GCN: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]]
; GCN: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]]
-; GCN: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
+; PREGFX10: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
+; GFX10: s_denorm_mode 12
; GCN: v_div_fmas_f32 [[FMAS:v[0-9]+]], [[F]], [[B]], [[E]]
; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
define amdgpu_kernel void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) #0 {
@@ -39,17 +42,28 @@ entry:
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS
; GCN: v_div_scale_f32 [[NUM_SCALE:v[0-9]+]]
-; GCN-DAG: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
; GCN-DAG: v_rcp_f32_e32 [[NUM_RCP:v[0-9]+]], [[NUM_SCALE]]
-; GCN-NOT: s_setreg
-; GCN: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
-; GCN: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]]
-; GCN: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
-; GCN: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]]
-; GCN: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]]
-; GCN: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]]
-; GCN-NOT: s_setreg
+; PREGFX10-DAG: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
+; PREGFX10-NOT: s_setreg
+; PREGFX10: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
+; PREGFX10: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]]
+; PREGFX10: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
+; PREGFX10: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]]
+; PREGFX10: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]]
+; PREGFX10: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]]
+; PREGFX10-NOT: s_setreg
+
+; GFX10-NOT: s_denorm_mode
+; GFX10: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
+; GFX10: v_fmac_f32_e32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]]
+; GFX10: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
+; GFX10: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
+; GFX10: v_fma_f32 [[D:v[0-9]+]], [[C]], -[[NUM_SCALE]], [[DEN_SCALE]]
+; GFX10: v_fmac_f32_e32 [[E:v[0-9]+]], [[D]], [[B]]
+; GFX10: v_fmac_f32_e64 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]]
+; GFX10-NOT: s_denorm_mode
+
; GCN: v_div_fmas_f32 [[FMAS:v[0-9]+]], [[F]], [[B]], [[E]]
; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
define amdgpu_kernel void @fdiv_f32_denormals(float addrspace(1)* %out, float %a, float %b) #2 {
@@ -88,7 +102,8 @@ entry:
; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
; GCN-NOT: [[RESULT]]
-; GCN-NOT: s_setreg
+; PREGFX10-NOT: s_setreg
+; GFX10-NOT: s_denorm_mode
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fdiv_fast_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 {
entry:
@@ -269,7 +284,7 @@ define amdgpu_kernel void @fdiv_v4f32_ar
ret void
}
-attributes #0 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="-fp32-denormals,-flat-for-global" }
+attributes #0 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="-fp32-denormals,+fp64-fp16-denormals,-flat-for-global" }
attributes #1 = { nounwind "enable-unsafe-fp-math"="true" "target-features"="-fp32-denormals,-flat-for-global" }
attributes #2 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="+fp32-denormals,-flat-for-global" }
Modified: llvm/trunk/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt?rev=367969&r1=367968&r2=367969&view=diff
==============================================================================
--- llvm/trunk/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt (original)
+++ llvm/trunk/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt Mon Aug 5 19:16:11 2019
@@ -13616,7 +13616,7 @@
# GFX10: s_decperflevel 0xc1d1 ; encoding: [0xd1,0xc1,0x95,0xbf]
0xd1,0xc1,0x95,0xbf
-# GFX10: s_denorm_mode 0x0 ; encoding: [0x00,0x00,0xa5,0xbf]
+# GFX10: s_denorm_mode 0 ; encoding: [0x00,0x00,0xa5,0xbf]
0x00,0x00,0xa5,0xbf
# GFX10: s_denorm_mode 0x1234 ; encoding: [0x34,0x12,0xa5,0xbf]
More information about the llvm-commits
mailing list