[llvm] r364074 - [AMDGPU] hazard recognizer for fp atomic to s_denorm_mode
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 21 09:30:14 PDT 2019
Author: rampitec
Date: Fri Jun 21 09:30:14 2019
New Revision: 364074
URL: http://llvm.org/viewvc/llvm-project?rev=364074&view=rev
Log:
[AMDGPU] hazard recognizer for fp atomic to s_denorm_mode
An s_denorm_mode that follows an FP atomic requires 3 wait states, unless an
s_waitcnt or a VALU instruction is issued in between.
Differential Revision: https://reviews.llvm.org/D63619
Added:
llvm/trunk/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td
llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td
llvm/trunk/lib/Target/AMDGPU/SIDefines.h
llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
Modified: llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td?rev=364074&r1=364073&r2=364074&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td Fri Jun 21 09:30:14 2019
@@ -691,34 +691,53 @@ class MUBUF_AtomicRet_Pseudo<string opNa
multiclass MUBUF_Pseudo_Atomics_NO_RTN <string opName,
RegisterClass vdataClass,
ValueType vdataType,
- SDPatternOperator atomic> {
+ SDPatternOperator atomic,
+ bit isFP = getIsFP<vdataType>.ret> {
+ let FPAtomic = isFP in
def _OFFSET : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass>,
MUBUFAddr64Table <0, NAME>;
+
+ let FPAtomic = isFP in
def _ADDR64 : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass>,
MUBUFAddr64Table <1, NAME>;
+
+ let FPAtomic = isFP in
def _OFFEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
+
+ let FPAtomic = isFP in
+
def _IDXEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
+
+ let FPAtomic = isFP in
def _BOTHEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
}
multiclass MUBUF_Pseudo_Atomics_RTN <string opName,
RegisterClass vdataClass,
ValueType vdataType,
- SDPatternOperator atomic> {
+ SDPatternOperator atomic,
+ bit isFP = getIsFP<vdataType>.ret> {
+ let FPAtomic = isFP in
def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
[(set vdataType:$vdata,
(atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$slc),
vdataType:$vdata_in))]>,
MUBUFAddr64Table <0, NAME # "_RTN">;
+ let FPAtomic = isFP in
def _ADDR64_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
[(set vdataType:$vdata,
(atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc),
vdataType:$vdata_in))]>,
MUBUFAddr64Table <1, NAME # "_RTN">;
+ let FPAtomic = isFP in
def _OFFEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
+
+ let FPAtomic = isFP in
def _IDXEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
+
+ let FPAtomic = isFP in
def _BOTHEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
}
Modified: llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td?rev=364074&r1=364073&r2=364074&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td Fri Jun 21 09:30:14 2019
@@ -273,7 +273,8 @@ multiclass FLAT_Atomic_Pseudo<
ValueType vt,
SDPatternOperator atomic = null_frag,
ValueType data_vt = vt,
- RegisterClass data_rc = vdst_rc> {
+ RegisterClass data_rc = vdst_rc,
+ bit isFP = getIsFP<data_vt>.ret> {
def "" : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
(ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, SLC:$slc),
@@ -281,6 +282,7 @@ multiclass FLAT_Atomic_Pseudo<
GlobalSaddrTable<0, opName>,
AtomicNoRet <opName, 0> {
let PseudoInstr = NAME;
+ let FPAtomic = isFP;
}
def _RTN : FLAT_AtomicRet_Pseudo <opName,
@@ -290,7 +292,9 @@ multiclass FLAT_Atomic_Pseudo<
[(set vt:$vdst,
(atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
GlobalSaddrTable<0, opName#"_rtn">,
- AtomicNoRet <opName, 1>;
+ AtomicNoRet <opName, 1>{
+ let FPAtomic = isFP;
+ }
}
multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
@@ -299,7 +303,8 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_
ValueType vt,
SDPatternOperator atomic = null_frag,
ValueType data_vt = vt,
- RegisterClass data_rc = vdst_rc> {
+ RegisterClass data_rc = vdst_rc,
+ bit isFP = getIsFP<data_vt>.ret> {
def "" : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
@@ -309,6 +314,7 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_
AtomicNoRet <opName, 0> {
let has_saddr = 1;
let PseudoInstr = NAME;
+ let FPAtomic = isFP;
}
def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
@@ -320,6 +326,7 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_
let has_saddr = 1;
let enabled_saddr = 1;
let PseudoInstr = NAME#"_SADDR";
+ let FPAtomic = isFP;
}
}
@@ -329,7 +336,8 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN
ValueType vt,
SDPatternOperator atomic = null_frag,
ValueType data_vt = vt,
- RegisterClass data_rc = vdst_rc> {
+ RegisterClass data_rc = vdst_rc,
+ bit isFP = getIsFP<data_vt>.ret> {
def _RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_rc:$vdst),
@@ -340,6 +348,7 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN
GlobalSaddrTable<0, opName#"_rtn">,
AtomicNoRet <opName, 1> {
let has_saddr = 1;
+ let FPAtomic = isFP;
}
def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
@@ -351,6 +360,7 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN
let has_saddr = 1;
let enabled_saddr = 1;
let PseudoInstr = NAME#"_SADDR_RTN";
+ let FPAtomic = isFP;
}
}
Modified: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp?rev=364074&r1=364073&r2=364074&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp Fri Jun 21 09:30:14 2019
@@ -145,6 +145,9 @@ GCNHazardRecognizer::getHazardType(SUnit
if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
return NoopHazard;
+ if (checkFPAtomicToDenormModeHazard(MI) > 0)
+ return NoopHazard;
+
if (ST.hasNoDataDepHazard())
return NoHazard;
@@ -247,6 +250,8 @@ unsigned GCNHazardRecognizer::PreEmitNoo
if (ST.hasNSAtoVMEMBug())
WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));
+ WaitStates = std::max(WaitStates, checkFPAtomicToDenormModeHazard(MI));
+
if (ST.hasNoDataDepHazard())
return WaitStates;
@@ -1138,3 +1143,39 @@ int GCNHazardRecognizer::checkNSAtoVMEMH
return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1);
}
+
+/// Check the FP atomic -> S_DENORM_MODE hazard for \p MI.
+///
+/// An S_DENORM_MODE needs FPAtomicToDenormModeWaitStates wait states after a
+/// preceding VMEM or FLAT instruction that is flagged as an FP atomic. The
+/// search is treated as expired as soon as that many wait states have been
+/// seen, or when a VALU instruction or one of the S_WAITCNT* instructions
+/// listed below is encountered.
+///
+/// \returns 0 if \p MI is not S_DENORM_MODE; otherwise the required number of
+/// wait states minus the value reported by getWaitStatesSince(). Callers
+/// treat a result greater than zero as a hazard.
+int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
+  // Declared const with a constant initializer so that the capture-less
+  // lambda below can read it directly.
+  const int FPAtomicToDenormModeWaitStates = 3;
+
+  if (MI->getOpcode() != AMDGPU::S_DENORM_MODE)
+    return 0;
+
+  // Only VMEM/FLAT instructions carrying the FPAtomic flag are hazardous.
+  auto IsHazardFn = [] (MachineInstr *I) {
+    if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isFLAT(*I))
+      return false;
+    return SIInstrInfo::isFPAtomic(*I);
+  };
+
+  auto IsExpiredFn = [] (MachineInstr *MI, int WaitStates) {
+    // Use the named constant rather than a second literal so the expiry
+    // threshold cannot drift from the requirement returned below.
+    if (WaitStates >= FPAtomicToDenormModeWaitStates ||
+        SIInstrInfo::isVALU(*MI))
+      return true;
+
+    // Any of these wait instructions also ends the search.
+    switch (MI->getOpcode()) {
+    case AMDGPU::S_WAITCNT:
+    case AMDGPU::S_WAITCNT_VSCNT:
+    case AMDGPU::S_WAITCNT_VMCNT:
+    case AMDGPU::S_WAITCNT_EXPCNT:
+    case AMDGPU::S_WAITCNT_LGKMCNT:
+    case AMDGPU::S_WAITCNT_IDLE:
+      return true;
+    default:
+      break;
+    }
+
+    return false;
+  };
+
+  return FPAtomicToDenormModeWaitStates -
+         ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn);
+}
Modified: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h?rev=364074&r1=364073&r2=364074&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h Fri Jun 21 09:30:14 2019
@@ -84,6 +84,7 @@ private:
int checkAnyInstHazards(MachineInstr *MI);
int checkReadM0Hazards(MachineInstr *SMovRel);
int checkNSAtoVMEMHazard(MachineInstr *MI);
+ int checkFPAtomicToDenormModeHazard(MachineInstr *MI);
void fixHazards(MachineInstr *MI);
bool fixVcmpxPermlaneHazards(MachineInstr *MI);
bool fixVMEMtoScalarWriteHazards(MachineInstr *MI);
Modified: llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td?rev=364074&r1=364073&r2=364074&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td Fri Jun 21 09:30:14 2019
@@ -716,9 +716,11 @@ defm IMAGE_ATOMIC_OR : MIMG_Atomic <mimg
defm IMAGE_ATOMIC_XOR : MIMG_Atomic <mimg<0x1a>, "image_atomic_xor">;
defm IMAGE_ATOMIC_INC : MIMG_Atomic <mimg<0x1b>, "image_atomic_inc">;
defm IMAGE_ATOMIC_DEC : MIMG_Atomic <mimg<0x1c>, "image_atomic_dec">;
+//let FPAtomic = 1 in {
//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"image_atomic_fcmpswap", 0x0000001d, 1>; -- not on VI
//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"image_atomic_fmin", 0x0000001e>; -- not on VI
//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"image_atomic_fmax", 0x0000001f>; -- not on VI
+//} // End let FPAtomic = 1
defm IMAGE_SAMPLE : MIMG_Sampler_WQM <0x00000020, AMDGPUSample>;
defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM <0x00000021, AMDGPUSample_cl>;
defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, AMDGPUSample_d>;
Modified: llvm/trunk/lib/Target/AMDGPU/SIDefines.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIDefines.h?rev=364074&r1=364073&r2=364074&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIDefines.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIDefines.h Fri Jun 21 09:30:14 2019
@@ -93,7 +93,10 @@ enum : uint64_t {
IsNonFlatSeg = UINT64_C(1) << 51,
// Uses floating point double precision rounding mode
- FPDPRounding = UINT64_C(1) << 52
+ FPDPRounding = UINT64_C(1) << 52,
+
+ // Instruction is FP atomic.
+ FPAtomic = UINT64_C(1) << 53
};
// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td?rev=364074&r1=364073&r2=364074&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td Fri Jun 21 09:30:14 2019
@@ -118,6 +118,9 @@ class InstSI <dag outs, dag ins, string
// rounding mode flags
field bit FPDPRounding = 0;
+ // Instruction is FP atomic.
+ field bit FPAtomic = 0;
+
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = SALU;
let TSFlags{1} = VALU;
@@ -179,6 +182,8 @@ class InstSI <dag outs, dag ins, string
let TSFlags{52} = FPDPRounding;
+ let TSFlags{53} = FPAtomic;
+
let SchedRW = [Write32Bit];
field bits<1> DisableSIDecoder = 0;
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h?rev=364074&r1=364073&r2=364074&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h Fri Jun 21 09:30:14 2019
@@ -631,6 +631,14 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
}
+ /// \returns true if the FPAtomic bit is set in the TSFlags of \p MI's
+ /// instruction descriptor.
+ static bool isFPAtomic(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
+ }
+
+ /// \returns true if the FPAtomic bit is set in the TSFlags of the
+ /// instruction descriptor looked up for \p Opcode.
+ bool isFPAtomic(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
+ }
+
bool isVGPRCopy(const MachineInstr &MI) const {
assert(MI.isCopy());
unsigned Dest = MI.getOperand(0).getReg();
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=364074&r1=364073&r2=364074&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Fri Jun 21 09:30:14 2019
@@ -1243,6 +1243,17 @@ class getVALUDstForVT<ValueType VT> {
VOPDstS64orS32)))); // else VT == i1
}
+// Returns 1 in `ret` if VT is one of the floating point types f16, v2f16,
+// f32, v2f32, f64 or v2f64, and 0 for any other value type.
+class getIsFP<ValueType VT> {
+ bit ret = !if(!eq(VT.Value, f16.Value), 1,
+ !if(!eq(VT.Value, v2f16.Value), 1,
+ !if(!eq(VT.Value, f32.Value), 1,
+ !if(!eq(VT.Value, v2f32.Value), 1,
+ !if(!eq(VT.Value, f64.Value), 1,
+ !if(!eq(VT.Value, v2f64.Value), 1,
+ 0))))));
+}
+
// Returns the register class to use for the destination of VOP[12C]
// instructions with SDWA extension
class getSDWADstForVT<ValueType VT> {
@@ -1254,11 +1265,7 @@ class getSDWADstForVT<ValueType VT> {
// Returns the register class to use for source 0 of VOP[12C]
// instructions for the given VT.
class getVOPSrc0ForVT<ValueType VT> {
- bit isFP = !if(!eq(VT.Value, f16.Value), 1,
- !if(!eq(VT.Value, v2f16.Value), 1,
- !if(!eq(VT.Value, f32.Value), 1,
- !if(!eq(VT.Value, f64.Value), 1,
- 0))));
+ bit isFP = getIsFP<VT>.ret;
RegisterOperand ret =
!if(isFP,
@@ -1292,9 +1299,7 @@ class getVregSrcForVT<ValueType VT> {
}
class getSDWASrcForVT <ValueType VT> {
- bit isFP = !if(!eq(VT.Value, f16.Value), 1,
- !if(!eq(VT.Value, f32.Value), 1,
- 0));
+ bit isFP = getIsFP<VT>.ret;
RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32);
RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32);
RegisterOperand ret = !if(isFP, retFlt, retInt);
@@ -1303,11 +1308,7 @@ class getSDWASrcForVT <ValueType VT> {
// Returns the register class to use for sources of VOP3 instructions for the
// given VT.
class getVOP3SrcForVT<ValueType VT> {
- bit isFP = !if(!eq(VT.Value, f16.Value), 1,
- !if(!eq(VT.Value, v2f16.Value), 1,
- !if(!eq(VT.Value, f32.Value), 1,
- !if(!eq(VT.Value, f64.Value), 1,
- 0))));
+ bit isFP = getIsFP<VT>.ret;
RegisterOperand ret =
!if(!eq(VT.Size, 128),
VSrc_128,
@@ -1351,10 +1352,7 @@ class isModifierType<ValueType SrcVT> {
// Return type of input modifiers operand for specified input operand
class getSrcMod <ValueType VT, bit EnableF32SrcMods> {
- bit isFP = !if(!eq(VT.Value, f16.Value), 1,
- !if(!eq(VT.Value, f32.Value), 1,
- !if(!eq(VT.Value, f64.Value), 1,
- 0)));
+ bit isFP = getIsFP<VT>.ret;
bit isPacked = isPackedType<VT>.ret;
Operand ret = !if(!eq(VT.Size, 64),
!if(isFP, FP64InputMods, Int64InputMods),
@@ -1373,10 +1371,7 @@ class getOpSelMod <ValueType VT> {
// Return type of input modifiers operand specified input operand for DPP
class getSrcModExt <ValueType VT> {
- bit isFP = !if(!eq(VT.Value, f16.Value), 1,
- !if(!eq(VT.Value, f32.Value), 1,
- !if(!eq(VT.Value, f64.Value), 1,
- 0)));
+ bit isFP = getIsFP<VT>.ret;
Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods);
}
Added: llvm/trunk/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir?rev=364074&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir Fri Jun 21 09:30:14 2019
@@ -0,0 +1,447 @@
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: flat_atomic_fcmpswap_to_s_denorm_mode
+# GCN: FLAT_ATOMIC_FCMPSWAP
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: flat_atomic_fcmpswap_to_s_denorm_mode
+body: |
+ bb.0:
+ FLAT_ATOMIC_FCMPSWAP undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_atomic_fcmpswap_x2_to_s_denorm_mode
+# GCN: FLAT_ATOMIC_FCMPSWAP_X2
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: flat_atomic_fcmpswap_x2_to_s_denorm_mode
+body: |
+ bb.0:
+ FLAT_ATOMIC_FCMPSWAP_X2 undef %0:vreg_64, undef %1:vreg_128, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_atomic_fmax_to_s_denorm_mode
+# GCN: FLAT_ATOMIC_FMAX
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: flat_atomic_fmax_to_s_denorm_mode
+body: |
+ bb.0:
+ FLAT_ATOMIC_FMAX undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_atomic_fmax_x2_to_s_denorm_mode
+# GCN: FLAT_ATOMIC_FMAX_X2
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: flat_atomic_fmax_x2_to_s_denorm_mode
+body: |
+ bb.0:
+ FLAT_ATOMIC_FMAX_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_atomic_fmin_to_s_denorm_mode
+# GCN: FLAT_ATOMIC_FMIN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: flat_atomic_fmin_to_s_denorm_mode
+body: |
+ bb.0:
+ FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_atomic_fmin_x2_to_s_denorm_mode
+# GCN: FLAT_ATOMIC_FMIN_X2
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: flat_atomic_fmin_x2_to_s_denorm_mode
+body: |
+ bb.0:
+ FLAT_ATOMIC_FMIN_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
+# GCN: FLAT_ATOMIC_FCMPSWAP_X2_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
+body: |
+ bb.0:
+ %2:vreg_64 = FLAT_ATOMIC_FCMPSWAP_X2_RTN undef %0:vreg_64, undef %1:vreg_128, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_atomic_fmax_rtn_to_s_denorm_mode
+# GCN: FLAT_ATOMIC_FMAX_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: flat_atomic_fmax_rtn_to_s_denorm_mode
+body: |
+ bb.0:
+ %2:vgpr_32 = FLAT_ATOMIC_FMAX_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode
+# GCN: FLAT_ATOMIC_FMAX_X2_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode
+body: |
+ bb.0:
+ %2:vreg_64 = FLAT_ATOMIC_FMAX_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_atomic_fmin_rtn_to_s_denorm_mode
+# GCN: FLAT_ATOMIC_FMIN_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: flat_atomic_fmin_rtn_to_s_denorm_mode
+body: |
+ bb.0:
+ %2:vgpr_32 = FLAT_ATOMIC_FMIN_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode
+# GCN: FLAT_ATOMIC_FMIN_X2_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode
+body: |
+ bb.0:
+ %2:vreg_64 = FLAT_ATOMIC_FMIN_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_atomic_fcmpswap_rtn_to_s_denorm_mode
+# GCN: FLAT_ATOMIC_FCMPSWAP_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: flat_atomic_fcmpswap_rtn_to_s_denorm_mode
+body: |
+ bb.0:
+ %2:vgpr_32 = FLAT_ATOMIC_FCMPSWAP_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fcmpswap_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FCMPSWAP
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: global_atomic_fcmpswap_to_s_denorm_mode
+body: |
+ bb.0:
+ GLOBAL_ATOMIC_FCMPSWAP undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fcmpswap_x2_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: global_atomic_fcmpswap_x2_to_s_denorm_mode
+body: |
+ bb.0:
+ GLOBAL_ATOMIC_FCMPSWAP_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fmax_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FMAX
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: global_atomic_fmax_to_s_denorm_mode
+body: |
+ bb.0:
+ GLOBAL_ATOMIC_FMAX undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fmax_x2_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FMAX_X2
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: global_atomic_fmax_x2_to_s_denorm_mode
+body: |
+ bb.0:
+ GLOBAL_ATOMIC_FMAX_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fmin_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FMIN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: global_atomic_fmin_to_s_denorm_mode
+body: |
+ bb.0:
+ GLOBAL_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fmin_x2_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FMIN_X2
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: global_atomic_fmin_x2_to_s_denorm_mode
+body: |
+ bb.0:
+ GLOBAL_ATOMIC_FMIN_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fcmpswap_rtn_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FCMPSWAP_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: global_atomic_fcmpswap_rtn_to_s_denorm_mode
+body: |
+ bb.0:
+ %2:vgpr_32 = GLOBAL_ATOMIC_FCMPSWAP_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
+body: |
+ bb.0:
+ %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fmax_rtn_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FMAX_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: global_atomic_fmax_rtn_to_s_denorm_mode
+body: |
+ bb.0:
+ %2:vgpr_32 = GLOBAL_ATOMIC_FMAX_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fmax_x2_rtn_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FMAX_X2_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: global_atomic_fmax_x2_rtn_to_s_denorm_mode
+body: |
+ bb.0:
+ %2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fmin_rtn_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FMIN_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: global_atomic_fmin_rtn_to_s_denorm_mode
+body: |
+ bb.0:
+ %2:vgpr_32 = GLOBAL_ATOMIC_FMIN_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fmin_x2_rtn_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FMIN_X2_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: global_atomic_fmin_x2_rtn_to_s_denorm_mode
+body: |
+ bb.0:
+ %2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fcmpswap_saddr_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FCMPSWAP_SADDR
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: global_atomic_fcmpswap_saddr_to_s_denorm_mode
+body: |
+ bb.0:
+ GLOBAL_ATOMIC_FCMPSWAP_SADDR undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode
+body: |
+ bb.0:
+ %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FMAX_SADDR_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode
+body: |
+ bb.0:
+ %2:vgpr_32 = GLOBAL_ATOMIC_FMAX_SADDR_RTN undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
+body: |
+ bb.0:
+ %2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FMIN_SADDR_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode
+body: |
+ bb.0:
+ %2:vgpr_32 = GLOBAL_ATOMIC_FMIN_SADDR_RTN undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode
+# GCN: GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_DENORM_MODE
+---
+name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode
+body: |
+ bb.0:
+ %2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_fp_atomic_to_s_denorm_mode_waitcnt
+# GCN: FLAT_ATOMIC_FMIN
+# GCN-NEXT: S_WAITCNT
+# GCN-NEXT: S_DENORM_MODE
+---
+name: flat_fp_atomic_to_s_denorm_mode_waitcnt
+body: |
+ bb.0:
+ FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ S_WAITCNT 0
+ S_DENORM_MODE 0
+...
+
+# GCN-LABEL: name: flat_fp_atomic_to_s_denorm_mode_valu
+# GCN: FLAT_ATOMIC_FMIN
+# GCN-NEXT: V_ADD_F32_e32
+# GCN-NEXT: S_DENORM_MODE
+---
+name: flat_fp_atomic_to_s_denorm_mode_valu
+body: |
+ bb.0:
+ FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
+ %2:vgpr_32 = V_ADD_F32_e32 undef %1:vgpr_32, undef %1:vgpr_32, implicit $exec
+ S_DENORM_MODE 0
+...
More information about the llvm-commits
mailing list