[llvm] 7bcf4d6 - [AMDGPU] Correctly insert s_nops for dst forwarding hazard (#100276)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 22 11:38:28 PDT 2024
Author: Jeffrey Byrnes
Date: 2024-08-22T11:38:24-07:00
New Revision: 7bcf4d63cf3b7bcc789808ea4e9c8369e94467dc
URL: https://github.com/llvm/llvm-project/commit/7bcf4d63cf3b7bcc789808ea4e9c8369e94467dc
DIFF: https://github.com/llvm/llvm-project/commit/7bcf4d63cf3b7bcc789808ea4e9c8369e94467dc.diff
LOG: [AMDGPU] Correctly insert s_nops for dst forwarding hazard (#100276)
MI300 ISA section 4.5 states there is a hazard between "VALU op which
uses OPSEL or SDWA with changes the result’s bit position" and "VALU op
consumes result of that op"
This includes the case where the second op is SDWA with same dest and
dst_sel != DWORD && dst_unused == UNUSED_PRESERVE. In this case, there
is an implicit read of the first op dst and the compiler needs to
resolve this hazard. Confirmed with HW team.
We model dst_unused == UNUSED_PRESERVE as tied-def of implicit operand,
so this PR checks for that.
MI300_SP_MAS section 1.3.9.2 specifies that CVT_SR_FP8_F32 and
CVT_SR_BF8_F32 with opsel[3:2] !=0 have dest forwarding issue.
Currently, we only check for CVT_SR_FP8_F32 with opsel[3] != 0 --
this PR adds support for opsel[2] != 0 as well.
Added:
llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir
Modified:
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/lib/Target/AMDGPU/VOP3Instructions.td
llvm/lib/Target/AMDGPU/VOPInstructions.td
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index a8b171aa82840a..a6b7264405ade1 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -876,6 +876,7 @@ GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
return DataIdx >= 0 &&
TRI->regsOverlap(MI.getOperand(DataIdx).getReg(), Reg);
};
+
int WaitStatesNeededForDef =
VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates);
WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
@@ -883,6 +884,70 @@ GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
return WaitStatesNeeded;
}
+/// Dest sel forwarding issue occurs if additional logic is needed to swizzle /
+/// pack the computed value into correct bit position of the dest register. This
+/// occurs if we have SDWA with dst_sel != DWORD or if we have op_sel with
+/// dst_sel that is not aligned to the register. This function analyzes the \p
+/// MI and \returns an operand with dst forwarding issue, or nullptr if
+/// none exists.
+static const MachineOperand *
+getDstSelForwardingOperand(const MachineInstr &MI, const GCNSubtarget &ST) {
+ if (!SIInstrInfo::isVALU(MI))
+ return nullptr;
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+
+ unsigned Opcode = MI.getOpcode();
+
+ // There are three different types of instructions
+ // which produce forwarded dest: 1. SDWA with dst_sel != DWORD, 2. VOP3
+ // which write hi bits (e.g. op_sel[3] == 1), and 3. CVT_SR_FP8_F32 and
+ // CVT_SR_BF8_F32 with op_sel[3:2]
+ // != 0
+ if (SIInstrInfo::isSDWA(MI)) {
+ // Type 1: SDWA with dst_sel != DWORD
+ if (auto *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel))
+ if (DstSel->getImm() == AMDGPU::SDWA::DWORD)
+ return nullptr;
+ } else {
+ // Type 2 && Type 3: (VOP3 which write the hi bits) || (CVT_SR_FP8_F32 and
+ // CVT_SR_BF8_F32 with op_sel[3:2] != 0)
+ if (!AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::op_sel) ||
+ !(TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm() &
+ SISrcMods::DST_OP_SEL ||
+ (AMDGPU::isFP8DstSelInst(Opcode) &&
+ (TII->getNamedOperand(MI, AMDGPU::OpName::src2_modifiers)->getImm() &
+ SISrcMods::OP_SEL_0))))
+ return nullptr;
+ }
+
+ return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+}
+
+/// Checks whether the provided \p MI "consumes" the operand with a Dest sel
+/// forwarding issue \p Dst . We may "consume" the Dst via a standard explicit
+/// RAW, or through irregular ways (e.g. implicit RAW, certain types of WAW)
+static bool consumesDstSelForwardingOperand(const MachineInstr *VALU,
+ const MachineOperand *Dst,
+ const SIRegisterInfo *TRI) {
+ // We must consider implicit reads of the VALU. SDWA with dst_sel and
+ // UNUSED_PRESERVE will implicitly read the result from forwarded dest,
+ // and we must account for that hazard.
+ // We also must account for WAW hazards. In particular, WAW with dest
+ // preserve semantics (e.g. VOP3 with op_sel, VOP2 &&
+ // !zeroesHigh16BitsOfDest) will read the forwarded dest for parity
+ // check for ECC. Without accounting for this hazard, the ECC will be
+ // wrong.
+ // TODO: limit to RAW (including implicit reads) + problematic WAW (i.e.
+ // complete zeroesHigh16BitsOfDest)
+ for (auto &Operand : VALU->operands()) {
+ if (Operand.isReg() && TRI->regsOverlap(Dst->getReg(), Operand.getReg())) {
+ return true;
+ }
+ }
+ return false;
+}
+
int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
int WaitStatesNeeded = 0;
@@ -913,27 +978,18 @@ int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
if (ST.hasDstSelForwardingHazard()) {
const int Shift16DefWaitstates = 1;
- auto IsShift16BitDefFn = [this, VALU](const MachineInstr &MI) {
- if (!SIInstrInfo::isVALU(MI))
- return false;
- const SIInstrInfo *TII = ST.getInstrInfo();
- if (SIInstrInfo::isSDWA(MI)) {
- if (auto *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel))
- if (DstSel->getImm() == AMDGPU::SDWA::DWORD)
- return false;
- } else {
- if (!AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::op_sel) ||
- !(TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)
- ->getImm() &
- SISrcMods::DST_OP_SEL))
- return false;
- }
+ auto IsShift16BitDefFn = [this, VALU](const MachineInstr &ProducerMI) {
const SIRegisterInfo *TRI = ST.getRegisterInfo();
- if (auto *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
- Register Def = Dst->getReg();
+ const MachineOperand *ForwardedDst =
+ getDstSelForwardingOperand(ProducerMI, ST);
+ if (ForwardedDst) {
+ return consumesDstSelForwardingOperand(VALU, ForwardedDst, TRI);
+ }
- for (const MachineOperand &Use : VALU->explicit_uses()) {
- if (Use.isReg() && TRI->regsOverlap(Def, Use.getReg()))
+ if (ProducerMI.isInlineAsm()) {
+ // Assume inline asm has dst forwarding hazard
+ for (auto &Def : ProducerMI.all_defs()) {
+ if (consumesDstSelForwardingOperand(VALU, &Def, TRI))
return true;
}
}
@@ -1030,7 +1086,7 @@ int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
// problematic thus far.
// see checkVALUHazards()
- if (!ST.has12DWordStoreHazard())
+ if (!ST.has12DWordStoreHazard() && !ST.hasDstSelForwardingHazard())
return 0;
const MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -1039,11 +1095,45 @@ int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
for (const MachineOperand &Op :
llvm::drop_begin(IA->operands(), InlineAsm::MIOp_FirstOperand)) {
if (Op.isReg() && Op.isDef()) {
- WaitStatesNeeded =
- std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));
+ if (!TRI.isVectorRegister(MRI, Op.getReg()))
+ continue;
+
+ if (ST.has12DWordStoreHazard()) {
+ WaitStatesNeeded =
+ std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));
+ }
}
}
+ if (ST.hasDstSelForwardingHazard()) {
+ const int Shift16DefWaitstates = 1;
+
+ auto IsShift16BitDefFn = [this, &IA](const MachineInstr &ProducerMI) {
+ const MachineOperand *Dst = getDstSelForwardingOperand(ProducerMI, ST);
+ // Assume inline asm reads the dst
+ if (Dst)
+ return IA->modifiesRegister(Dst->getReg(), &TRI) ||
+ IA->readsRegister(Dst->getReg(), &TRI);
+
+ if (ProducerMI.isInlineAsm()) {
+ // If MI is inline asm, assume it has dst forwarding hazard
+ for (auto &Def : ProducerMI.all_defs()) {
+ if (IA->modifiesRegister(Def.getReg(), &TRI) ||
+ IA->readsRegister(Def.getReg(), &TRI)) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+ };
+
+ int WaitStatesNeededForDef =
+ Shift16DefWaitstates -
+ getWaitStatesSince(IsShift16BitDefFn, Shift16DefWaitstates);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
+ }
+
return WaitStatesNeeded;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 85281713e22b1f..2b54429dc9a03f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2342,6 +2342,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
field bit IsFP8SrcByteSel = 0;
field bit IsFP8DstByteSel = 0;
+ field bit HasFP8DstByteSel = 0;
field bit IsFP8ByteSel = !or(IsFP8SrcByteSel, IsFP8DstByteSel);
field bit HasDst = !ne(DstVT.Value, untyped.Value);
@@ -2921,6 +2922,15 @@ def getVCMPXOpFromVCMP : InstrMapping {
let ValueCols = [["1"]];
}
+def FP8DstByteSelTable : GenericTable {
+ let FilterClass = "VOP3_Pseudo";
+ let CppTypeName = "FP8DstByteSelInfo";
+ let Fields = ["Opcode", "HasFP8DstByteSel"];
+
+ let PrimaryKey = ["Opcode"];
+ let PrimaryKeyName = "getFP8DstByteSelHelper";
+}
+
def VOPDComponentTable : GenericTable {
let FilterClass = "VOPD_Component";
let CppTypeName = "VOPDComponentInfo";
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 5b41a2cd731607..cda664a151ef54 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -385,6 +385,13 @@ struct SingleUseExceptionInfo {
bool IsInvalidSingleUseProducer;
};
+struct FP8DstByteSelInfo {
+ uint16_t Opcode;
+ bool HasFP8DstByteSel;
+};
+
+#define GET_FP8DstByteSelTable_DECL
+#define GET_FP8DstByteSelTable_IMPL
#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
@@ -629,6 +636,11 @@ bool isInvalidSingleUseProducerInst(unsigned Opc) {
return Info && Info->IsInvalidSingleUseProducer;
}
+bool isFP8DstSelInst(unsigned Opc) {
+ const FP8DstByteSelInfo *Info = getFP8DstByteSelHelper(Opc);
+ return Info ? Info->HasFP8DstByteSel : false;
+}
+
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
return Info ? Info->Opcode3Addr : ~0u;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index a4e6a7ebe0558b..35c080d8e0bebc 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -861,6 +861,9 @@ getVOPDInstInfo(unsigned VOPDOpcode, const MCInstrInfo *InstrInfo);
LLVM_READONLY
bool isTrue16Inst(unsigned Opc);
+LLVM_READONLY
+bool isFP8DstSelInst(unsigned Opc);
+
LLVM_READONLY
bool isInvalidSingleUseConsumerInst(unsigned Opc);
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 6748eff9376b0d..466114b95f9f90 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -568,6 +568,7 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
let HasSrc2Mods = 1;
let HasExtVOP3DPP = 1;
let HasOpSel = 1;
+ let HasFP8DstByteSel = 1;
let AsmVOP3OpSel = !subst(", $src2_modifiers", "",
getAsmVOP3OpSel<3, HasClamp, HasOMod,
HasSrc0FloatMods, HasSrc1FloatMods,
@@ -587,6 +588,7 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
class VOP3_CVT_SR_F8_ByteSel_Profile<ValueType SrcVT> :
VOP3_Profile<VOPProfile<[i32, SrcVT, i32, untyped]>> {
let IsFP8DstByteSel = 1;
+ let HasFP8DstByteSel = 1;
let HasClamp = 0;
defvar bytesel = (ins VGPR_32:$vdst_in, ByteSel:$byte_sel);
let Ins64 = !con(getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 3851415ab0caed..5a460ef0d42320 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -113,6 +113,8 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
let IsWMMA = P.IsWMMA;
let IsSWMMAC = P.IsSWMMAC;
+ bit HasFP8DstByteSel = P.HasFP8DstByteSel;
+
let AsmOperands = !if(isVop3OpSel,
P.AsmVOP3OpSel,
!if(!and(isVOP3P, P.IsPacked), P.AsmVOP3P, P.Asm64));
diff --git a/llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir b/llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir
new file mode 100644
index 00000000000000..e24817078d8bc9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir
@@ -0,0 +1,436 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=HAZARD %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=NOHAZARD %s
+
+---
+name: sdwa_opsel_hazard
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+ ; HAZARD-LABEL: name: sdwa_opsel_hazard
+ ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; HAZARD-NEXT: {{ $}}
+ ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+ ; HAZARD-NEXT: S_NOP 0
+ ; HAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec
+ ; HAZARD-NEXT: S_ENDPGM 0
+ ;
+ ; NOHAZARD-LABEL: name: sdwa_opsel_hazard
+ ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; NOHAZARD-NEXT: {{ $}}
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec
+ ; NOHAZARD-NEXT: S_ENDPGM 0
+ renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+ renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: sdwa_lo_opsel_hazard
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+ ; HAZARD-LABEL: name: sdwa_lo_opsel_hazard
+ ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; HAZARD-NEXT: {{ $}}
+ ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+ ; HAZARD-NEXT: S_NOP 0
+ ; HAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 4, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec
+ ; HAZARD-NEXT: S_ENDPGM 0
+ ;
+ ; NOHAZARD-LABEL: name: sdwa_lo_opsel_hazard
+ ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; NOHAZARD-NEXT: {{ $}}
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 4, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec
+ ; NOHAZARD-NEXT: S_ENDPGM 0
+ renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+ renamable $vgpr0 = V_MAD_U16_gfx9_e64 4, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: opsel_sdwa_hazard
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+ ; HAZARD-LABEL: name: opsel_sdwa_hazard
+ ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; HAZARD-NEXT: {{ $}}
+ ; HAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec
+ ; HAZARD-NEXT: S_NOP 0
+ ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ ; HAZARD-NEXT: S_ENDPGM 0
+ ;
+ ; NOHAZARD-LABEL: name: opsel_sdwa_hazard
+ ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; NOHAZARD-NEXT: {{ $}}
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ ; NOHAZARD-NEXT: S_ENDPGM 0
+ renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec
+ renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ S_ENDPGM 0
+...
+
+
+# TODO -- there is no reason for s_nop (V_ADD_U16 doesn't preserve the dest)
+
+---
+name: opsel_no_sdwa_no_hazard
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+ ; HAZARD-LABEL: name: opsel_no_sdwa_no_hazard
+ ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; HAZARD-NEXT: {{ $}}
+ ; HAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec
+ ; HAZARD-NEXT: S_NOP 0
+ ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+ ; HAZARD-NEXT: S_ENDPGM 0
+ ;
+ ; NOHAZARD-LABEL: name: opsel_no_sdwa_no_hazard
+ ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; NOHAZARD-NEXT: {{ $}}
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+ ; NOHAZARD-NEXT: S_ENDPGM 0
+ renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec
+ renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: no_opsel_sdwa_no_hazard
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+ ; HAZARD-LABEL: name: no_opsel_sdwa_no_hazard
+ ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; HAZARD-NEXT: {{ $}}
+ ; HAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_e64 killed $vgpr3, killed $vgpr4, killed $vgpr2, 0, implicit $exec
+ ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ ; HAZARD-NEXT: S_ENDPGM 0
+ ;
+ ; NOHAZARD-LABEL: name: no_opsel_sdwa_no_hazard
+ ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; NOHAZARD-NEXT: {{ $}}
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_e64 killed $vgpr3, killed $vgpr4, killed $vgpr2, 0, implicit $exec
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ ; NOHAZARD-NEXT: S_ENDPGM 0
+ renamable $vgpr0 = V_MAD_U16_e64 killed $vgpr3, killed $vgpr4, killed $vgpr2, 0, implicit $exec
+ renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ S_ENDPGM 0
+...
+
+---
+name: opsel_opsel_hazard
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+ ; HAZARD-LABEL: name: opsel_opsel_hazard
+ ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; HAZARD-NEXT: {{ $}}
+ ; HAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec
+ ; HAZARD-NEXT: S_NOP 0
+ ; HAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 4, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec
+ ; HAZARD-NEXT: S_ENDPGM 0
+ ;
+ ; NOHAZARD-LABEL: name: opsel_opsel_hazard
+ ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; NOHAZARD-NEXT: {{ $}}
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 4, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec
+ ; NOHAZARD-NEXT: S_ENDPGM 0
+ renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec
+ renamable $vgpr0 = V_MAD_U16_gfx9_e64 4, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec
+ S_ENDPGM 0
+...
+
+# TODO -- there is no reason for s_nop
+
+---
+name: opsel_opsel_no_hazard
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+ ; HAZARD-LABEL: name: opsel_opsel_no_hazard
+ ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; HAZARD-NEXT: {{ $}}
+ ; HAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec
+ ; HAZARD-NEXT: S_NOP 0
+ ; HAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec
+ ; HAZARD-NEXT: S_ENDPGM 0
+ ;
+ ; NOHAZARD-LABEL: name: opsel_opsel_no_hazard
+ ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; NOHAZARD-NEXT: {{ $}}
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec
+ ; NOHAZARD-NEXT: S_ENDPGM 0
+ renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec
+ renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec
+ S_ENDPGM 0
+...
+
+# DS_READ_U16_D16 has dest preserve semantics, but only VALU consumers have hazard
+
+---
+name: sdwa_loadsel_no_hazard
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+ ; HAZARD-LABEL: name: sdwa_loadsel_no_hazard
+ ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; HAZARD-NEXT: {{ $}}
+ ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 3, 0, 3, 3, implicit $exec
+ ; HAZARD-NEXT: renamable $vgpr0 = DS_READ_U16_D16 killed renamable $vgpr3, 0, 0, killed renamable $vgpr0, implicit $exec
+ ; HAZARD-NEXT: S_ENDPGM 0
+ ;
+ ; NOHAZARD-LABEL: name: sdwa_loadsel_no_hazard
+ ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; NOHAZARD-NEXT: {{ $}}
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 3, 0, 3, 3, implicit $exec
+ ; NOHAZARD-NEXT: renamable $vgpr0 = DS_READ_U16_D16 killed renamable $vgpr3, 0, 0, killed renamable $vgpr0, implicit $exec
+ ; NOHAZARD-NEXT: S_ENDPGM 0
+ renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 3, 0, 3, 3, implicit $exec
+ renamable $vgpr0 = DS_READ_U16_D16 killed renamable $vgpr3, 0, 0, killed renamable $vgpr0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: sdwa_sdwa_hazard
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+ ; HAZARD-LABEL: name: sdwa_sdwa_hazard
+ ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; HAZARD-NEXT: {{ $}}
+ ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+ ; HAZARD-NEXT: S_NOP 0
+ ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ ; HAZARD-NEXT: S_ENDPGM 0
+ ;
+ ; NOHAZARD-LABEL: name: sdwa_sdwa_hazard
+ ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; NOHAZARD-NEXT: {{ $}}
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ ; NOHAZARD-NEXT: S_ENDPGM 0
+ renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+ renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ S_ENDPGM 0
+...
+
+---
+name: cvt_sdwa_hazard_1
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+ ; HAZARD-LABEL: name: cvt_sdwa_hazard_1
+ ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; HAZARD-NEXT: {{ $}}
+ ; HAZARD-NEXT: renamable $vgpr0 = V_CVT_SR_FP8_F32_e64 0, killed $vgpr3, 0, killed $vgpr1, 4, $vgpr0, 0, implicit $mode, implicit $exec
+ ; HAZARD-NEXT: S_NOP 0
+ ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ ; HAZARD-NEXT: S_ENDPGM 0
+ ;
+ ; NOHAZARD-LABEL: name: cvt_sdwa_hazard_1
+ ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; NOHAZARD-NEXT: {{ $}}
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_CVT_SR_FP8_F32_e64 0, killed $vgpr3, 0, killed $vgpr1, 4, $vgpr0, 0, implicit $mode, implicit $exec
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ ; NOHAZARD-NEXT: S_ENDPGM 0
+ renamable $vgpr0 = V_CVT_SR_FP8_F32_e64 0, killed $vgpr3, 0, killed $vgpr1, 4, $vgpr0, 0, implicit $mode, implicit $exec
+ renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ S_ENDPGM 0
+...
+
+---
+name: cvt_sdwa_hazard_2
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+ ; HAZARD-LABEL: name: cvt_sdwa_hazard_2
+ ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; HAZARD-NEXT: {{ $}}
+ ; HAZARD-NEXT: renamable $vgpr0 = V_CVT_SR_FP8_F32_e64 8, killed $vgpr3, 0, killed $vgpr1, 0, $vgpr0, 0, implicit $mode, implicit $exec
+ ; HAZARD-NEXT: S_NOP 0
+ ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ ; HAZARD-NEXT: S_ENDPGM 0
+ ;
+ ; NOHAZARD-LABEL: name: cvt_sdwa_hazard_2
+ ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; NOHAZARD-NEXT: {{ $}}
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_CVT_SR_FP8_F32_e64 8, killed $vgpr3, 0, killed $vgpr1, 0, $vgpr0, 0, implicit $mode, implicit $exec
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ ; NOHAZARD-NEXT: S_ENDPGM 0
+ renamable $vgpr0 = V_CVT_SR_FP8_F32_e64 8, killed $vgpr3, 0, killed $vgpr1, 0, $vgpr0, 0, implicit $mode, implicit $exec
+ renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ S_ENDPGM 0
+...
+
+---
+name: cvt_sdwa_hazard_3
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+ ; HAZARD-LABEL: name: cvt_sdwa_hazard_3
+ ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; HAZARD-NEXT: {{ $}}
+ ; HAZARD-NEXT: renamable $vgpr0 = V_CVT_SR_FP8_F32_e64 8, killed $vgpr3, 0, killed $vgpr1, 4, $vgpr0, 0, implicit $mode, implicit $exec
+ ; HAZARD-NEXT: S_NOP 0
+ ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ ; HAZARD-NEXT: S_ENDPGM 0
+ ;
+ ; NOHAZARD-LABEL: name: cvt_sdwa_hazard_3
+ ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; NOHAZARD-NEXT: {{ $}}
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_CVT_SR_FP8_F32_e64 8, killed $vgpr3, 0, killed $vgpr1, 4, $vgpr0, 0, implicit $mode, implicit $exec
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ ; NOHAZARD-NEXT: S_ENDPGM 0
+ renamable $vgpr0 = V_CVT_SR_FP8_F32_e64 8, killed $vgpr3, 0, killed $vgpr1, 4, $vgpr0, 0, implicit $mode, implicit $exec
+ renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ S_ENDPGM 0
+...
+
+---
+name: cvt_sdwa_no_hazard
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+ ; HAZARD-LABEL: name: cvt_sdwa_no_hazard
+ ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; HAZARD-NEXT: {{ $}}
+ ; HAZARD-NEXT: renamable $vgpr0 = V_CVT_SR_FP8_F32_e64 0, killed $vgpr3, 0, killed $vgpr1, 0, $vgpr0, 0, implicit $mode, implicit $exec
+ ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ ; HAZARD-NEXT: S_ENDPGM 0
+ ;
+ ; NOHAZARD-LABEL: name: cvt_sdwa_no_hazard
+ ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; NOHAZARD-NEXT: {{ $}}
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_CVT_SR_FP8_F32_e64 0, killed $vgpr3, 0, killed $vgpr1, 0, $vgpr0, 0, implicit $mode, implicit $exec
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ ; NOHAZARD-NEXT: S_ENDPGM 0
+ renamable $vgpr0 = V_CVT_SR_FP8_F32_e64 0, killed $vgpr3, 0, killed $vgpr1, 0, $vgpr0, 0, implicit $mode, implicit $exec
+ renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ S_ENDPGM 0
+...
+
+# TODO -- there is no reason for s_nop (V_ADD_U16 doesn't preserve the dest)
+
+---
+name: sdwa_nosdwa_no_hazard
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+ ; HAZARD-LABEL: name: sdwa_nosdwa_no_hazard
+ ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; HAZARD-NEXT: {{ $}}
+ ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+ ; HAZARD-NEXT: S_NOP 0
+ ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+ ; HAZARD-NEXT: S_ENDPGM 0
+ ;
+ ; NOHAZARD-LABEL: name: sdwa_nosdwa_no_hazard
+ ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; NOHAZARD-NEXT: {{ $}}
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+ ; NOHAZARD-NEXT: S_ENDPGM 0
+ renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+ renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: inline_sdwa_hazard
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+ ; HAZARD-LABEL: name: inline_sdwa_hazard
+ ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; HAZARD-NEXT: {{ $}}
+ ; HAZARD-NEXT: INLINEASM &"v_or_b32 %0, 0, %1", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def $vgpr0, 327689 /* reguse:SReg_1_with_sub0 */, $vgpr1
+ ; HAZARD-NEXT: S_NOP 0
+ ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ ; HAZARD-NEXT: S_ENDPGM 0
+ ;
+ ; NOHAZARD-LABEL: name: inline_sdwa_hazard
+ ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; NOHAZARD-NEXT: {{ $}}
+ ; NOHAZARD-NEXT: INLINEASM &"v_or_b32 %0, 0, %1", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def $vgpr0, 327689 /* reguse:SReg_1_with_sub0 */, $vgpr1
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ ; NOHAZARD-NEXT: S_ENDPGM 0
+ INLINEASM &"v_or_b32 %0, 0, %1", 32, 327690, def $vgpr0, 327689, $vgpr1
+ renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ S_ENDPGM 0
+...
+
+---
+name: sdwa_inline_hazard
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+ ; HAZARD-LABEL: name: sdwa_inline_hazard
+ ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; HAZARD-NEXT: {{ $}}
+ ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ ; HAZARD-NEXT: S_NOP 0
+ ; HAZARD-NEXT: INLINEASM &"v_or_b32 %0, 0, %1", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def $vgpr0, 327689 /* reguse:SReg_1_with_sub0 */, $vgpr1
+ ; HAZARD-NEXT: S_ENDPGM 0
+ ;
+ ; NOHAZARD-LABEL: name: sdwa_inline_hazard
+ ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; NOHAZARD-NEXT: {{ $}}
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ ; NOHAZARD-NEXT: INLINEASM &"v_or_b32 %0, 0, %1", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def $vgpr0, 327689 /* reguse:SReg_1_with_sub0 */, $vgpr1
+ ; NOHAZARD-NEXT: S_ENDPGM 0
+ renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ INLINEASM &"v_or_b32 %0, 0, %1", 32, 327690, def $vgpr0, 327689, $vgpr1
+ S_ENDPGM 0
+...
+
+
+---
+name: inline_inline_hazard
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+
+ ; HAZARD-LABEL: name: inline_inline_hazard
+ ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; HAZARD-NEXT: {{ $}}
+ ; HAZARD-NEXT: INLINEASM &"v_or_b32 %0, 0, %1", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def $vgpr0, 327689 /* reguse:SReg_1_with_sub0 */, $vgpr1
+ ; HAZARD-NEXT: S_NOP 0
+ ; HAZARD-NEXT: INLINEASM &"v_or_b32 %0, 0, %1", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def $vgpr0, 327689 /* reguse:SReg_1_with_sub0 */, $vgpr1
+ ; HAZARD-NEXT: S_ENDPGM 0
+ ;
+ ; NOHAZARD-LABEL: name: inline_inline_hazard
+ ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode
+ ; NOHAZARD-NEXT: {{ $}}
+ ; NOHAZARD-NEXT: INLINEASM &"v_or_b32 %0, 0, %1", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def $vgpr0, 327689 /* reguse:SReg_1_with_sub0 */, $vgpr1
+ ; NOHAZARD-NEXT: INLINEASM &"v_or_b32 %0, 0, %1", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def $vgpr0, 327689 /* reguse:SReg_1_with_sub0 */, $vgpr1
+ ; NOHAZARD-NEXT: S_ENDPGM 0
+ INLINEASM &"v_or_b32 %0, 0, %1", 32, 327690, def $vgpr0, 327689, $vgpr1
+ INLINEASM &"v_or_b32 %0, 0, %1", 32, 327690, def $vgpr0, 327689, $vgpr1
+ S_ENDPGM 0
+...
+
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
index d3fc96d7ff8012..8313f5b655efba 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
@@ -375,6 +375,7 @@ define i32 @test_cvt_sr_bf8_f32_byte1(float %x, i32 %r, i32 %old) {
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,0]
+; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_mov_b32_e32 v0, v2
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
@@ -469,6 +470,7 @@ define i32 @test_cvt_sr_fp8_f32_byte1(float %x, i32 %r, i32 %old) {
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,0]
+; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_mov_b32_e32 v0, v2
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
More information about the llvm-commits
mailing list