[llvm] [AMDGPU] Handle hazard in v_scalef32_sr_fp4_* conversions (PR #118589)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 4 05:50:49 PST 2024
================
@@ -913,24 +913,31 @@ getDstSelForwardingOperand(const MachineInstr &MI, const GCNSubtarget &ST) {
// (instructions with dest byte sel, e.g. CVT_SR_BF8_F32) and
// op_sel[3:2]
// != 0
- if (SIInstrInfo::isSDWA(MI)) {
+ if (SIInstrInfo::isSDWA(MI))
// Type 1: SDWA with dst_sel != DWORD
if (auto *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel))
- if (DstSel->getImm() == AMDGPU::SDWA::DWORD)
- return nullptr;
- } else {
- // Type 2 && Type 3: (VOP3 which write the hi bits) || (FP8DstSelInst
- // with op_sel[3:2] != 0)
- if (!AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::op_sel) ||
- !(TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm() &
- SISrcMods::DST_OP_SEL ||
- (AMDGPU::isFP8DstSelInst(Opcode) &&
- (TII->getNamedOperand(MI, AMDGPU::OpName::src2_modifiers)->getImm() &
- SISrcMods::OP_SEL_0))))
- return nullptr;
- }
-
- return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+ if (DstSel->getImm() != AMDGPU::SDWA::DWORD)
+ return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+
+ if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::op_sel)) {
+ // Type 2: VOP3 which write the hi bits
+ if (TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm() &
+ SISrcMods::DST_OP_SEL)
+ return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+
+ // Type 3: FP8DstSelInst with op_sel[3:2] != 0)
+ if (AMDGPU::isFP8DstSelInst(Opcode) &&
+ (TII->getNamedOperand(MI, AMDGPU::OpName::src2_modifiers)->getImm() &
+ SISrcMods::OP_SEL_0))
+ return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+ }
+
+ // Special case: nop is required for all the opsel values for fp4 sr variant
+ // cvt scale instructions
+ if (AMDGPU::isFP4DstSelInst(Opcode))
----------------
arsenm wrote:
Maybe isFP8DstSelInst and isFP4DstSelInst should be combined into a single query that covers both kinds of opcode and also reports which type is used.
https://github.com/llvm/llvm-project/pull/118589
More information about the llvm-commits mailing list