[llvm] [AMDGPU] Handle hazard in v_scalef32_sr_fp4_* conversions (PR #118589)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 5 10:27:30 PST 2024
================
@@ -916,21 +916,29 @@ getDstSelForwardingOperand(const MachineInstr &MI, const GCNSubtarget &ST) {
if (SIInstrInfo::isSDWA(MI)) {
// Type 1: SDWA with dst_sel != DWORD
if (auto *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel))
- if (DstSel->getImm() == AMDGPU::SDWA::DWORD)
- return nullptr;
- } else {
- // Type 2 && Type 3: (VOP3 which write the hi bits) || (FP8DstSelInst
- // with op_sel[3:2] != 0)
- if (!AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::op_sel) ||
- !(TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm() &
- SISrcMods::DST_OP_SEL ||
- (AMDGPU::isFP8DstSelInst(Opcode) &&
- (TII->getNamedOperand(MI, AMDGPU::OpName::src2_modifiers)->getImm() &
- SISrcMods::OP_SEL_0))))
- return nullptr;
- }
-
- return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+ if (DstSel->getImm() != AMDGPU::SDWA::DWORD)
+ return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+ }
+
+ if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::op_sel)) {
+ // Type 2: VOP3 which write the hi bits
+ if (TII->getNamedImmOperand(MI, AMDGPU::OpName::src0_modifiers) &
+ SISrcMods::DST_OP_SEL)
+ return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+
+ // Type 3: FP8DstSelInst with op_sel[3:2] != 0
+ if (AMDGPU::isFP8DstSelInst(Opcode) &&
+ (TII->getNamedImmOperand(MI, AMDGPU::OpName::src2_modifiers) &
+ SISrcMods::OP_SEL_0))
+ return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+ }
+
+ // Special case: a nop is required for every op_sel value of the fp4 sr
+ // variant cvt scale instructions
+ if (AMDGPU::isFP4DstSelInst(Opcode))
----------------
arsenm wrote:
Can you combine the fp4 and fp8 queries?
https://github.com/llvm/llvm-project/pull/118589
More information about the llvm-commits mailing list