[llvm] [AMDGPU] Handle hazard in v_scalef32_sr_fp4_* conversions (PR #118589)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 4 05:50:49 PST 2024


================
@@ -913,24 +913,31 @@ getDstSelForwardingOperand(const MachineInstr &MI, const GCNSubtarget &ST) {
   // (instructions with dest byte sel, e.g. CVT_SR_BF8_F32) and
   // op_sel[3:2]
   // != 0
-  if (SIInstrInfo::isSDWA(MI)) {
+  if (SIInstrInfo::isSDWA(MI))
     // Type 1: SDWA with dst_sel != DWORD
     if (auto *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel))
-      if (DstSel->getImm() == AMDGPU::SDWA::DWORD)
-        return nullptr;
-  } else {
-    // Type 2 && Type 3: (VOP3 which write the hi bits) || (FP8DstSelInst
-    // with op_sel[3:2] != 0)
-    if (!AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::op_sel) ||
-        !(TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm() &
-              SISrcMods::DST_OP_SEL ||
-          (AMDGPU::isFP8DstSelInst(Opcode) &&
-           (TII->getNamedOperand(MI, AMDGPU::OpName::src2_modifiers)->getImm() &
-            SISrcMods::OP_SEL_0))))
-      return nullptr;
-  }
-
-  return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+      if (DstSel->getImm() != AMDGPU::SDWA::DWORD)
+        return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+
+  if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::op_sel)) {
+    // Type 2: VOP3 which write the hi bits
+    if (TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm() &
+        SISrcMods::DST_OP_SEL)
+      return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+
+    // Type 3: FP8DstSelInst with op_sel[3:2] != 0)
+    if (AMDGPU::isFP8DstSelInst(Opcode) &&
+        (TII->getNamedOperand(MI, AMDGPU::OpName::src2_modifiers)->getImm() &
+         SISrcMods::OP_SEL_0))
+      return TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+  }
+
+  // Special case: nop is required for all the opsel values for fp4 sr variant
+  // cvt scale instructions
+  if (AMDGPU::isFP4DstSelInst(Opcode))
----------------
arsenm wrote:

Maybe isFP8DstSelInst and isFP4DstSelInst should be combined into a single query that covers both kinds of opcodes and also reports which type is used.

https://github.com/llvm/llvm-project/pull/118589


More information about the llvm-commits mailing list