[llvm] [AMDGPU] Correctly insert s_nops for implicit read of SDWA (PR #100276)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 29 10:34:40 PDT 2024


================
@@ -913,30 +934,39 @@ int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
     const int Shift16DefWaitstates = 1;
 
     auto IsShift16BitDefFn = [this, VALU](const MachineInstr &MI) {
-      if (!SIInstrInfo::isVALU(MI))
-        return false;
       const SIInstrInfo *TII = ST.getInstrInfo();
-      if (SIInstrInfo::isSDWA(MI)) {
-        if (auto *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel))
-          if (DstSel->getImm() == AMDGPU::SDWA::DWORD)
-            return false;
-      } else {
-        if (!AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::op_sel) ||
-            !(TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)
-                  ->getImm() &
-              SISrcMods::DST_OP_SEL))
-          return false;
-      }
       const SIRegisterInfo *TRI = ST.getRegisterInfo();
-      if (auto *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
+      SmallVector<const MachineOperand *, 4> Dsts;
+      const MachineOperand *ForwardedDst = getDstSelForwardingOperand(MI, ST);
+      if (ForwardedDst) {
+        Dsts.push_back(ForwardedDst);
+      } else if (MI.isInlineAsm()) {
+        // Assume inline asm has dst forwarding hazard
+        for (auto &Op :
+             drop_begin(MI.operands(), InlineAsm::MIOp_FirstOperand)) {
+          if (Op.isReg() && Op.isDef()) {
+            Dsts.push_back(&Op);
+          }
+        }
+      }
+
+      for (auto Dst : Dsts) {
         Register Def = Dst->getReg();
 
-        for (const MachineOperand &Use : VALU->explicit_uses()) {
+        for (const MachineOperand &Use : VALU->all_uses()) {
           if (Use.isReg() && TRI->regsOverlap(Def, Use.getReg()))
             return true;
         }
-      }
 
+        // We also read the dst for sub 32 writes to the same register for ECC
+        if (auto *ThisDst = TII->getNamedOperand(*VALU, AMDGPU::OpName::vdst)) {
+          Register ThisDef = ThisDst->getReg();
+          if (!TRI->regsOverlap(Def, ThisDef))
+            return false;
+          if (TII->isVOP3(*VALU) && !TII->isVOP3P(*VALU))
+            return true;
----------------
arsenm wrote:

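Check these first? (Presumably: do the cheap `isVOP3` / `!isVOP3P` encoding tests quoted just above before the `regsOverlap` query.)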

https://github.com/llvm/llvm-project/pull/100276


More information about the llvm-commits mailing list