[llvm] r361124 - [AMDGPU] gfx1010 Avoid SMEM WAR hazard for some s_waitcnt values

Carl Ritson via llvm-commits llvm-commits at lists.llvm.org
Mon May 20 00:20:12 PDT 2019


Author: critson
Date: Mon May 20 00:20:12 2019
New Revision: 361124

URL: http://llvm.org/viewvc/llvm-project?rev=361124&view=rev
Log:
[AMDGPU] gfx1010 Avoid SMEM WAR hazard for some s_waitcnt values

Summary:
Avoid introducing hazard mitigation when lgkmcnt is reduced to 0.
Clarify code comments to explain assumptions made for this hazard
mitigation.  Expand and correct test cases to cover variants of
s_waitcnt.

Reviewers: nhaehnle, rampitec

Subscribers: arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62058

Modified:
    llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
    llvm/trunk/test/CodeGen/AMDGPU/smem-war-hazard.mir

Modified: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp?rev=361124&r1=361123&r2=361124&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp Mon May 20 00:20:12 2019
@@ -901,6 +901,7 @@ bool GCNHazardRecognizer::fixSMEMtoVecto
 
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
+  const AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(ST.getCPU());
   const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
   if (!SDST) {
     for (const auto &MO : MI->implicit_operands()) {
@@ -919,22 +920,37 @@ bool GCNHazardRecognizer::fixSMEMtoVecto
     return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI);
   };
 
-  // This assumes that there will be s_waitcnt lgkmcnt(0) or equivalent
-  // between any at risk SMEM and any SALU dependent on the SMEM results.
-  auto IsExpiredFn = [TII] (MachineInstr *MI, int) {
+  auto IsExpiredFn = [TII, IV] (MachineInstr *MI, int) {
     if (MI) {
       if (TII->isSALU(*MI)) {
-        if (TII->isSOPP(*MI))
-          return false;
         switch (MI->getOpcode()) {
         case AMDGPU::S_SETVSKIP:
         case AMDGPU::S_VERSION:
         case AMDGPU::S_WAITCNT_VSCNT:
         case AMDGPU::S_WAITCNT_VMCNT:
         case AMDGPU::S_WAITCNT_EXPCNT:
-        case AMDGPU::S_WAITCNT_LGKMCNT:
+          // These instructions cannot mitigate the hazard.
           return false;
+        case AMDGPU::S_WAITCNT_LGKMCNT:
+          // Reducing lgkmcnt count to 0 always mitigates the hazard.
+          return (MI->getOperand(1).getImm() == 0) &&
+                 (MI->getOperand(0).getReg() == AMDGPU::SGPR_NULL);
+        case AMDGPU::S_WAITCNT: {
+          const int64_t Imm = MI->getOperand(0).getImm();
+          AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm);
+          return (Decoded.LgkmCnt == 0);
+        }
         default:
+          // SOPP instructions cannot mitigate the hazard.
+          if (TII->isSOPP(*MI))
+            return false;
+          // At this point the SALU can be assumed to mitigate the hazard
+          // because either:
+          // (a) it is independent of the at risk SMEM (breaking chain),
+          // or
+          // (b) it is dependent on the SMEM, in which case an appropriate
+          //     s_waitcnt lgkmcnt _must_ exist between it and the at risk
+          //     SMEM instruction.
           return true;
         }
       }

Modified: llvm/trunk/test/CodeGen/AMDGPU/smem-war-hazard.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/smem-war-hazard.mir?rev=361124&r1=361123&r2=361124&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/smem-war-hazard.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/smem-war-hazard.mir Mon May 20 00:20:12 2019
@@ -29,13 +29,13 @@ body: |
     S_ENDPGM 0
 ...
 
-# GCN-LABEL: name: hazard_smem_war_related_clause
+# GCN-LABEL: name: hazard_smem_war_dependent_salu
 # GCN:      S_LOAD_DWORD_IMM
 # GCN-NEXT: S_WAITCNT
 # GCN-NEXT: S_ADD_U32
 # GCN-NEXT: V_CMP_EQ_F32
 ---
-name: hazard_smem_war_related_clause
+name: hazard_smem_war_dependent_salu
 body: |
   bb.0:
     liveins: $sgpr0, $sgpr1, $sgpr4, $vgpr0, $vgpr1
@@ -46,19 +46,128 @@ body: |
     S_ENDPGM 0
 ...
 
-# GCN-LABEL: name: hazard_smem_war_related_clause_vmcnt
+# GCN-LABEL: name: hazard_smem_war_independent_salu
 # GCN:      S_LOAD_DWORD_IMM
-# GCN-NEXT: S_WAITCNT 3952{{$}}
+# GCN-NEXT: S_WAITCNT
 # GCN-NEXT: S_ADD_U32
 # GCN-NEXT: V_CMP_EQ_F32
 ---
-name: hazard_smem_war_related_clause_vmcnt
+name: hazard_smem_war_independent_salu
 body: |
   bb.0:
-    liveins: $sgpr0, $sgpr1, $sgpr4, $vgpr0, $vgpr1
+    liveins: $sgpr0, $sgpr1, $sgpr4, $sgpr5, $vgpr0, $vgpr1
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+    S_WAITCNT 0
+    $sgpr3 = S_ADD_U32 $sgpr5, $sgpr4, implicit-def $scc
+    $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_only_smem
+# GCN:      S_LOAD_DWORD_IMM
+# GCN-NEXT: S_LOAD_DWORD_IMM
+# GCN-NEXT: $sgpr_null = S_MOV_B32 0
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_only_smem
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $sgpr6, $sgpr7, $vgpr0, $vgpr1
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+    $sgpr5 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0, 0
+    $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_only_waitcnt_0
+# GCN:      S_LOAD_DWORD_IMM
+# GCN-NEXT: S_WAITCNT
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_only_waitcnt_0
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+    S_WAITCNT 0
+    $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_only_vmcnt_0
+# GCN:      S_LOAD_DWORD_IMM
+# GCN-NEXT: S_WAITCNT 3952{{$}}
+# GCN-NEXT: $sgpr_null = S_MOV_B32 0
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_only_vmcnt_0
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
     $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
     S_WAITCNT 3952
-    $sgpr3 = S_ADD_U32 $sgpr2, $sgpr4, implicit-def $scc
+    $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_only_expcnt_0
+# GCN:      S_LOAD_DWORD_IMM
+# GCN-NEXT: S_WAITCNT 53007{{$}}
+# GCN-NEXT: $sgpr_null = S_MOV_B32 0
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_only_expcnt_0
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+    S_WAITCNT 53007
+    $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_only_lgkmcnt_0
+# GCN:      S_LOAD_DWORD_IMM
+# GCN-NEXT: S_WAITCNT 49279{{$}}
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_only_lgkmcnt_0
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+    S_WAITCNT 49279
+    $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_only_waitcnt_lgkmcnt_0
+# GCN:      S_LOAD_DWORD_IMM
+# GCN-NEXT: S_WAITCNT_LGKMCNT
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_only_waitcnt_lgkmcnt_0
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+    S_WAITCNT_LGKMCNT $sgpr_null, 0
+    $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_only_waitcnt_lgkmcnt_1
+# GCN:      S_LOAD_DWORD_IMM
+# GCN-NEXT: S_WAITCNT_LGKMCNT
+# GCN-NEXT: $sgpr_null = S_MOV_B32 0
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_only_waitcnt_lgkmcnt_1
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+    S_WAITCNT_LGKMCNT $sgpr_null, 1
     $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
     S_ENDPGM 0
 ...




More information about the llvm-commits mailing list