[llvm] 8b127a8 - [AMDGPU] Fix inserting combined s_nop in bundles

Austin Kerbow via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 28 14:34:51 PDT 2020


Author: Austin Kerbow
Date: 2020-10-28T14:34:04-07:00
New Revision: 8b127a8661a160a12b09d32094268c4988c5424c

URL: https://github.com/llvm/llvm-project/commit/8b127a8661a160a12b09d32094268c4988c5424c
DIFF: https://github.com/llvm/llvm-project/commit/8b127a8661a160a12b09d32094268c4988c5424c.diff

LOG: [AMDGPU] Fix inserting combined s_nop in bundles

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D90334

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
    llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.p.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 5a146fcd4faa..8153056b783f 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -207,9 +207,18 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
   return NoHazard;
 }
 
-static void insertNoopInBundle(MachineInstr *MI, const SIInstrInfo &TII) {
-  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
-      .addImm(0);
+static void insertNoopsInBundle(MachineInstr *MI, const SIInstrInfo &TII,
+                                unsigned Quantity) {
+  while (Quantity > 0) {
+    unsigned Arg;
+    if (Quantity >= 8)
+      Arg = 7;
+    else
+      Arg = Quantity - 1;
+    Quantity -= Arg + 1;
+    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
+        .addImm(Arg);
+  }
 }
 
 void GCNHazardRecognizer::processBundle() {
@@ -220,11 +229,11 @@ void GCNHazardRecognizer::processBundle() {
     CurrCycleInstr = &*MI;
     unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr);
 
-    if (IsHazardRecognizerMode)
+    if (IsHazardRecognizerMode) {
       fixHazards(CurrCycleInstr);
 
-    for (unsigned i = 0; i < WaitStates; ++i)
-      insertNoopInBundle(CurrCycleInstr, TII);
+      insertNoopsInBundle(CurrCycleInstr, TII, WaitStates);
+    }
 
     // It’s unnecessary to track more than MaxLookAhead instructions. Since we
     // include the bundled MI directly after, only add a maximum of

diff  --git a/llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir b/llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir
index 8229ec7fae9c..d0879cd82ab7 100644
--- a/llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir
+++ b/llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir
@@ -64,3 +64,21 @@ body: |
     }
     S_ENDPGM 0
 ...
+
+# GCN-LABEL: name: vmem_vcc_hazard_in_bundle
+# GCN:      S_LOAD_DWORDX2_IMM
+# GCN-NEXT: S_NOP 3
+# GCN:      BUFFER_LOAD_DWORD_OFFEN
+---
+name: vmem_vcc_hazard_in_bundle
+body: |
+  bb.0:
+    BUNDLE {
+      $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+      $vgpr0 = IMPLICIT_DEF
+      $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+      $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
+      $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
+    }
+    S_ENDPGM 0
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.p.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.p.ll
index dd1a801dda6c..1d73a7d7396b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.p.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.p.ll
@@ -11,7 +11,6 @@
 ; LOOP: s_mov_b32 m0, 0{{$}}
 ; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
 ; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
-; GFX8-NEXT: s_nop 0
 ; LOOP-NEXT: ds_gws_sema_p gds
 ; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1)

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll
index 9a53b097a6f5..a535bfc990a1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll
@@ -16,7 +16,6 @@
 ; LOOP: s_mov_b32 m0, 0{{$}}
 ; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
 ; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
-; GFX8-NEXT: s_nop 0
 ; LOOP-NEXT: ds_gws_sema_release_all gds
 ; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1)

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll
index 652646128e1b..9100f9c309e1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll
@@ -11,7 +11,6 @@
 ; LOOP: s_mov_b32 m0, 0{{$}}
 ; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
 ; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
-; GFX8-NEXT: s_nop 0
 ; LOOP-NEXT: ds_gws_sema_v gds
 ; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1)


        


More information about the llvm-commits mailing list