[llvm] 8b127a8 - [AMDGPU] Fix inserting combined s_nop in bundles
Austin Kerbow via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 28 14:34:51 PDT 2020
Author: Austin Kerbow
Date: 2020-10-28T14:34:04-07:00
New Revision: 8b127a8661a160a12b09d32094268c4988c5424c
URL: https://github.com/llvm/llvm-project/commit/8b127a8661a160a12b09d32094268c4988c5424c
DIFF: https://github.com/llvm/llvm-project/commit/8b127a8661a160a12b09d32094268c4988c5424c.diff
LOG: [AMDGPU] Fix inserting combined s_nop in bundles
Reviewed By: rampitec
Differential Revision: https://reviews.llvm.org/D90334
Added:
Modified:
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.p.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 5a146fcd4faa..8153056b783f 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -207,9 +207,18 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
return NoHazard;
}
-static void insertNoopInBundle(MachineInstr *MI, const SIInstrInfo &TII) {
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
- .addImm(0);
+static void insertNoopsInBundle(MachineInstr *MI, const SIInstrInfo &TII,
+ unsigned Quantity) {
+ while (Quantity > 0) {
+ unsigned Arg;
+ if (Quantity >= 8)
+ Arg = 7;
+ else
+ Arg = Quantity - 1;
+ Quantity -= Arg + 1;
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
+ .addImm(Arg);
+ }
}
void GCNHazardRecognizer::processBundle() {
@@ -220,11 +229,11 @@ void GCNHazardRecognizer::processBundle() {
CurrCycleInstr = &*MI;
unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr);
- if (IsHazardRecognizerMode)
+ if (IsHazardRecognizerMode) {
fixHazards(CurrCycleInstr);
- for (unsigned i = 0; i < WaitStates; ++i)
- insertNoopInBundle(CurrCycleInstr, TII);
+ insertNoopsInBundle(CurrCycleInstr, TII, WaitStates);
+ }
// It’s unnecessary to track more than MaxLookAhead instructions. Since we
// include the bundled MI directly after, only add a maximum of
diff --git a/llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir b/llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir
index 8229ec7fae9c..d0879cd82ab7 100644
--- a/llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir
+++ b/llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir
@@ -64,3 +64,21 @@ body: |
}
S_ENDPGM 0
...
+
+# GCN-LABEL: name: vmem_vcc_hazard_in_bundle
+# GCN: S_LOAD_DWORDX2_IMM
+# GCN-NEXT: S_NOP 3
+# GCN: BUFFER_LOAD_DWORD_OFFEN
+---
+name: vmem_vcc_hazard_in_bundle
+body: |
+ bb.0:
+ BUNDLE {
+ $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
+ }
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.p.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.p.ll
index dd1a801dda6c..1d73a7d7396b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.p.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.p.ll
@@ -11,7 +11,6 @@
; LOOP: s_mov_b32 m0, 0{{$}}
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
-; GFX8-NEXT: s_nop 0
; LOOP-NEXT: ds_gws_sema_p gds
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll
index 9a53b097a6f5..a535bfc990a1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll
@@ -16,7 +16,6 @@
; LOOP: s_mov_b32 m0, 0{{$}}
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
-; GFX8-NEXT: s_nop 0
; LOOP-NEXT: ds_gws_sema_release_all gds
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll
index 652646128e1b..9100f9c309e1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll
@@ -11,7 +11,6 @@
; LOOP: s_mov_b32 m0, 0{{$}}
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
-; GFX8-NEXT: s_nop 0
; LOOP-NEXT: ds_gws_sema_v gds
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1)
More information about the llvm-commits mailing list