[llvm] 37d9078 - [HazardRec] Allow inserting multiple wait-states simultaneously
Austin Kerbow via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 20 17:06:11 PDT 2020
Author: Austin Kerbow
Date: 2020-10-20T17:03:47-07:00
New Revision: 37d907899f498c6944fc234ed2742b9923b88952
URL: https://github.com/llvm/llvm-project/commit/37d907899f498c6944fc234ed2742b9923b88952
DIFF: https://github.com/llvm/llvm-project/commit/37d907899f498c6944fc234ed2742b9923b88952.diff
LOG: [HazardRec] Allow inserting multiple wait-states simultaneously
If a target can encode multiple wait-states into a single noop, allow emitting
such instructions directly.
Reviewed By: rampitec, dmgreen
Differential Revision: https://reviews.llvm.org/D89753
Added:
Modified:
llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h
llvm/include/llvm/CodeGen/TargetInstrInfo.h
llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
llvm/lib/CodeGen/TargetInstrInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.h
llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir
llvm/test/CodeGen/AMDGPU/frem.ll
llvm/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir
llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
llvm/test/CodeGen/AMDGPU/mai-hazards.mir
llvm/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h b/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h
index 37590f496ca2..53c5fc0edee5 100644
--- a/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h
+++ b/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h
@@ -114,6 +114,14 @@ class ScheduleHazardRecognizer {
// Default implementation: count it as a cycle.
AdvanceCycle();
}
+
+ /// EmitNoops - This callback is invoked when noops were added to the
+ /// instruction stream.
+ virtual void EmitNoops(unsigned Quantity) {
+ // Default implementation: count it as a cycle.
+ for (unsigned i = 0; i < Quantity; ++i)
+ EmitNoop();
+ }
};
} // end namespace llvm
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index f00741530b8f..96cca0257782 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1343,6 +1343,11 @@ class TargetInstrInfo : public MCInstrInfo {
virtual void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
+ /// Insert noops into the instruction stream at the specified point.
+ virtual void insertNoops(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned Quantity) const;
+
/// Return the noop instruction to use for a noop.
virtual void getNoop(MCInst &NopInst) const;
diff --git a/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp b/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
index 4f88f4d3dd6a..82ed386db827 100644
--- a/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
+++ b/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
@@ -82,11 +82,9 @@ bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) {
for (MachineInstr &MI : MBB) {
// If we need to emit noops prior to this instruction, then do so.
unsigned NumPreNoops = HazardRec->PreEmitNoops(&MI);
- for (unsigned i = 0; i != NumPreNoops; ++i) {
- HazardRec->EmitNoop();
- TII->insertNoop(MBB, MachineBasicBlock::iterator(MI));
- ++NumNoops;
- }
+ HazardRec->EmitNoops(NumPreNoops);
+ TII->insertNoops(MBB, MachineBasicBlock::iterator(MI), NumPreNoops);
+ NumNoops += NumPreNoops;
HazardRec->EmitInstruction(&MI);
if (HazardRec->atIssueLimit()) {
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index fe9feb5f116b..7e8fe93eb8e0 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -69,6 +69,15 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
llvm_unreachable("Target didn't implement insertNoop!");
}
+/// insertNoops - Insert noops into the instruction stream at the specified
+/// point.
+void TargetInstrInfo::insertNoops(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned Quantity) const {
+ for (unsigned i = 0; i < Quantity; ++i)
+ insertNoop(MBB, MI);
+}
+
static bool isAsmComment(const char *Str, const MCAsmInfo &MAI) {
return strncmp(Str, MAI.getCommentString().data(),
MAI.getCommentString().size()) == 0;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 0c921bccc775..626b23581337 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1047,9 +1047,6 @@ void GCNPassConfig::addPreEmitPass() {
//
// Here we add a stand-alone hazard recognizer pass which can handle all
// cases.
- //
- // FIXME: This stand-alone pass will emit indiv. S_NOP 0, as needed. It would
- // be better for it to emit S_NOP <N> when possible.
addPass(&PostRAHazardRecognizerID);
addPass(&BranchRelaxationPassID);
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 77ed364dedcb..dbd3d3517295 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1533,27 +1533,26 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
.addMemOperand(MMO);
}
-void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- int Count) const {
+void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ insertNoops(MBB, MI, 1);
+}
+
+void SIInstrInfo::insertNoops(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned Quantity) const {
DebugLoc DL = MBB.findDebugLoc(MI);
- while (Count > 0) {
- int Arg;
- if (Count >= 8)
+ while (Quantity > 0) {
+ unsigned Arg;
+ if (Quantity >= 8)
Arg = 7;
else
- Arg = Count - 1;
- Count -= 8;
- BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP))
- .addImm(Arg);
+ Arg = Quantity - 1;
+ Quantity -= Arg + 1;
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP)).addImm(Arg);
}
}
-void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const {
- insertWaitStates(MBB, MI, 1);
-}
-
void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const {
auto MF = MBB.getParent();
SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 08bf3d27c74d..c77d3fb4342b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -898,12 +898,12 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
/// VALU if necessary. If present, \p MDT is updated.
void moveToVALU(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;
- void insertWaitStates(MachineBasicBlock &MBB,MachineBasicBlock::iterator MI,
- int Count) const;
-
void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
+ void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned Quantity) const override;
+
void insertReturn(MachineBasicBlock &MBB) const;
/// Return the number of wait states that result from executing this
/// instruction.
diff --git a/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir b/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir
index 08ac96ae719b..a96490f34168 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir
+++ b/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir
@@ -2,9 +2,7 @@
# GCN-LABEL: name: flat_atomic_fcmpswap_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FCMPSWAP
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fcmpswap_to_s_denorm_mode
@@ -16,9 +14,7 @@ body: |
# GCN-LABEL: name: flat_atomic_fcmpswap_x2_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FCMPSWAP_X2
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fcmpswap_x2_to_s_denorm_mode
@@ -30,9 +26,7 @@ body: |
# GCN-LABEL: name: flat_atomic_fmax_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FMAX
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmax_to_s_denorm_mode
@@ -44,9 +38,7 @@ body: |
# GCN-LABEL: name: flat_atomic_fmax_x2_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FMAX_X2
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmax_x2_to_s_denorm_mode
@@ -58,9 +50,7 @@ body: |
# GCN-LABEL: name: flat_atomic_fmin_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FMIN
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmin_to_s_denorm_mode
@@ -72,9 +62,7 @@ body: |
# GCN-LABEL: name: flat_atomic_fmin_x2_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FMIN_X2
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmin_x2_to_s_denorm_mode
@@ -86,9 +74,7 @@ body: |
# GCN-LABEL: name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FCMPSWAP_X2_RTN
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
@@ -100,9 +86,7 @@ body: |
# GCN-LABEL: name: flat_atomic_fmax_rtn_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FMAX_RTN
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmax_rtn_to_s_denorm_mode
@@ -114,9 +98,7 @@ body: |
# GCN-LABEL: name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FMAX_X2_RTN
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode
@@ -128,9 +110,7 @@ body: |
# GCN-LABEL: name: flat_atomic_fmin_rtn_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FMIN_RTN
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmin_rtn_to_s_denorm_mode
@@ -142,9 +122,7 @@ body: |
# GCN-LABEL: name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FMIN_X2_RTN
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode
@@ -156,9 +134,7 @@ body: |
# GCN-LABEL: name: flat_atomic_fcmpswap_rtn_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FCMPSWAP_RTN
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fcmpswap_rtn_to_s_denorm_mode
@@ -170,9 +146,7 @@ body: |
# GCN-LABEL: name: global_atomic_fcmpswap_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FCMPSWAP
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fcmpswap_to_s_denorm_mode
@@ -184,9 +158,7 @@ body: |
# GCN-LABEL: name: global_atomic_fcmpswap_x2_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fcmpswap_x2_to_s_denorm_mode
@@ -198,9 +170,7 @@ body: |
# GCN-LABEL: name: global_atomic_fmax_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMAX
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmax_to_s_denorm_mode
@@ -212,9 +182,7 @@ body: |
# GCN-LABEL: name: global_atomic_fmax_x2_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMAX_X2
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmax_x2_to_s_denorm_mode
@@ -226,9 +194,7 @@ body: |
# GCN-LABEL: name: global_atomic_fmin_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMIN
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmin_to_s_denorm_mode
@@ -240,9 +206,7 @@ body: |
# GCN-LABEL: name: global_atomic_fmin_x2_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMIN_X2
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmin_x2_to_s_denorm_mode
@@ -254,9 +218,7 @@ body: |
# GCN-LABEL: name: global_atomic_fcmpswap_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FCMPSWAP_RTN
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fcmpswap_rtn_to_s_denorm_mode
@@ -268,9 +230,7 @@ body: |
# GCN-LABEL: name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2_RTN
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
@@ -282,9 +242,7 @@ body: |
# GCN-LABEL: name: global_atomic_fmax_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMAX_RTN
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmax_rtn_to_s_denorm_mode
@@ -296,9 +254,7 @@ body: |
# GCN-LABEL: name: global_atomic_fmax_x2_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMAX_X2_RTN
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmax_x2_rtn_to_s_denorm_mode
@@ -310,9 +266,7 @@ body: |
# GCN-LABEL: name: global_atomic_fmin_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMIN_RTN
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmin_rtn_to_s_denorm_mode
@@ -324,9 +278,7 @@ body: |
# GCN-LABEL: name: global_atomic_fmin_x2_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMIN_X2_RTN
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmin_x2_rtn_to_s_denorm_mode
@@ -338,9 +290,7 @@ body: |
# GCN-LABEL: name: global_atomic_fcmpswap_saddr_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FCMPSWAP_SADDR
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fcmpswap_saddr_to_s_denorm_mode
@@ -352,9 +302,7 @@ body: |
# GCN-LABEL: name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode
@@ -366,9 +314,7 @@ body: |
# GCN-LABEL: name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMAX_SADDR_RTN
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode
@@ -380,9 +326,7 @@ body: |
# GCN-LABEL: name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
@@ -394,9 +338,7 @@ body: |
# GCN-LABEL: name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMIN_SADDR_RTN
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode
@@ -408,9 +350,7 @@ body: |
# GCN-LABEL: name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode
diff --git a/llvm/test/CodeGen/AMDGPU/frem.ll b/llvm/test/CodeGen/AMDGPU/frem.ll
index 9287fae037b3..c50ffcfba3e6 100644
--- a/llvm/test/CodeGen/AMDGPU/frem.ll
+++ b/llvm/test/CodeGen/AMDGPU/frem.ll
@@ -612,8 +612,7 @@ define amdgpu_kernel void @frem_f64(double addrspace(1)* %out, double addrspace(
; SI-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v9
; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc
-; SI-NEXT: s_nop 0
-; SI-NEXT: s_nop 0
+; SI-NEXT: s_nop 1
; SI-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[6:7], v[10:11]
; SI-NEXT: v_div_fixup_f64 v[4:5], v[4:5], v[2:3], v[0:1]
; SI-NEXT: v_bfe_u32 v6, v5, 20, 11
@@ -740,8 +739,7 @@ define amdgpu_kernel void @fast_frem_f64(double addrspace(1)* %out, double addrs
; SI-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v9
; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc
-; SI-NEXT: s_nop 0
-; SI-NEXT: s_nop 0
+; SI-NEXT: s_nop 1
; SI-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[6:7], v[10:11]
; SI-NEXT: v_div_fixup_f64 v[4:5], v[4:5], v[2:3], v[0:1]
; SI-NEXT: v_bfe_u32 v6, v5, 20, 11
@@ -1842,8 +1840,7 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub
; SI-NEXT: v_cmp_eq_u32_e32 vcc, v7, v9
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v3, v13
; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc
-; SI-NEXT: s_nop 0
-; SI-NEXT: s_nop 0
+; SI-NEXT: s_nop 1
; SI-NEXT: v_div_fmas_f64 v[8:9], v[16:17], v[10:11], v[14:15]
; SI-NEXT: v_div_fixup_f64 v[8:9], v[8:9], v[6:7], v[2:3]
; SI-NEXT: v_bfe_u32 v10, v9, 20, 11
@@ -1876,8 +1873,7 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub
; SI-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v11
; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc
-; SI-NEXT: s_nop 0
-; SI-NEXT: s_nop 0
+; SI-NEXT: s_nop 1
; SI-NEXT: v_div_fmas_f64 v[6:7], v[14:15], v[8:9], v[12:13]
; SI-NEXT: v_div_fixup_f64 v[6:7], v[6:7], v[4:5], v[0:1]
; SI-NEXT: v_bfe_u32 v8, v7, 20, 11
diff --git a/llvm/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir b/llvm/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir
index d5d2512795b7..b02b6b0664bc 100644
--- a/llvm/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir
+++ b/llvm/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir
@@ -40,10 +40,7 @@ body: |
# GCN-LABEL: name: vmem_vcc_hazard_ignore_bundle_instr
# GCN: S_LOAD_DWORDX2_IMM
# GCN-NEXT: }
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP 3
# GCN: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_hazard_ignore_bundle_instr
@@ -63,11 +60,7 @@ body: |
# GCN-LABEL: name: vmem_vcc_min_of_two_after_bundle
# GCN: bb.2:
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP 4
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_min_of_two_after_bundle
diff --git a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
index 1922adf5ee6c..2a1442bb0a5f 100644
--- a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
+++ b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
@@ -1,5 +1,5 @@
-# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN
-# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,SICI
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,SICI
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI,GFX9
@@ -24,26 +24,17 @@
# GCN-LABEL: bb.1:
# GCN: V_CMP_EQ_I32
-# GCN: S_NOP
-# GCN: S_NOP
-# GCN: S_NOP
-# GCN: S_NOP
+# GCN: S_NOP 3
# GCN: V_DIV_FMAS_F32
# GCN-LABEL: bb.2:
# GCN: V_CMP_EQ_I32
-# GCN: S_NOP
-# GCN: S_NOP
-# GCN: S_NOP
-# GCN: S_NOP
+# GCN: S_NOP 3
# GCN: V_DIV_FMAS_F32
# GCN-LABEL: bb.3:
# GCN: V_DIV_SCALE_F32
-# GCN: S_NOP
-# GCN: S_NOP
-# GCN: S_NOP
-# GCN: S_NOP
+# GCN: S_NOP 3
# GCN: V_DIV_FMAS_F32
name: div_fmas
@@ -76,14 +67,12 @@ body: |
# GCN-LABEL: bb.0:
# GCN: S_SETREG
-# GCN: S_NOP 0
-# GCN: S_NOP 0
+# GCN: S_NOP 1
# GCN: S_GETREG
# GCN-LABEL: bb.1:
# GCN: S_SETREG_IMM32
-# GCN: S_NOP 0
-# GCN: S_NOP 0
+# GCN: S_NOP 1
# GCN: S_GETREG
# GCN-LABEL: bb.2:
@@ -126,15 +115,15 @@ body: |
# GCN-LABEL: bb.0:
# GCN: S_SETREG
-# GCN: S_NOP 0
-# VI: S_NOP 0
-# GCN-NEXT: S_SETREG
+# SICI: S_NOP 0
+# VI: S_NOP 1
+# GCN: S_SETREG
# GCN-LABEL: bb.1:
# GCN: S_SETREG
-# GCN: S_NOP 0
-# VI: S_NOP 0
-# GCN-NEXT: S_SETREG
+# SICI: S_NOP 0
+# VI: S_NOP 1
+# GCN: S_SETREG
# GCN-LABEL: bb.2:
# GCN: S_SETREG
@@ -239,34 +228,22 @@ body: |
# GCN-LABEL: bb.0:
# GCN: V_ADD_CO_U32
-# GCN: S_NOP
-# GCN: S_NOP
-# GCN: S_NOP
-# GCN: S_NOP
+# GCN: S_NOP 3
# GCN: V_READLANE_B32
# GCN-LABEL: bb.1:
# GCN: V_ADD_CO_U32
-# GCN: S_NOP
-# GCN: S_NOP
-# GCN: S_NOP
-# GCN: S_NOP
+# GCN: S_NOP 3
# GCN: V_WRITELANE_B32
# GCN-LABEL: bb.2:
# GCN: V_ADD_CO_U32
-# GCN: S_NOP
-# GCN: S_NOP
-# GCN: S_NOP
-# GCN: S_NOP
+# GCN: S_NOP 3
# GCN: V_READLANE_B32
# GCN-LABEL: bb.3:
# GCN: V_ADD_CO_U32
-# GCN: S_NOP
-# GCN: S_NOP
-# GCN: S_NOP
-# GCN: S_NOP
+# GCN: S_NOP 3
# GCN: V_WRITELANE_B32
name: readwrite_lane
@@ -429,17 +406,12 @@ body: |
# VI-LABEL: bb.0:
# VI: V_MOV_B32_e32
-# VI-NEXT: S_NOP 0
-# VI-NEXT: S_NOP 0
+# VI-NEXT: S_NOP 1
# VI-NEXT: V_MOV_B32_dpp
# VI-LABEL: bb.1:
# VI: V_CMPX_EQ_I32_e32
-# VI-NEXT: S_NOP 0
-# VI-NEXT: S_NOP 0
-# VI-NEXT: S_NOP 0
-# VI-NEXT: S_NOP 0
-# VI-NEXT: S_NOP 0
+# VI-NEXT: S_NOP 4
# VI-NEXT: V_MOV_B32_dpp
name: dpp
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
index 31a54f1e0ffe..1420b513b034 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
@@ -7,9 +7,7 @@
; VI-LABEL: {{^}}dpp_test:
; VI: v_mov_b32_e32 v0, s{{[0-9]+}}
; VI-NOOPT: v_mov_b32_e32 v1, s{{[0-9]+}}
-; PREGFX10-OPT: s_nop 1
-; PREGFX10-NOOPT: s_nop 0
-; PREGFX10-NOOPT: s_nop 0
+; PREGFX10: s_nop 1
; VI-OPT: v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11]
; VI-NOOPT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x01,0x08,0x11]
define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) {
@@ -21,14 +19,10 @@ define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) {
; VI-LABEL: {{^}}dpp_wait_states:
; VI-NOOPT: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s{{[0-9]+}}
; VI: v_mov_b32_e32 [[VGPR0:v[0-9]+]], s{{[0-9]+}}
-; PREGFX10-OPT: s_nop 1
-; PREGFX10-NOOPT: s_nop 0
-; PREGFX10-NOOPT: s_nop 0
+; PREGFX10: s_nop 1
; VI-OPT: v_mov_b32_dpp [[VGPR0]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
; VI-NOOPT: v_mov_b32_dpp [[VGPR1]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:
-; PREGFX10-OPT: s_nop 1
-; PREGFX10-NOOPT: s_nop 0
-; PREGFX10-NOOPT: s_nop 0
+; PREGFX10: s_nop 1
; VI-OPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
; VI-NOOPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
define amdgpu_kernel void @dpp_wait_states(i32 addrspace(1)* %out, i32 %in) {
@@ -44,13 +38,10 @@ define amdgpu_kernel void @dpp_wait_states(i32 addrspace(1)* %out, i32 %in) {
; PREGFX10-OPT: s_mov_b32
; PREGFX10-NOOPT: s_waitcnt
; PREGFX10-NOOPT: v_mov_b32_e32
-; PREGFX10-NOOPT-NEXT: s_nop 0
; VI: v_mov_b32_dpp [[VGPR0:v[0-9]+]], v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
-; PREGFX10-OPT: s_nop 1
+; PREGFX10: s_nop 1
; VI: v_mov_b32_dpp [[VGPR1:v[0-9]+]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
-; PREGFX10-OPT: s_nop 1
-; PREGFX10-NOOPT: s_nop 0
-; PREGFX10-NOOPT: s_nop 0
+; PREGFX10: s_nop 1
; VI: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
define amdgpu_kernel void @dpp_first_in_bb(float addrspace(1)* %out, float addrspace(1)* %in, float %cond, float %a, float %b) {
%cmp = fcmp oeq float %cond, 0.0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
index 52562cc73e62..1d16b9ca3e4b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
@@ -5,9 +5,7 @@
; GCN-LABEL: {{^}}dpp_test:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
-; GFX8-OPT: s_nop 1
-; GFX8-NOOPT: s_nop 0
-; GFX8-NOOPT-NEXT: s_nop 0
+; GFX8: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
%tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 0) #0
@@ -18,9 +16,7 @@ define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2)
; GCN-LABEL: {{^}}dpp_test_bc:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
-; GFX8-OPT: s_nop 1
-; GFX8-NOOPT: s_nop 0
-; GFX8-NOOPT-NEXT: s_nop 0
+; GFX8: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[2,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0{{$}}
define amdgpu_kernel void @dpp_test_bc(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
%tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 2, i32 1, i32 1, i1 1) #0
@@ -34,8 +30,9 @@ define amdgpu_kernel void @dpp_test_bc(i32 addrspace(1)* %out, i32 %in1, i32 %in
; GFX8-OPT: v_add_u32_e32 [[REG:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}
; GFX8-NOOPT: v_add_u32_e64 [[REG:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX8-NOOPT: v_mov_b32_e32 v{{[0-9]+}}, 0
-; GFX8: s_nop 0
-; GFX8-NEXT: s_nop 0
+; GFX8-NOOPT: s_nop 1
+; GFX8-OPT: s_nop 0
+; GFX8-OPT-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_dpp {{v[0-9]+}}, [[REG]] quad_perm:[1,0,3,2] row_mask:0xf bank_mask:0xf
@0 = internal unnamed_addr addrspace(3) global [448 x i32] undef, align 4
define weak_odr amdgpu_kernel void @dpp_test1(i32* %arg) local_unnamed_addr {
diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards.mir
index b0906f6018de..4a10af28984d 100644
--- a/llvm/test/CodeGen/AMDGPU/mai-hazards.mir
+++ b/llvm/test/CodeGen/AMDGPU/mai-hazards.mir
@@ -3,8 +3,7 @@
# GCN-LABEL: name: valu_write_vgpr_mfma_read
# GCN: V_MOV_B32
# GCN: V_MOV_B32
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: valu_write_vgpr_mfma_read
body: |
@@ -17,8 +16,7 @@ body: |
# GCN-LABEL: name: valu_write_vgpr_accvgpr_write_read
# GCN: V_MOV_B32
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_ACCVGPR_WRITE_B32
name: valu_write_vgpr_accvgpr_write_read
body: |
@@ -41,8 +39,7 @@ body: |
# GCN-LABEL: name: mfma_write_agpr_mfma_read_overlap
# GCN: V_MFMA
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: mfma_write_agpr_mfma_read_overlap
body: |
@@ -54,8 +51,7 @@ body: |
# GCN-LABEL: name: mfma_write_agpr_mfma_read_partial
# GCN: V_MFMA
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: mfma_write_agpr_mfma_read_partial
body: |
@@ -67,10 +63,7 @@ body: |
# GCN-LABEL: name: mfma_write_agpr_mfma_srca_read_overlap
# GCN: V_MFMA
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_MFMA
name: mfma_write_agpr_mfma_srca_read_overlap
body: |
@@ -82,10 +75,7 @@ body: |
# GCN-LABEL: name: mfma_write_agpr_mfma_srcb_read_overlap
# GCN: V_MFMA
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_MFMA
name: mfma_write_agpr_mfma_srcb_read_overlap
body: |
@@ -97,10 +87,7 @@ body: |
# GCN-LABEL: name: mfma_4x4_write_agpr_accvgpr_read
# GCN: V_MFMA_F32_4X4X1F32
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_ACCVGPR_READ_B32
name: mfma_4x4_write_agpr_accvgpr_read
body: |
@@ -112,16 +99,8 @@ body: |
# GCN-LABEL: name: mfma_16x16_write_agpr_accvgpr_read
# GCN: V_MFMA_F32_16X16X1F32
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_ACCVGPR_READ_B32
name: mfma_16x16_write_agpr_accvgpr_read
body: |
@@ -133,24 +112,9 @@ body: |
# GCN-LABEL: name: mfma_32x32_write_agpr_accvgpr_read
# GCN: V_MFMA_F32_32X32X2F32
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_ACCVGPR_READ_B32
name: mfma_32x32_write_agpr_accvgpr_read
body: |
@@ -174,13 +138,7 @@ body: |
# GCN-LABEL: name: mfma_16x16_write_agpr_accvgpr_write
# GCN: V_MFMA_F32_16X16X1F32
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_ACCVGPR_WRITE_B32
name: mfma_16x16_write_agpr_accvgpr_write
body: |
@@ -192,21 +150,8 @@ body: |
# GCN-LABEL: name: mfma_32x32_write_agpr_accvgpr_write
# GCN: V_MFMA_F32_32X32X2F32
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_ACCVGPR_WRITE_B32
name: mfma_32x32_write_agpr_accvgpr_write
body: |
@@ -229,11 +174,7 @@ body: |
# GCN-LABEL: name: mfma_16x16_read_srcc_accvgpr_write
# GCN: V_MFMA_F32_16X16X1F32
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_ACCVGPR_WRITE_B32
name: mfma_16x16_read_srcc_accvgpr_write
body: |
@@ -245,19 +186,8 @@ body: |
# GCN-LABEL: name: mfma_32x32_read_srcc_accvgpr_write
# GCN: V_MFMA_F32_32X32X2F32
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_ACCVGPR_WRITE_B32
name: mfma_32x32_read_srcc_accvgpr_write
body: |
@@ -280,8 +210,7 @@ body: |
# GCN-LABEL: name: accvgpr_read_write_vgpr_mfma_read
# GCN: V_ACCVGPR_READ_B32
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: accvgpr_read_write_vgpr_mfma_read
body: |
@@ -293,8 +222,7 @@ body: |
# GCN-LABEL: name: accvgpr_read_write_vgpr_accvgpr_write_read
# GCN: V_ACCVGPR_READ_B32
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_ACCVGPR_WRITE_B32
name: accvgpr_read_write_vgpr_accvgpr_write_read
body: |
@@ -318,9 +246,7 @@ body: |
# GCN-LABEL: name: accvgpr_write_agpr_mfma_read_srca
# GCN: V_ACCVGPR_WRITE_B32
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: accvgpr_write_agpr_mfma_read_srca
body: |
@@ -332,9 +258,7 @@ body: |
# GCN-LABEL: name: accvgpr_write_agpr_mfma_read_srcb
# GCN: V_ACCVGPR_WRITE_B32
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: accvgpr_write_agpr_mfma_read_srcb
body: |
@@ -346,9 +270,7 @@ body: |
# GCN-LABEL: name: accvgpr_write_agpr_accvgpr_read
# GCN: V_ACCVGPR_WRITE_B32
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_ACCVGPR_READ_B32
name: accvgpr_write_agpr_accvgpr_read
body: |
@@ -360,10 +282,7 @@ body: |
# GCN-LABEL: name: vcmpx_write_exec_mfma
# GCN: V_CMPX_EQ_I32_e32
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_MFMA
name: vcmpx_write_exec_mfma
body: |
@@ -375,10 +294,7 @@ body: |
# GCN-LABEL: name: vcmpx_write_exec_accvgpr_write
# GCN: V_CMPX_EQ_I32_e32
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_ACCVGPR_WRITE_B32
name: vcmpx_write_exec_accvgpr_write
body: |
@@ -390,8 +306,7 @@ body: |
# GCN-LABEL: name: accvgpr_read_write_vgpr_load
# GCN: V_ACCVGPR_READ_B32
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 1
# GCN-NEXT: FLAT_LOAD_DWORD
name: accvgpr_read_write_vgpr_load
body: |
@@ -403,8 +318,7 @@ body: |
# GCN-LABEL: name: accvgpr_read_write_vgpr_ds_permute
# GCN: V_ACCVGPR_READ_B32
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 1
# GCN-NEXT: DS_PERMUTE_B32
name: accvgpr_read_write_vgpr_ds_permute
body: |
@@ -416,8 +330,7 @@ body: |
# GCN-LABEL: name: accvgpr_read_write_vgpr_flat_load
# GCN: V_ACCVGPR_READ_B32
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 1
# GCN-NEXT: FLAT_LOAD_DWORD
name: accvgpr_read_write_vgpr_flat_load
body: |
@@ -429,8 +342,7 @@ body: |
# GCN-LABEL: name: accvgpr_read_write_vgpr_buffer_store
# GCN: V_ACCVGPR_READ_B32
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 1
# GCN-NEXT: BUFFER_STORE_DWORD_OFFSET
name: accvgpr_read_write_vgpr_buffer_store
body: |
@@ -442,8 +354,7 @@ body: |
# GCN-LABEL: name: accvgpr_read_write_vgpr_store
# GCN: V_ACCVGPR_READ_B32
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 1
# GCN-NEXT: DS_WRITE_B32
name: accvgpr_read_write_vgpr_store
body: |
@@ -497,8 +408,7 @@ body: |
# GCN-LABEL: name: valu_write_vgpr_accvgpr_read_load_2_and_3_depend
# GCN: V_MOV_B32
# GCN-NEXT: V_ACCVGPR_READ_B32
-# GCN-NEXT: S_NOP 0
-# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 1
# GCN-NEXT: FLAT_LOAD_DWORD
name: valu_write_vgpr_accvgpr_read_load_2_and_3_depend
body: |
diff --git a/llvm/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir b/llvm/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir
index 5dbe5d58d9bc..2efbe582fbb3 100644
--- a/llvm/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir
+++ b/llvm/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir
@@ -2,11 +2,7 @@
# GCN-LABEL: name: vmem_vcc_fallthrough
# GCN: bb.1:
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP 4
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_fallthrough
@@ -23,10 +19,7 @@ body: |
...
# GCN-LABEL: name: vmem_vcc_branch_to_next
# GCN: bb.1:
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP 3
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_branch_to_next
@@ -82,10 +75,7 @@ body: |
...
# GCN-LABEL: name: vmem_vcc_branch_around
# GCN: bb.2:
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP 3
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_branch_around
@@ -110,10 +100,7 @@ body: |
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: name: vmem_vcc_branch_backedge
-# GCN: S_NOP
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
+# GCN: S_NOP 3
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_branch_backedge
@@ -132,11 +119,7 @@ body: |
...
# GCN-LABEL: name: vmem_vcc_min_of_two
# GCN: bb.2:
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP 4
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_min_of_two
@@ -159,10 +142,7 @@ body: |
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: name: vmem_vcc_self_loop
-# GCN: S_NOP
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
+# GCN: S_NOP 3
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_self_loop
@@ -179,10 +159,7 @@ body: |
# GCN-LABEL: name: vmem_vcc_min_of_two_self_loop1
# GCN: bb.1:
# GCN: $sgpr0 = S_MOV_B32 0
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP 3
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_min_of_two_self_loop1
@@ -205,9 +182,7 @@ body: |
# GCN-LABEL: name: vmem_vcc_min_of_two_self_loop2
# GCN: bb.1:
# GCN: $sgpr0 = S_MOV_B32 0
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
-# GCN-NEXT: S_NOP
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_min_of_two_self_loop2
More information about the llvm-commits mailing list