[llvm] [AMDGPU] Support bottom-up postRA scheduling. (PR #135295)
Harrison Hao via llvm-commits
llvm-commits at lists.llvm.org
Tue May 27 02:36:35 PDT 2025
https://github.com/harrisonGPU updated https://github.com/llvm/llvm-project/pull/135295
>From 87f17d0379fee36cff843f6ceb0688a0eee3fbb1 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Wed, 9 Apr 2025 17:52:33 +0800
Subject: [PATCH 1/5] [AMDGPU] Support bottom-up postRA scheduling.
---
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 46 ++++++++++++++++-
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 1 +
.../CodeGen/AMDGPU/sched-barrier-post-RA.mir | 50 ++++++++++++++++++-
3 files changed, 95 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 1561efe2cd295..12356b3b567e8 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -280,6 +280,25 @@ void GCNHazardRecognizer::processBundle() {
CurrCycleInstr = nullptr;
}
+void GCNHazardRecognizer::reverseProcessBundle() {
+ MachineBasicBlock::instr_iterator MI =
+ std::next(CurrCycleInstr->getIterator());
+ MachineBasicBlock::instr_iterator E =
+ CurrCycleInstr->getParent()->instr_end();
+
+ for (; MI != E && MI->isInsideBundle(); ++MI) {
+ CurrCycleInstr = &*MI;
+ for (unsigned I = 0, E = MaxLookAhead - 1; I < E; ++I) {
+ if (!EmittedInstrs.empty())
+ EmittedInstrs.pop_back();
+ }
+
+ EmittedInstrs.push_back(CurrCycleInstr);
+ EmittedInstrs.resize(MaxLookAhead);
+ }
+ CurrCycleInstr = nullptr;
+}
+
void GCNHazardRecognizer::runOnInstruction(MachineInstr *MI) {
assert(IsHazardRecognizerMode);
@@ -417,7 +436,32 @@ void GCNHazardRecognizer::AdvanceCycle() {
}
void GCNHazardRecognizer::RecedeCycle() {
- llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
+ if (!CurrCycleInstr) {
+ if (!EmittedInstrs.empty())
+ EmittedInstrs.pop_back();
+ return;
+ }
+
+ if (CurrCycleInstr->isBundle()) {
+ reverseProcessBundle();
+ return;
+ }
+
+ unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
+ if (!NumWaitStates) {
+ CurrCycleInstr = nullptr;
+ return;
+ }
+
+ EmittedInstrs.push_back(CurrCycleInstr);
+ for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead()); i < e;
+ ++i) {
+ if (!EmittedInstrs.empty())
+ EmittedInstrs.pop_back();
+ }
+
+ EmittedInstrs.resize(getMaxLookAhead());
+ CurrCycleInstr = nullptr;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index bbc55851bf967..eed2561bad231 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -69,6 +69,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
// Advance over a MachineInstr bundle. Look for hazards in the bundled
// instructions.
void processBundle();
+ void reverseProcessBundle();
// Run on an individual instruction in hazard recognizer mode. This can be
// used on a newly inserted instruction before returning from PreEmitNoops.
diff --git a/llvm/test/CodeGen/AMDGPU/sched-barrier-post-RA.mir b/llvm/test/CodeGen/AMDGPU/sched-barrier-post-RA.mir
index 7bdb8f5b35ec5..02ebffca84bda 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-barrier-post-RA.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-barrier-post-RA.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -misched-cluster=false -run-pass=postmisched -verify-misched -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -misched-cluster=false -run-pass=postmisched -verify-misched -o - %s | FileCheck -check-prefix=CHECK %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -misched-cluster=false -run-pass=postmisched -misched-postra-direction=bottomup -verify-misched -o - %s | FileCheck -check-prefix=CHECK-BOTTOMUP %s
--- |
define amdgpu_kernel void @no_sched_barrier(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) { ret void }
@@ -29,6 +30,21 @@ body: |
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: }
; CHECK-NEXT: S_ENDPGM 0
+ ;
+ ; CHECK-BOTTOMUP-LABEL: name: no_sched_barrier
+ ; CHECK-BOTTOMUP: renamable $vgpr0 = IMPLICIT_DEF
+ ; CHECK-BOTTOMUP-NEXT: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
+ ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
+ ; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-BOTTOMUP-NEXT: }
+ ; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = nsw V_MUL_LO_U32_e64 killed $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = nsw V_MUL_LO_U32_e64 killed $vgpr2, $vgpr2, implicit $exec
+ ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit killed $vgpr0, implicit killed $vgpr1, implicit killed $sgpr0_sgpr1, implicit $exec, implicit killed $vgpr2 {
+ ; CHECK-BOTTOMUP-NEXT: GLOBAL_STORE_DWORD_SADDR renamable $vgpr0, killed renamable $vgpr1, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ ; CHECK-BOTTOMUP-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ ; CHECK-BOTTOMUP-NEXT: }
+ ; CHECK-BOTTOMUP-NEXT: S_ENDPGM 0
renamable $sgpr0_sgpr1 = IMPLICIT_DEF
renamable $vgpr0 = IMPLICIT_DEF
BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
@@ -66,6 +82,22 @@ body: |
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: }
; CHECK-NEXT: S_ENDPGM 0
+ ;
+ ; CHECK-BOTTOMUP-LABEL: name: sched_barrier_mask_0
+ ; CHECK-BOTTOMUP: renamable $vgpr0 = IMPLICIT_DEF
+ ; CHECK-BOTTOMUP-NEXT: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
+ ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
+ ; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-BOTTOMUP-NEXT: }
+ ; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = nsw V_MUL_LO_U32_e64 killed $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-BOTTOMUP-NEXT: SCHED_BARRIER 0
+ ; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = nsw V_MUL_LO_U32_e64 killed $vgpr2, $vgpr2, implicit $exec
+ ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit killed $vgpr0, implicit killed $vgpr1, implicit killed $sgpr0_sgpr1, implicit $exec, implicit killed $vgpr2 {
+ ; CHECK-BOTTOMUP-NEXT: GLOBAL_STORE_DWORD_SADDR renamable $vgpr0, killed renamable $vgpr1, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ ; CHECK-BOTTOMUP-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ ; CHECK-BOTTOMUP-NEXT: }
+ ; CHECK-BOTTOMUP-NEXT: S_ENDPGM 0
renamable $sgpr0_sgpr1 = IMPLICIT_DEF
renamable $vgpr0 = IMPLICIT_DEF
BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
@@ -105,6 +137,22 @@ body: |
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: }
; CHECK-NEXT: S_ENDPGM 0
+ ;
+ ; CHECK-BOTTOMUP-LABEL: name: sched_barrier_mask_1
+ ; CHECK-BOTTOMUP: renamable $vgpr0 = IMPLICIT_DEF
+ ; CHECK-BOTTOMUP-NEXT: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
+ ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
+ ; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-BOTTOMUP-NEXT: }
+ ; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = nsw V_MUL_LO_U32_e64 killed $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = nsw V_MUL_LO_U32_e64 killed $vgpr2, $vgpr2, implicit $exec
+ ; CHECK-BOTTOMUP-NEXT: SCHED_BARRIER 1
+ ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit killed $vgpr0, implicit killed $vgpr1, implicit killed $sgpr0_sgpr1, implicit $exec, implicit killed $vgpr2 {
+ ; CHECK-BOTTOMUP-NEXT: GLOBAL_STORE_DWORD_SADDR renamable $vgpr0, killed renamable $vgpr1, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ ; CHECK-BOTTOMUP-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ ; CHECK-BOTTOMUP-NEXT: }
+ ; CHECK-BOTTOMUP-NEXT: S_ENDPGM 0
renamable $sgpr0_sgpr1 = IMPLICIT_DEF
renamable $vgpr0 = IMPLICIT_DEF
BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
>From 5393ae4e12ac3d5ecede878003eca999c80003c1 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Thu, 17 Apr 2025 11:10:54 +0800
Subject: [PATCH 2/5] [AMDGPU] Update comments.
---
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 33 ++++++++++++++-----
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 5 ++-
2 files changed, 29 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 12356b3b567e8..d751b31a35a85 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -280,22 +280,30 @@ void GCNHazardRecognizer::processBundle() {
CurrCycleInstr = nullptr;
}
-void GCNHazardRecognizer::reverseProcessBundle() {
+void GCNHazardRecognizer::processBundleBottomUp() {
+ // Step through each instruction in the bundle in bottom-up order.
MachineBasicBlock::instr_iterator MI =
std::next(CurrCycleInstr->getIterator());
MachineBasicBlock::instr_iterator E =
CurrCycleInstr->getParent()->instr_end();
+ // Evict stale entries to maintain a fixed lookahead window.
+ // TODO: Hazard detection is not yet implemented. This scheduling
+ // is intended for GFX11 and newer.
for (; MI != E && MI->isInsideBundle(); ++MI) {
CurrCycleInstr = &*MI;
- for (unsigned I = 0, E = MaxLookAhead - 1; I < E; ++I) {
- if (!EmittedInstrs.empty())
- EmittedInstrs.pop_back();
- }
+
+ // Remove up to (MaxLookAhead - 1) oldest entries.
+ for (unsigned I = 0, E = MaxLookAhead - 1; I < E && !EmittedInstrs.empty();
+ ++I)
+ EmittedInstrs.pop_back();
EmittedInstrs.push_back(CurrCycleInstr);
+
+ // Keep only the most recent MaxLookAhead entries
EmittedInstrs.resize(MaxLookAhead);
}
+
CurrCycleInstr = nullptr;
}
@@ -436,14 +444,16 @@ void GCNHazardRecognizer::AdvanceCycle() {
}
void GCNHazardRecognizer::RecedeCycle() {
+ // If no instruction was issued this cycle, pop the oldest placeholder.
if (!CurrCycleInstr) {
if (!EmittedInstrs.empty())
EmittedInstrs.pop_back();
return;
}
+ // If this is a bundle header, handle the entire bundle here.
if (CurrCycleInstr->isBundle()) {
- reverseProcessBundle();
+ processBundleBottomUp();
return;
}
@@ -453,14 +463,21 @@ void GCNHazardRecognizer::RecedeCycle() {
return;
}
+ // Add current instruction to the emitted list.
EmittedInstrs.push_back(CurrCycleInstr);
- for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead()); i < e;
- ++i) {
+
+ // Model remaining wait states by removing older placeholders.
+ for (unsigned I = 1, E = std::min(NumWaitStates, getMaxLookAhead()); I < E;
+ ++I) {
if (!EmittedInstrs.empty())
EmittedInstrs.pop_back();
}
+ // getMaxLookAhead() is the largest number of wait states we will ever need
+ // to insert, so there is no point in keeping track of more than that many
+ // wait states.
EmittedInstrs.resize(getMaxLookAhead());
+
CurrCycleInstr = nullptr;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index eed2561bad231..88c7426be552d 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -69,7 +69,10 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
// Advance over a MachineInstr bundle. Look for hazards in the bundled
// instructions.
void processBundle();
- void reverseProcessBundle();
+ // Recede over a MachineInstr bundle. Adds bundled instructions to the
+ // EmittedInstrs queue in bottom-up scheduling mode.
+ // TODO: Hazard detection is not yet implemented.
+ void processBundleBottomUp();
// Run on an individual instruction in hazard recognizer mode. This can be
// used on a newly inserted instruction before returning from PreEmitNoops.
>From 33c5df42d5e1d788f1c515c658280ad1f8cb7889 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Mon, 21 Apr 2025 11:05:09 +0800
Subject: [PATCH 3/5] [AMDGPU] Add assert for Hazard for bottom up.
---
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index d751b31a35a85..7369b4ad60e90 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -281,6 +281,12 @@ void GCNHazardRecognizer::processBundle() {
}
void GCNHazardRecognizer::processBundleBottomUp() {
+ // Walk through the instructions in this bundle in bottom-up order.
+ // We only use this during post-RA scheduling, so hazard recognizer mode
+ // should never be active here (it always runs top-down).
+ assert(!IsHazardRecognizerMode &&
+ "Bottom-up scheduling shouldn't run in hazard recognizer mode");
+
// Step through each instruction in the bundle in bottom-up order.
MachineBasicBlock::instr_iterator MI =
std::next(CurrCycleInstr->getIterator());
>From 9926760d87247cfc7b20650e4a0c415f6055d50c Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Tue, 27 May 2025 16:43:24 +0800
Subject: [PATCH 4/5] [AMDGPU] Add a new lit test.
---
.../CodeGen/AMDGPU/sched-barrier-post-RA.mir | 50 +-----
.../AMDGPU/sched-image-sample-post-RA.mir | 158 ++++++++++++++++++
2 files changed, 159 insertions(+), 49 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/sched-image-sample-post-RA.mir
diff --git a/llvm/test/CodeGen/AMDGPU/sched-barrier-post-RA.mir b/llvm/test/CodeGen/AMDGPU/sched-barrier-post-RA.mir
index 02ebffca84bda..7bdb8f5b35ec5 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-barrier-post-RA.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-barrier-post-RA.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -misched-cluster=false -run-pass=postmisched -verify-misched -o - %s | FileCheck -check-prefix=CHECK %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -misched-cluster=false -run-pass=postmisched -misched-postra-direction=bottomup -verify-misched -o - %s | FileCheck -check-prefix=CHECK-BOTTOMUP %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -misched-cluster=false -run-pass=postmisched -verify-misched -o - %s | FileCheck %s
--- |
define amdgpu_kernel void @no_sched_barrier(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) { ret void }
@@ -30,21 +29,6 @@ body: |
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: }
; CHECK-NEXT: S_ENDPGM 0
- ;
- ; CHECK-BOTTOMUP-LABEL: name: no_sched_barrier
- ; CHECK-BOTTOMUP: renamable $vgpr0 = IMPLICIT_DEF
- ; CHECK-BOTTOMUP-NEXT: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
- ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
- ; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
- ; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
- ; CHECK-BOTTOMUP-NEXT: }
- ; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = nsw V_MUL_LO_U32_e64 killed $vgpr1, $vgpr1, implicit $exec
- ; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = nsw V_MUL_LO_U32_e64 killed $vgpr2, $vgpr2, implicit $exec
- ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit killed $vgpr0, implicit killed $vgpr1, implicit killed $sgpr0_sgpr1, implicit $exec, implicit killed $vgpr2 {
- ; CHECK-BOTTOMUP-NEXT: GLOBAL_STORE_DWORD_SADDR renamable $vgpr0, killed renamable $vgpr1, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
- ; CHECK-BOTTOMUP-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
- ; CHECK-BOTTOMUP-NEXT: }
- ; CHECK-BOTTOMUP-NEXT: S_ENDPGM 0
renamable $sgpr0_sgpr1 = IMPLICIT_DEF
renamable $vgpr0 = IMPLICIT_DEF
BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
@@ -82,22 +66,6 @@ body: |
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: }
; CHECK-NEXT: S_ENDPGM 0
- ;
- ; CHECK-BOTTOMUP-LABEL: name: sched_barrier_mask_0
- ; CHECK-BOTTOMUP: renamable $vgpr0 = IMPLICIT_DEF
- ; CHECK-BOTTOMUP-NEXT: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
- ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
- ; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
- ; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
- ; CHECK-BOTTOMUP-NEXT: }
- ; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = nsw V_MUL_LO_U32_e64 killed $vgpr1, $vgpr1, implicit $exec
- ; CHECK-BOTTOMUP-NEXT: SCHED_BARRIER 0
- ; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = nsw V_MUL_LO_U32_e64 killed $vgpr2, $vgpr2, implicit $exec
- ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit killed $vgpr0, implicit killed $vgpr1, implicit killed $sgpr0_sgpr1, implicit $exec, implicit killed $vgpr2 {
- ; CHECK-BOTTOMUP-NEXT: GLOBAL_STORE_DWORD_SADDR renamable $vgpr0, killed renamable $vgpr1, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
- ; CHECK-BOTTOMUP-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
- ; CHECK-BOTTOMUP-NEXT: }
- ; CHECK-BOTTOMUP-NEXT: S_ENDPGM 0
renamable $sgpr0_sgpr1 = IMPLICIT_DEF
renamable $vgpr0 = IMPLICIT_DEF
BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
@@ -137,22 +105,6 @@ body: |
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: }
; CHECK-NEXT: S_ENDPGM 0
- ;
- ; CHECK-BOTTOMUP-LABEL: name: sched_barrier_mask_1
- ; CHECK-BOTTOMUP: renamable $vgpr0 = IMPLICIT_DEF
- ; CHECK-BOTTOMUP-NEXT: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
- ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
- ; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
- ; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
- ; CHECK-BOTTOMUP-NEXT: }
- ; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = nsw V_MUL_LO_U32_e64 killed $vgpr1, $vgpr1, implicit $exec
- ; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = nsw V_MUL_LO_U32_e64 killed $vgpr2, $vgpr2, implicit $exec
- ; CHECK-BOTTOMUP-NEXT: SCHED_BARRIER 1
- ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit killed $vgpr0, implicit killed $vgpr1, implicit killed $sgpr0_sgpr1, implicit $exec, implicit killed $vgpr2 {
- ; CHECK-BOTTOMUP-NEXT: GLOBAL_STORE_DWORD_SADDR renamable $vgpr0, killed renamable $vgpr1, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
- ; CHECK-BOTTOMUP-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
- ; CHECK-BOTTOMUP-NEXT: }
- ; CHECK-BOTTOMUP-NEXT: S_ENDPGM 0
renamable $sgpr0_sgpr1 = IMPLICIT_DEF
renamable $vgpr0 = IMPLICIT_DEF
BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
diff --git a/llvm/test/CodeGen/AMDGPU/sched-image-sample-post-RA.mir b/llvm/test/CodeGen/AMDGPU/sched-image-sample-post-RA.mir
new file mode 100644
index 0000000000000..484ac0618541a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sched-image-sample-post-RA.mir
@@ -0,0 +1,158 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=postmisched -verify-misched -o - %s | FileCheck -check-prefix=TopDown %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=postmisched -misched-postra-direction=bottomup -verify-misched -o - %s | FileCheck -check-prefix=BottomUp %s
+
+---
+name: sched_image_sample_post_ra
+tracksRegLiveness: true
+tracksDebugUserValues: true
+body: |
+ ; TopDown-LABEL: name: sched_image_sample_post_ra
+ ; TopDown: bb.0.entry:
+ ; TopDown-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; TopDown-NEXT: liveins: $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
+ ; TopDown-NEXT: {{ $}}
+ ; TopDown-NEXT: $sgpr12_sgpr13 = S_MOV_B64 $exec
+ ; TopDown-NEXT: $exec = S_WQM_B64 $exec, implicit-def $scc
+ ; TopDown-NEXT: $sgpr14 = S_MOV_B32 killed $sgpr5
+ ; TopDown-NEXT: $m0 = S_MOV_B32 killed $sgpr6
+ ; TopDown-NEXT: $sgpr16_sgpr17 = S_MOV_B64 $exec
+ ; TopDown-NEXT: $exec = S_WQM_B64 $exec, implicit-def $scc
+ ; TopDown-NEXT: $vgpr2 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec
+ ; TopDown-NEXT: $vgpr3 = LDS_PARAM_LOAD 0, 1, 0, implicit $m0, implicit $exec
+ ; TopDown-NEXT: $sgpr8 = S_MOV_B32 killed $sgpr1
+ ; TopDown-NEXT: renamable $sgpr0_sgpr1 = S_GETPC_B64
+ ; TopDown-NEXT: $sgpr10 = S_MOV_B32 killed $sgpr3
+ ; TopDown-NEXT: $sgpr9 = S_MOV_B32 killed $sgpr2
+ ; TopDown-NEXT: $sgpr15 = S_MOV_B32 killed $sgpr1
+ ; TopDown-NEXT: $exec = S_MOV_B64 killed renamable $sgpr16_sgpr17
+ ; TopDown-NEXT: BUNDLE implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit-def $sgpr16, implicit-def $sgpr16_lo16, implicit-def $sgpr16_hi16, implicit-def $sgpr17, implicit-def $sgpr17_lo16, implicit-def $sgpr17_hi16, implicit-def $sgpr18, implicit-def $sgpr18_lo16, implicit-def $sgpr18_hi16, implicit-def $sgpr19, implicit-def $sgpr19_lo16, implicit-def $sgpr19_hi16, implicit-def $sgpr20, implicit-def $sgpr20_lo16, implicit-def $sgpr20_hi16, implicit-def $sgpr21, implicit-def $sgpr21_lo16, implicit-def $sgpr21_hi16, implicit-def $sgpr22, implicit-def $sgpr22_lo16, implicit-def $sgpr22_hi16, implicit-def $sgpr23, implicit-def $sgpr23_lo16, implicit-def $sgpr23_hi16, implicit-def $sgpr16_sgpr17, implicit-def $sgpr16_sgpr17_sgpr18, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21, implicit-def $sgpr18_sgpr19, implicit-def $sgpr20_sgpr21, implicit-def $sgpr20_sgpr21_sgpr22, implicit-def $sgpr20_sgpr21_sgpr22_sgpr23, implicit-def $sgpr22_sgpr23, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $sgpr0, implicit-def $sgpr0_lo16, implicit-def $sgpr0_hi16, implicit-def $sgpr1, implicit-def $sgpr1_lo16, implicit-def $sgpr1_hi16, implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit-def $sgpr0_sgpr1, implicit-def $sgpr0_sgpr1_sgpr2, implicit-def $sgpr2_sgpr3, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr24, implicit-def $sgpr24_lo16, implicit-def $sgpr24_hi16, implicit-def $sgpr25, implicit-def $sgpr25_lo16, implicit-def $sgpr25_hi16, implicit-def $sgpr26, implicit-def $sgpr26_lo16, implicit-def $sgpr26_hi16, implicit-def $sgpr27, implicit-def $sgpr27_lo16, implicit-def $sgpr27_hi16, implicit-def $sgpr28, implicit-def $sgpr28_lo16, 
implicit-def $sgpr28_hi16, implicit-def $sgpr29, implicit-def $sgpr29_lo16, implicit-def $sgpr29_hi16, implicit-def $sgpr30, implicit-def $sgpr30_lo16, implicit-def $sgpr30_hi16, implicit-def $sgpr31, implicit-def $sgpr31_lo16, implicit-def $sgpr31_hi16, implicit-def $sgpr24_sgpr25, implicit-def $sgpr24_sgpr25_sgpr26, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29, implicit-def $sgpr26_sgpr27, implicit-def $sgpr28_sgpr29, implicit-def $sgpr28_sgpr29_sgpr30, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr30_sgpr31, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30, implicit-def $sgpr36_sgpr37_sgpr38_sgpr39, implicit-def $sgpr36, implicit-def $sgpr36_lo16, implicit-def $sgpr36_hi16, implicit-def $sgpr37, implicit-def $sgpr37_lo16, implicit-def $sgpr37_hi16, implicit-def $sgpr38, implicit-def $sgpr38_lo16, implicit-def $sgpr38_hi16, implicit-def $sgpr39, implicit-def $sgpr39_lo16, implicit-def $sgpr39_hi16, implicit-def $sgpr36_sgpr37, implicit-def $sgpr36_sgpr37_sgpr38, implicit-def $sgpr38_sgpr39, implicit killed $sgpr14_sgpr15 {
+ ; TopDown-NEXT: renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX8_IMM renamable $sgpr14_sgpr15, 0, 0 :: (invariant load (s256))
+ ; TopDown-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM renamable $sgpr14_sgpr15, 128, 0 :: (invariant load (s128))
+ ; TopDown-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr14_sgpr15, 96, 0 :: (invariant load (s256))
+ ; TopDown-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39 = S_LOAD_DWORDX4_IMM killed renamable $sgpr14_sgpr15, 32, 0 :: (invariant load (s128))
+ ; TopDown-NEXT: }
+ ; TopDown-NEXT: renamable $vgpr5 = V_INTERP_P10_F32_inreg 0, $vgpr2, 0, $vgpr0, 0, $vgpr2, 0, 7, implicit $m0, implicit $exec, implicit $mode
+ ; TopDown-NEXT: renamable $vgpr6 = V_INTERP_P10_F32_inreg 0, $vgpr3, 0, killed $vgpr0, 0, $vgpr3, 0, 7, implicit $m0, implicit $exec, implicit $mode
+ ; TopDown-NEXT: renamable $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; TopDown-NEXT: renamable $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; TopDown-NEXT: renamable $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; TopDown-NEXT: renamable $vgpr9 = V_INTERP_P2_F32_inreg 0, killed $vgpr2, 0, $vgpr1, 0, killed $vgpr5, 0, 7, implicit $m0, implicit $exec, implicit $mode
+ ; TopDown-NEXT: renamable $vgpr10 = V_INTERP_P2_F32_inreg 0, killed $vgpr3, 0, killed $vgpr1, 0, killed $vgpr6, 0, 7, implicit $m0, implicit $exec, implicit $mode
+ ; TopDown-NEXT: BUNDLE implicit-def $vgpr11, implicit killed $vgpr9_vgpr10, implicit killed $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec, implicit killed $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed $sgpr36_sgpr37_sgpr38_sgpr39 {
+ ; TopDown-NEXT: renamable $vgpr11 = IMAGE_SAMPLE_V1_V2_gfx11 $vgpr9_vgpr10, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
+ ; TopDown-NEXT: renamable $vgpr5_vgpr6_vgpr7_vgpr8 = IMAGE_SAMPLE_V4_V2_gfx11 killed $vgpr9_vgpr10, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr36_sgpr37_sgpr38_sgpr39, 15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8)
+ ; TopDown-NEXT: }
+ ; TopDown-NEXT: nofpexcept V_CMP_GT_F32_e32 1065353216, killed $vgpr11, implicit-def $vcc_lo, implicit $mode, implicit $exec
+ ; TopDown-NEXT: renamable $sgpr0_sgpr1 = COPY $vcc
+ ; TopDown-NEXT: nofpexcept V_CMP_GT_F32_e32 1065353216, killed $vgpr8, implicit-def $vcc_lo, implicit $mode, implicit $exec
+ ; TopDown-NEXT: renamable $sgpr2_sgpr3 = S_AND_B64 killed renamable $sgpr0_sgpr1, killed renamable $vcc, implicit-def dead $scc
+ ; TopDown-NEXT: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; TopDown-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; TopDown-NEXT: {{ $}}
+ ; TopDown-NEXT: bb.1:
+ ; TopDown-NEXT: successors: %bb.2(0x80000000)
+ ; TopDown-NEXT: {{ $}}
+ ; TopDown-NEXT: S_BRANCH %bb.2
+ ; TopDown-NEXT: {{ $}}
+ ; TopDown-NEXT: bb.2:
+ ; TopDown-NEXT: S_ENDPGM 0
+ ;
+ ; BottomUp-LABEL: name: sched_image_sample_post_ra
+ ; BottomUp: bb.0.entry:
+ ; BottomUp-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; BottomUp-NEXT: liveins: $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
+ ; BottomUp-NEXT: {{ $}}
+ ; BottomUp-NEXT: $sgpr12_sgpr13 = S_MOV_B64 $exec
+ ; BottomUp-NEXT: $exec = S_WQM_B64 $exec, implicit-def $scc
+ ; BottomUp-NEXT: $sgpr16_sgpr17 = S_MOV_B64 $exec
+ ; BottomUp-NEXT: $m0 = S_MOV_B32 killed $sgpr6
+ ; BottomUp-NEXT: $sgpr14 = S_MOV_B32 killed $sgpr5
+ ; BottomUp-NEXT: $exec = S_WQM_B64 $exec, implicit-def $scc
+ ; BottomUp-NEXT: $vgpr2 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec
+ ; BottomUp-NEXT: $vgpr3 = LDS_PARAM_LOAD 0, 1, 0, implicit $m0, implicit $exec
+ ; BottomUp-NEXT: $sgpr8 = S_MOV_B32 killed $sgpr1
+ ; BottomUp-NEXT: renamable $sgpr0_sgpr1 = S_GETPC_B64
+ ; BottomUp-NEXT: $sgpr9 = S_MOV_B32 killed $sgpr2
+ ; BottomUp-NEXT: $sgpr10 = S_MOV_B32 killed $sgpr3
+ ; BottomUp-NEXT: $sgpr15 = S_MOV_B32 killed $sgpr1
+ ; BottomUp-NEXT: $exec = S_MOV_B64 killed renamable $sgpr16_sgpr17
+ ; BottomUp-NEXT: BUNDLE implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit-def $sgpr16, implicit-def $sgpr16_lo16, implicit-def $sgpr16_hi16, implicit-def $sgpr17, implicit-def $sgpr17_lo16, implicit-def $sgpr17_hi16, implicit-def $sgpr18, implicit-def $sgpr18_lo16, implicit-def $sgpr18_hi16, implicit-def $sgpr19, implicit-def $sgpr19_lo16, implicit-def $sgpr19_hi16, implicit-def $sgpr20, implicit-def $sgpr20_lo16, implicit-def $sgpr20_hi16, implicit-def $sgpr21, implicit-def $sgpr21_lo16, implicit-def $sgpr21_hi16, implicit-def $sgpr22, implicit-def $sgpr22_lo16, implicit-def $sgpr22_hi16, implicit-def $sgpr23, implicit-def $sgpr23_lo16, implicit-def $sgpr23_hi16, implicit-def $sgpr16_sgpr17, implicit-def $sgpr16_sgpr17_sgpr18, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21, implicit-def $sgpr18_sgpr19, implicit-def $sgpr20_sgpr21, implicit-def $sgpr20_sgpr21_sgpr22, implicit-def $sgpr20_sgpr21_sgpr22_sgpr23, implicit-def $sgpr22_sgpr23, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $sgpr0, implicit-def $sgpr0_lo16, implicit-def $sgpr0_hi16, implicit-def $sgpr1, implicit-def $sgpr1_lo16, implicit-def $sgpr1_hi16, implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit-def $sgpr0_sgpr1, implicit-def $sgpr0_sgpr1_sgpr2, implicit-def $sgpr2_sgpr3, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr24, implicit-def $sgpr24_lo16, implicit-def $sgpr24_hi16, implicit-def $sgpr25, implicit-def $sgpr25_lo16, implicit-def $sgpr25_hi16, implicit-def $sgpr26, implicit-def $sgpr26_lo16, implicit-def $sgpr26_hi16, implicit-def $sgpr27, implicit-def $sgpr27_lo16, implicit-def $sgpr27_hi16, implicit-def $sgpr28, implicit-def $sgpr28_lo16, 
implicit-def $sgpr28_hi16, implicit-def $sgpr29, implicit-def $sgpr29_lo16, implicit-def $sgpr29_hi16, implicit-def $sgpr30, implicit-def $sgpr30_lo16, implicit-def $sgpr30_hi16, implicit-def $sgpr31, implicit-def $sgpr31_lo16, implicit-def $sgpr31_hi16, implicit-def $sgpr24_sgpr25, implicit-def $sgpr24_sgpr25_sgpr26, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29, implicit-def $sgpr26_sgpr27, implicit-def $sgpr28_sgpr29, implicit-def $sgpr28_sgpr29_sgpr30, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr30_sgpr31, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30, implicit-def $sgpr36_sgpr37_sgpr38_sgpr39, implicit-def $sgpr36, implicit-def $sgpr36_lo16, implicit-def $sgpr36_hi16, implicit-def $sgpr37, implicit-def $sgpr37_lo16, implicit-def $sgpr37_hi16, implicit-def $sgpr38, implicit-def $sgpr38_lo16, implicit-def $sgpr38_hi16, implicit-def $sgpr39, implicit-def $sgpr39_lo16, implicit-def $sgpr39_hi16, implicit-def $sgpr36_sgpr37, implicit-def $sgpr36_sgpr37_sgpr38, implicit-def $sgpr38_sgpr39, implicit killed $sgpr14_sgpr15 {
+ ; BottomUp-NEXT: renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX8_IMM renamable $sgpr14_sgpr15, 0, 0 :: (invariant load (s256))
+ ; BottomUp-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM renamable $sgpr14_sgpr15, 128, 0 :: (invariant load (s128))
+ ; BottomUp-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr14_sgpr15, 96, 0 :: (invariant load (s256))
+ ; BottomUp-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39 = S_LOAD_DWORDX4_IMM killed renamable $sgpr14_sgpr15, 32, 0 :: (invariant load (s128))
+ ; BottomUp-NEXT: }
+ ; BottomUp-NEXT: renamable $vgpr6 = V_INTERP_P10_F32_inreg 0, $vgpr3, 0, $vgpr0, 0, $vgpr3, 0, 7, implicit $m0, implicit $exec, implicit $mode
+ ; BottomUp-NEXT: renamable $vgpr5 = V_INTERP_P10_F32_inreg 0, $vgpr2, 0, killed $vgpr0, 0, $vgpr2, 0, 7, implicit $m0, implicit $exec, implicit $mode
+ ; BottomUp-NEXT: renamable $vgpr10 = V_INTERP_P2_F32_inreg 0, killed $vgpr3, 0, $vgpr1, 0, killed $vgpr6, 0, 7, implicit $m0, implicit $exec, implicit $mode
+ ; BottomUp-NEXT: renamable $vgpr9 = V_INTERP_P2_F32_inreg 0, killed $vgpr2, 0, killed $vgpr1, 0, killed $vgpr5, 0, 7, implicit $m0, implicit $exec, implicit $mode
+ ; BottomUp-NEXT: BUNDLE implicit-def $vgpr11, implicit killed $vgpr9_vgpr10, implicit killed $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec, implicit killed $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed $sgpr36_sgpr37_sgpr38_sgpr39 {
+ ; BottomUp-NEXT: renamable $vgpr11 = IMAGE_SAMPLE_V1_V2_gfx11 $vgpr9_vgpr10, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
+ ; BottomUp-NEXT: renamable $vgpr5_vgpr6_vgpr7_vgpr8 = IMAGE_SAMPLE_V4_V2_gfx11 killed $vgpr9_vgpr10, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr36_sgpr37_sgpr38_sgpr39, 15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8)
+ ; BottomUp-NEXT: }
+ ; BottomUp-NEXT: renamable $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; BottomUp-NEXT: nofpexcept V_CMP_GT_F32_e32 1065353216, killed $vgpr11, implicit-def $vcc_lo, implicit $mode, implicit $exec
+ ; BottomUp-NEXT: renamable $sgpr0_sgpr1 = COPY $vcc
+ ; BottomUp-NEXT: nofpexcept V_CMP_GT_F32_e32 1065353216, killed $vgpr8, implicit-def $vcc_lo, implicit $mode, implicit $exec
+ ; BottomUp-NEXT: renamable $sgpr2_sgpr3 = S_AND_B64 killed renamable $sgpr0_sgpr1, killed renamable $vcc, implicit-def dead $scc
+ ; BottomUp-NEXT: renamable $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; BottomUp-NEXT: renamable $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; BottomUp-NEXT: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; BottomUp-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; BottomUp-NEXT: {{ $}}
+ ; BottomUp-NEXT: bb.1:
+ ; BottomUp-NEXT: successors: %bb.2(0x80000000)
+ ; BottomUp-NEXT: {{ $}}
+ ; BottomUp-NEXT: S_BRANCH %bb.2
+ ; BottomUp-NEXT: {{ $}}
+ ; BottomUp-NEXT: bb.2:
+ ; BottomUp-NEXT: S_ENDPGM 0
+ bb.0.entry:
+ successors: %bb.1(0x40000000), %bb.34(0x40000000); %bb.1(50.00%), %bb.34(50.00%)
+ liveins: $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
+ $sgpr12_sgpr13 = S_MOV_B64 $exec
+ $exec = S_WQM_B64 $exec, implicit-def $scc
+ $sgpr14 = S_MOV_B32 $sgpr5
+ $m0 = S_MOV_B32 $sgpr6
+ $sgpr16_sgpr17 = S_MOV_B64 $exec
+ $exec = S_WQM_B64 $exec, implicit-def $scc
+ $sgpr10 = S_MOV_B32 $sgpr3
+ $sgpr9 = S_MOV_B32 $sgpr2
+ $sgpr8 = S_MOV_B32 $sgpr1
+ $vgpr2 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec
+ $vgpr3 = LDS_PARAM_LOAD 0, 1, 0, implicit $m0, implicit $exec
+ renamable $sgpr0_sgpr1 = S_GETPC_B64
+ $sgpr15 = S_MOV_B32 killed $sgpr1
+ $exec = S_MOV_B64 killed renamable $sgpr16_sgpr17
+ BUNDLE implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit-def $sgpr16, implicit-def $sgpr16_lo16, implicit-def $sgpr16_hi16, implicit-def $sgpr17, implicit-def $sgpr17_lo16, implicit-def $sgpr17_hi16, implicit-def $sgpr18, implicit-def $sgpr18_lo16, implicit-def $sgpr18_hi16, implicit-def $sgpr19, implicit-def $sgpr19_lo16, implicit-def $sgpr19_hi16, implicit-def $sgpr20, implicit-def $sgpr20_lo16, implicit-def $sgpr20_hi16, implicit-def $sgpr21, implicit-def $sgpr21_lo16, implicit-def $sgpr21_hi16, implicit-def $sgpr22, implicit-def $sgpr22_lo16, implicit-def $sgpr22_hi16, implicit-def $sgpr23, implicit-def $sgpr23_lo16, implicit-def $sgpr23_hi16, implicit-def $sgpr16_sgpr17, implicit-def $sgpr16_sgpr17_sgpr18, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21, implicit-def $sgpr18_sgpr19, implicit-def $sgpr20_sgpr21, implicit-def $sgpr20_sgpr21_sgpr22, implicit-def $sgpr20_sgpr21_sgpr22_sgpr23, implicit-def $sgpr22_sgpr23, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $sgpr0, implicit-def $sgpr0_lo16, implicit-def $sgpr0_hi16, implicit-def $sgpr1, implicit-def $sgpr1_lo16, implicit-def $sgpr1_hi16, implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit-def $sgpr0_sgpr1, implicit-def $sgpr0_sgpr1_sgpr2, implicit-def $sgpr2_sgpr3, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr24, implicit-def $sgpr24_lo16, implicit-def $sgpr24_hi16, implicit-def $sgpr25, implicit-def $sgpr25_lo16, implicit-def $sgpr25_hi16, implicit-def $sgpr26, implicit-def $sgpr26_lo16, implicit-def $sgpr26_hi16, implicit-def $sgpr27, implicit-def $sgpr27_lo16, implicit-def $sgpr27_hi16, implicit-def $sgpr28, implicit-def $sgpr28_lo16, implicit-def 
$sgpr28_hi16, implicit-def $sgpr29, implicit-def $sgpr29_lo16, implicit-def $sgpr29_hi16, implicit-def $sgpr30, implicit-def $sgpr30_lo16, implicit-def $sgpr30_hi16, implicit-def $sgpr31, implicit-def $sgpr31_lo16, implicit-def $sgpr31_hi16, implicit-def $sgpr24_sgpr25, implicit-def $sgpr24_sgpr25_sgpr26, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29, implicit-def $sgpr26_sgpr27, implicit-def $sgpr28_sgpr29, implicit-def $sgpr28_sgpr29_sgpr30, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr30_sgpr31, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30, implicit-def $sgpr36_sgpr37_sgpr38_sgpr39, implicit-def $sgpr36, implicit-def $sgpr36_lo16, implicit-def $sgpr36_hi16, implicit-def $sgpr37, implicit-def $sgpr37_lo16, implicit-def $sgpr37_hi16, implicit-def $sgpr38, implicit-def $sgpr38_lo16, implicit-def $sgpr38_hi16, implicit-def $sgpr39, implicit-def $sgpr39_lo16, implicit-def $sgpr39_hi16, implicit-def $sgpr36_sgpr37, implicit-def $sgpr36_sgpr37_sgpr38, implicit-def $sgpr38_sgpr39, implicit $sgpr14_sgpr15 {
+ renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX8_IMM renamable $sgpr14_sgpr15, 0, 0 :: (invariant load (s256))
+ renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM renamable $sgpr14_sgpr15, 128, 0 :: (invariant load (s128))
+ renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr14_sgpr15, 96, 0 :: (invariant load (s256))
+ renamable $sgpr36_sgpr37_sgpr38_sgpr39 = S_LOAD_DWORDX4_IMM renamable $sgpr14_sgpr15, 32, 0 :: (invariant load (s128))
+ }
+ renamable $vgpr5 = V_INTERP_P10_F32_inreg 0, $vgpr2, 0, $vgpr0, 0, $vgpr2, 0, 7, implicit $m0, implicit $exec, implicit $mode
+ renamable $vgpr6 = V_INTERP_P10_F32_inreg 0, $vgpr3, 0, $vgpr0, 0, $vgpr3, 0, 7, implicit $m0, implicit $exec, implicit $mode
+ renamable $vgpr9 = V_INTERP_P2_F32_inreg 0, $vgpr2, 0, $vgpr1, 0, killed $vgpr5, 0, 7, implicit $m0, implicit $exec, implicit $mode
+ renamable $vgpr10 = V_INTERP_P2_F32_inreg 0, $vgpr3, 0, $vgpr1, 0, killed $vgpr6, 0, 7, implicit $m0, implicit $exec, implicit $mode
+ BUNDLE implicit-def $vgpr11, implicit $vgpr9_vgpr10, implicit killed $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec, implicit killed $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed $sgpr36_sgpr37_sgpr38_sgpr39 {
+ renamable $vgpr11 = IMAGE_SAMPLE_V1_V2_gfx11 $vgpr9_vgpr10, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
+ renamable $vgpr5_vgpr6_vgpr7_vgpr8 = IMAGE_SAMPLE_V4_V2_gfx11 $vgpr9_vgpr10, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr36_sgpr37_sgpr38_sgpr39, 15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8)
+ }
+ nofpexcept V_CMP_GT_F32_e32 1065353216, $vgpr11, implicit-def $vcc, implicit $mode, implicit $exec
+ renamable $sgpr0_sgpr1 = COPY $vcc
+ nofpexcept V_CMP_GT_F32_e32 1065353216, $vgpr8, implicit-def $vcc, implicit $mode, implicit $exec
+ renamable $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ renamable $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ renamable $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ renamable $sgpr2_sgpr3 = S_AND_B64 killed renamable $sgpr0_sgpr1, killed renamable $vcc, implicit-def dead $scc
+ $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
+ S_CBRANCH_EXECZ %bb.34, implicit $exec
+
+ bb.1:
+ successors: %bb.34(0x80000000)
+ S_BRANCH %bb.34
+
+ bb.34:
+ S_ENDPGM 0
+...
>From ee44249f9e7ce8754a17c3117e0610bc7bf4fe54 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Tue, 27 May 2025 17:36:14 +0800
Subject: [PATCH 5/5] [AMDGPU] Remove unnecessary RecedeCycle.
---
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 71 +------------------
1 file changed, 3 insertions(+), 68 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 7369b4ad60e90..4a1583a40babd 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -280,39 +280,6 @@ void GCNHazardRecognizer::processBundle() {
CurrCycleInstr = nullptr;
}
-void GCNHazardRecognizer::processBundleBottomUp() {
- // Walk through the instructions in this bundle in bottom-up order.
- // We only use this during post-RA scheduling, so hazard recognizer mode
- // should never be active here (it always runs top-down).
- assert(!IsHazardRecognizerMode &&
- "Bottom-up scheduling shouldn't run in hazard recognizer mode");
-
- // Step through each instruction in the bundle in bottom-up order.
- MachineBasicBlock::instr_iterator MI =
- std::next(CurrCycleInstr->getIterator());
- MachineBasicBlock::instr_iterator E =
- CurrCycleInstr->getParent()->instr_end();
-
- // Evict stale entries to maintain a fixed lookahead window.
- // TODO: Hazard detection is not yet implemented. This scheduling
- // is intended for GFX11 and newer.
- for (; MI != E && MI->isInsideBundle(); ++MI) {
- CurrCycleInstr = &*MI;
-
- // Remove up to (MaxLookAhead - 1) oldest entries.
- for (unsigned I = 0, E = MaxLookAhead - 1; I < E && !EmittedInstrs.empty();
- ++I)
- EmittedInstrs.pop_back();
-
- EmittedInstrs.push_back(CurrCycleInstr);
-
- // Keep only the most recent MaxLookAhead entries
- EmittedInstrs.resize(MaxLookAhead);
- }
-
- CurrCycleInstr = nullptr;
-}
-
void GCNHazardRecognizer::runOnInstruction(MachineInstr *MI) {
assert(IsHazardRecognizerMode);
@@ -450,41 +417,9 @@ void GCNHazardRecognizer::AdvanceCycle() {
}
void GCNHazardRecognizer::RecedeCycle() {
- // If no instruction was issued this cycle, pop the oldest placeholder.
- if (!CurrCycleInstr) {
- if (!EmittedInstrs.empty())
- EmittedInstrs.pop_back();
- return;
- }
-
- // If this is a bundle header, handle the entire bundle here.
- if (CurrCycleInstr->isBundle()) {
- processBundleBottomUp();
- return;
- }
-
- unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
- if (!NumWaitStates) {
- CurrCycleInstr = nullptr;
- return;
- }
-
- // Add current instruction to the emitted list.
- EmittedInstrs.push_back(CurrCycleInstr);
-
- // Model remaining wait states by removing older placeholders.
- for (unsigned I = 1, E = std::min(NumWaitStates, getMaxLookAhead()); I < E;
- ++I) {
- if (!EmittedInstrs.empty())
- EmittedInstrs.pop_back();
- }
-
- // getMaxLookahead() is the largest number of wait states we will ever need
- // to insert, so there is no point in keeping track of more than that many
- // wait states.
- EmittedInstrs.resize(getMaxLookAhead());
-
- CurrCycleInstr = nullptr;
+ if (IsHazardRecognizerMode || ST.getGeneration() < AMDGPUSubtarget::GFX11)
+ llvm_unreachable(
+ "hazard recognizer does not support bottom-up scheduling.");
}
//===----------------------------------------------------------------------===//
More information about the llvm-commits
mailing list