[llvm] [AMDGPU] Support bottom-up postRA scheduing. (PR #135295)

Harrison Hao via llvm-commits llvm-commits at lists.llvm.org
Tue May 27 02:36:35 PDT 2025


https://github.com/harrisonGPU updated https://github.com/llvm/llvm-project/pull/135295

>From 87f17d0379fee36cff843f6ceb0688a0eee3fbb1 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Wed, 9 Apr 2025 17:52:33 +0800
Subject: [PATCH 1/5] [AMDGPU] Support bottom-up postRA scheduling.

---
 .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 46 ++++++++++++++++-
 llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h  |  1 +
 .../CodeGen/AMDGPU/sched-barrier-post-RA.mir  | 50 ++++++++++++++++++-
 3 files changed, 95 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 1561efe2cd295..12356b3b567e8 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -280,6 +280,25 @@ void GCNHazardRecognizer::processBundle() {
   CurrCycleInstr = nullptr;
 }
 
+void GCNHazardRecognizer::reverseProcessBundle() {
+  MachineBasicBlock::instr_iterator MI =
+      std::next(CurrCycleInstr->getIterator());
+  MachineBasicBlock::instr_iterator E =
+      CurrCycleInstr->getParent()->instr_end();
+
+  for (; MI != E && MI->isInsideBundle(); ++MI) {
+    CurrCycleInstr = &*MI;
+    for (unsigned I = 0, E = MaxLookAhead - 1; I < E; ++I) {
+      if (!EmittedInstrs.empty())
+        EmittedInstrs.pop_back();
+    }
+
+    EmittedInstrs.push_back(CurrCycleInstr);
+    EmittedInstrs.resize(MaxLookAhead);
+  }
+  CurrCycleInstr = nullptr;
+}
+
 void GCNHazardRecognizer::runOnInstruction(MachineInstr *MI) {
   assert(IsHazardRecognizerMode);
 
@@ -417,7 +436,32 @@ void GCNHazardRecognizer::AdvanceCycle() {
 }
 
 void GCNHazardRecognizer::RecedeCycle() {
-  llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
+  if (!CurrCycleInstr) {
+    if (!EmittedInstrs.empty())
+      EmittedInstrs.pop_back();
+    return;
+  }
+
+  if (CurrCycleInstr->isBundle()) {
+    reverseProcessBundle();
+    return;
+  }
+
+  unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
+  if (!NumWaitStates) {
+    CurrCycleInstr = nullptr;
+    return;
+  }
+
+  EmittedInstrs.push_back(CurrCycleInstr);
+  for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead()); i < e;
+       ++i) {
+    if (!EmittedInstrs.empty())
+      EmittedInstrs.pop_back();
+  }
+
+  EmittedInstrs.resize(getMaxLookAhead());
+  CurrCycleInstr = nullptr;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index bbc55851bf967..eed2561bad231 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -69,6 +69,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
   // Advance over a MachineInstr bundle. Look for hazards in the bundled
   // instructions.
   void processBundle();
+  void reverseProcessBundle();
 
   // Run on an individual instruction in hazard recognizer mode. This can be
   // used on a newly inserted instruction before returning from PreEmitNoops.
diff --git a/llvm/test/CodeGen/AMDGPU/sched-barrier-post-RA.mir b/llvm/test/CodeGen/AMDGPU/sched-barrier-post-RA.mir
index 7bdb8f5b35ec5..02ebffca84bda 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-barrier-post-RA.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-barrier-post-RA.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -misched-cluster=false -run-pass=postmisched -verify-misched -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -misched-cluster=false -run-pass=postmisched -verify-misched -o - %s | FileCheck -check-prefix=CHECK %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -misched-cluster=false -run-pass=postmisched -misched-postra-direction=bottomup -verify-misched -o - %s | FileCheck -check-prefix=CHECK-BOTTOMUP %s
 
 --- |
   define amdgpu_kernel void @no_sched_barrier(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) { ret void }
@@ -29,6 +30,21 @@ body: |
     ; CHECK-NEXT:   GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
     ; CHECK-NEXT: }
     ; CHECK-NEXT: S_ENDPGM 0
+    ;
+    ; CHECK-BOTTOMUP-LABEL: name: no_sched_barrier
+    ; CHECK-BOTTOMUP: renamable $vgpr0 = IMPLICIT_DEF
+    ; CHECK-BOTTOMUP-NEXT: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
+    ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
+    ; CHECK-BOTTOMUP-NEXT:   renamable $vgpr1 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+    ; CHECK-BOTTOMUP-NEXT:   renamable $vgpr2 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+    ; CHECK-BOTTOMUP-NEXT: }
+    ; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = nsw V_MUL_LO_U32_e64 killed $vgpr1, $vgpr1, implicit $exec
+    ; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = nsw V_MUL_LO_U32_e64 killed $vgpr2, $vgpr2, implicit $exec
+    ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit killed $vgpr0, implicit killed $vgpr1, implicit killed $sgpr0_sgpr1, implicit $exec, implicit killed $vgpr2 {
+    ; CHECK-BOTTOMUP-NEXT:   GLOBAL_STORE_DWORD_SADDR renamable $vgpr0, killed renamable $vgpr1, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+    ; CHECK-BOTTOMUP-NEXT:   GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+    ; CHECK-BOTTOMUP-NEXT: }
+    ; CHECK-BOTTOMUP-NEXT: S_ENDPGM 0
     renamable $sgpr0_sgpr1 = IMPLICIT_DEF
     renamable $vgpr0 = IMPLICIT_DEF
     BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
@@ -66,6 +82,22 @@ body: |
     ; CHECK-NEXT:   GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
     ; CHECK-NEXT: }
     ; CHECK-NEXT: S_ENDPGM 0
+    ;
+    ; CHECK-BOTTOMUP-LABEL: name: sched_barrier_mask_0
+    ; CHECK-BOTTOMUP: renamable $vgpr0 = IMPLICIT_DEF
+    ; CHECK-BOTTOMUP-NEXT: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
+    ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
+    ; CHECK-BOTTOMUP-NEXT:   renamable $vgpr1 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+    ; CHECK-BOTTOMUP-NEXT:   renamable $vgpr2 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+    ; CHECK-BOTTOMUP-NEXT: }
+    ; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = nsw V_MUL_LO_U32_e64 killed $vgpr1, $vgpr1, implicit $exec
+    ; CHECK-BOTTOMUP-NEXT: SCHED_BARRIER 0
+    ; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = nsw V_MUL_LO_U32_e64 killed $vgpr2, $vgpr2, implicit $exec
+    ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit killed $vgpr0, implicit killed $vgpr1, implicit killed $sgpr0_sgpr1, implicit $exec, implicit killed $vgpr2 {
+    ; CHECK-BOTTOMUP-NEXT:   GLOBAL_STORE_DWORD_SADDR renamable $vgpr0, killed renamable $vgpr1, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+    ; CHECK-BOTTOMUP-NEXT:   GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+    ; CHECK-BOTTOMUP-NEXT: }
+    ; CHECK-BOTTOMUP-NEXT: S_ENDPGM 0
     renamable $sgpr0_sgpr1 = IMPLICIT_DEF
     renamable $vgpr0 = IMPLICIT_DEF
     BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
@@ -105,6 +137,22 @@ body: |
     ; CHECK-NEXT:   GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
     ; CHECK-NEXT: }
     ; CHECK-NEXT: S_ENDPGM 0
+    ;
+    ; CHECK-BOTTOMUP-LABEL: name: sched_barrier_mask_1
+    ; CHECK-BOTTOMUP: renamable $vgpr0 = IMPLICIT_DEF
+    ; CHECK-BOTTOMUP-NEXT: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
+    ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
+    ; CHECK-BOTTOMUP-NEXT:   renamable $vgpr1 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+    ; CHECK-BOTTOMUP-NEXT:   renamable $vgpr2 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+    ; CHECK-BOTTOMUP-NEXT: }
+    ; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = nsw V_MUL_LO_U32_e64 killed $vgpr1, $vgpr1, implicit $exec
+    ; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = nsw V_MUL_LO_U32_e64 killed $vgpr2, $vgpr2, implicit $exec
+    ; CHECK-BOTTOMUP-NEXT: SCHED_BARRIER 1
+    ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit killed $vgpr0, implicit killed $vgpr1, implicit killed $sgpr0_sgpr1, implicit $exec, implicit killed $vgpr2 {
+    ; CHECK-BOTTOMUP-NEXT:   GLOBAL_STORE_DWORD_SADDR renamable $vgpr0, killed renamable $vgpr1, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+    ; CHECK-BOTTOMUP-NEXT:   GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+    ; CHECK-BOTTOMUP-NEXT: }
+    ; CHECK-BOTTOMUP-NEXT: S_ENDPGM 0
     renamable $sgpr0_sgpr1 = IMPLICIT_DEF
     renamable $vgpr0 = IMPLICIT_DEF
     BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {

>From 5393ae4e12ac3d5ecede878003eca999c80003c1 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Thu, 17 Apr 2025 11:10:54 +0800
Subject: [PATCH 2/5] [AMDGPU] Update comments.

---
 .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 33 ++++++++++++++-----
 llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h  |  5 ++-
 2 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 12356b3b567e8..d751b31a35a85 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -280,22 +280,30 @@ void GCNHazardRecognizer::processBundle() {
   CurrCycleInstr = nullptr;
 }
 
-void GCNHazardRecognizer::reverseProcessBundle() {
+void GCNHazardRecognizer::processBundleBottomUp() {
+  // Step through each instruction in the bundle in bottom-up order.
   MachineBasicBlock::instr_iterator MI =
       std::next(CurrCycleInstr->getIterator());
   MachineBasicBlock::instr_iterator E =
       CurrCycleInstr->getParent()->instr_end();
 
+  // Evict stale entries to maintain a fixed lookahead window.
+  // TODO: Hazard detection is not yet implemented. This scheduling
+  // is intended for GFX11 and newer.
   for (; MI != E && MI->isInsideBundle(); ++MI) {
     CurrCycleInstr = &*MI;
-    for (unsigned I = 0, E = MaxLookAhead - 1; I < E; ++I) {
-      if (!EmittedInstrs.empty())
-        EmittedInstrs.pop_back();
-    }
+
+    // Remove up to (MaxLookAhead - 1) oldest entries.
+    for (unsigned I = 0, E = MaxLookAhead - 1; I < E && !EmittedInstrs.empty();
+         ++I)
+      EmittedInstrs.pop_back();
 
     EmittedInstrs.push_back(CurrCycleInstr);
+
+    // Keep only the most recent MaxLookAhead entries
     EmittedInstrs.resize(MaxLookAhead);
   }
+
   CurrCycleInstr = nullptr;
 }
 
@@ -436,14 +444,16 @@ void GCNHazardRecognizer::AdvanceCycle() {
 }
 
 void GCNHazardRecognizer::RecedeCycle() {
+  // If no instruction was issued this cycle, pop the oldest placeholder.
   if (!CurrCycleInstr) {
     if (!EmittedInstrs.empty())
       EmittedInstrs.pop_back();
     return;
   }
 
+  // If this is a bundle header, handle the entire bundle here.
   if (CurrCycleInstr->isBundle()) {
-    reverseProcessBundle();
+    processBundleBottomUp();
     return;
   }
 
@@ -453,14 +463,21 @@ void GCNHazardRecognizer::RecedeCycle() {
     return;
   }
 
+  // Add current instruction to the emitted list.
   EmittedInstrs.push_back(CurrCycleInstr);
-  for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead()); i < e;
-       ++i) {
+
+  // Model remaining wait states by removing older placeholders.
+  for (unsigned I = 1, E = std::min(NumWaitStates, getMaxLookAhead()); I < E;
+       ++I) {
     if (!EmittedInstrs.empty())
       EmittedInstrs.pop_back();
   }
 
+  // getMaxLookahead() is the largest number of wait states we will ever need
+  // to insert, so there is no point in keeping track of more than that many
+  // wait states.
   EmittedInstrs.resize(getMaxLookAhead());
+
   CurrCycleInstr = nullptr;
 }
 
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index eed2561bad231..88c7426be552d 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -69,7 +69,10 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
   // Advance over a MachineInstr bundle. Look for hazards in the bundled
   // instructions.
   void processBundle();
-  void reverseProcessBundle();
+  // Recede over a MachineInstr bundle. Adds bundled instructions to the
+  // EmittedInstrs queue in bottom-up scheduling mode.
+  // TODO: Hazard detection is not yet implemented.
+  void processBundleBottomUp();
 
   // Run on an individual instruction in hazard recognizer mode. This can be
   // used on a newly inserted instruction before returning from PreEmitNoops.

>From 33c5df42d5e1d788f1c515c658280ad1f8cb7889 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Mon, 21 Apr 2025 11:05:09 +0800
Subject: [PATCH 3/5] [AMDGPU] Add assert for Hazard for bottom up.

---
 llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index d751b31a35a85..7369b4ad60e90 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -281,6 +281,12 @@ void GCNHazardRecognizer::processBundle() {
 }
 
 void GCNHazardRecognizer::processBundleBottomUp() {
+  // Walk through the instructions in this bundle in bottom-up order.
+  // We only use this during post-RA scheduling, so hazard recognizer mode
+  // should never be active here (it always runs top-down).
+  assert(!IsHazardRecognizerMode &&
+         "Bottom-up scheduling shouldn't run in hazard recognizer mode");
+
   // Step through each instruction in the bundle in bottom-up order.
   MachineBasicBlock::instr_iterator MI =
       std::next(CurrCycleInstr->getIterator());

>From 9926760d87247cfc7b20650e4a0c415f6055d50c Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Tue, 27 May 2025 16:43:24 +0800
Subject: [PATCH 4/5] [AMDGPU] Add a new lit test.

---
 .../CodeGen/AMDGPU/sched-barrier-post-RA.mir  |  50 +-----
 .../AMDGPU/sched-image-sample-post-RA.mir     | 158 ++++++++++++++++++
 2 files changed, 159 insertions(+), 49 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/sched-image-sample-post-RA.mir

diff --git a/llvm/test/CodeGen/AMDGPU/sched-barrier-post-RA.mir b/llvm/test/CodeGen/AMDGPU/sched-barrier-post-RA.mir
index 02ebffca84bda..7bdb8f5b35ec5 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-barrier-post-RA.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-barrier-post-RA.mir
@@ -1,6 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -misched-cluster=false -run-pass=postmisched -verify-misched -o - %s | FileCheck -check-prefix=CHECK %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -misched-cluster=false -run-pass=postmisched -misched-postra-direction=bottomup -verify-misched -o - %s | FileCheck -check-prefix=CHECK-BOTTOMUP %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -misched-cluster=false -run-pass=postmisched -verify-misched -o - %s | FileCheck %s
 
 --- |
   define amdgpu_kernel void @no_sched_barrier(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) { ret void }
@@ -30,21 +29,6 @@ body: |
     ; CHECK-NEXT:   GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
     ; CHECK-NEXT: }
     ; CHECK-NEXT: S_ENDPGM 0
-    ;
-    ; CHECK-BOTTOMUP-LABEL: name: no_sched_barrier
-    ; CHECK-BOTTOMUP: renamable $vgpr0 = IMPLICIT_DEF
-    ; CHECK-BOTTOMUP-NEXT: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
-    ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
-    ; CHECK-BOTTOMUP-NEXT:   renamable $vgpr1 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
-    ; CHECK-BOTTOMUP-NEXT:   renamable $vgpr2 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
-    ; CHECK-BOTTOMUP-NEXT: }
-    ; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = nsw V_MUL_LO_U32_e64 killed $vgpr1, $vgpr1, implicit $exec
-    ; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = nsw V_MUL_LO_U32_e64 killed $vgpr2, $vgpr2, implicit $exec
-    ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit killed $vgpr0, implicit killed $vgpr1, implicit killed $sgpr0_sgpr1, implicit $exec, implicit killed $vgpr2 {
-    ; CHECK-BOTTOMUP-NEXT:   GLOBAL_STORE_DWORD_SADDR renamable $vgpr0, killed renamable $vgpr1, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
-    ; CHECK-BOTTOMUP-NEXT:   GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
-    ; CHECK-BOTTOMUP-NEXT: }
-    ; CHECK-BOTTOMUP-NEXT: S_ENDPGM 0
     renamable $sgpr0_sgpr1 = IMPLICIT_DEF
     renamable $vgpr0 = IMPLICIT_DEF
     BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
@@ -82,22 +66,6 @@ body: |
     ; CHECK-NEXT:   GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
     ; CHECK-NEXT: }
     ; CHECK-NEXT: S_ENDPGM 0
-    ;
-    ; CHECK-BOTTOMUP-LABEL: name: sched_barrier_mask_0
-    ; CHECK-BOTTOMUP: renamable $vgpr0 = IMPLICIT_DEF
-    ; CHECK-BOTTOMUP-NEXT: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
-    ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
-    ; CHECK-BOTTOMUP-NEXT:   renamable $vgpr1 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
-    ; CHECK-BOTTOMUP-NEXT:   renamable $vgpr2 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
-    ; CHECK-BOTTOMUP-NEXT: }
-    ; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = nsw V_MUL_LO_U32_e64 killed $vgpr1, $vgpr1, implicit $exec
-    ; CHECK-BOTTOMUP-NEXT: SCHED_BARRIER 0
-    ; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = nsw V_MUL_LO_U32_e64 killed $vgpr2, $vgpr2, implicit $exec
-    ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit killed $vgpr0, implicit killed $vgpr1, implicit killed $sgpr0_sgpr1, implicit $exec, implicit killed $vgpr2 {
-    ; CHECK-BOTTOMUP-NEXT:   GLOBAL_STORE_DWORD_SADDR renamable $vgpr0, killed renamable $vgpr1, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
-    ; CHECK-BOTTOMUP-NEXT:   GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
-    ; CHECK-BOTTOMUP-NEXT: }
-    ; CHECK-BOTTOMUP-NEXT: S_ENDPGM 0
     renamable $sgpr0_sgpr1 = IMPLICIT_DEF
     renamable $vgpr0 = IMPLICIT_DEF
     BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
@@ -137,22 +105,6 @@ body: |
     ; CHECK-NEXT:   GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
     ; CHECK-NEXT: }
     ; CHECK-NEXT: S_ENDPGM 0
-    ;
-    ; CHECK-BOTTOMUP-LABEL: name: sched_barrier_mask_1
-    ; CHECK-BOTTOMUP: renamable $vgpr0 = IMPLICIT_DEF
-    ; CHECK-BOTTOMUP-NEXT: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
-    ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
-    ; CHECK-BOTTOMUP-NEXT:   renamable $vgpr1 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
-    ; CHECK-BOTTOMUP-NEXT:   renamable $vgpr2 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
-    ; CHECK-BOTTOMUP-NEXT: }
-    ; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = nsw V_MUL_LO_U32_e64 killed $vgpr1, $vgpr1, implicit $exec
-    ; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = nsw V_MUL_LO_U32_e64 killed $vgpr2, $vgpr2, implicit $exec
-    ; CHECK-BOTTOMUP-NEXT: SCHED_BARRIER 1
-    ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit killed $vgpr0, implicit killed $vgpr1, implicit killed $sgpr0_sgpr1, implicit $exec, implicit killed $vgpr2 {
-    ; CHECK-BOTTOMUP-NEXT:   GLOBAL_STORE_DWORD_SADDR renamable $vgpr0, killed renamable $vgpr1, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
-    ; CHECK-BOTTOMUP-NEXT:   GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
-    ; CHECK-BOTTOMUP-NEXT: }
-    ; CHECK-BOTTOMUP-NEXT: S_ENDPGM 0
     renamable $sgpr0_sgpr1 = IMPLICIT_DEF
     renamable $vgpr0 = IMPLICIT_DEF
     BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
diff --git a/llvm/test/CodeGen/AMDGPU/sched-image-sample-post-RA.mir b/llvm/test/CodeGen/AMDGPU/sched-image-sample-post-RA.mir
new file mode 100644
index 0000000000000..484ac0618541a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sched-image-sample-post-RA.mir
@@ -0,0 +1,158 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=postmisched -verify-misched -o - %s | FileCheck -check-prefix=TopDown %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=postmisched -misched-postra-direction=bottomup -verify-misched -o - %s | FileCheck -check-prefix=BottomUp %s
+
+---
+name: sched_image_sample_post_ra
+tracksRegLiveness: true
+tracksDebugUserValues: true
+body: |
+  ; TopDown-LABEL: name: sched_image_sample_post_ra
+  ; TopDown: bb.0.entry:
+  ; TopDown-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; TopDown-NEXT:   liveins: $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
+  ; TopDown-NEXT: {{  $}}
+  ; TopDown-NEXT:   $sgpr12_sgpr13 = S_MOV_B64 $exec
+  ; TopDown-NEXT:   $exec = S_WQM_B64 $exec, implicit-def $scc
+  ; TopDown-NEXT:   $sgpr14 = S_MOV_B32 killed $sgpr5
+  ; TopDown-NEXT:   $m0 = S_MOV_B32 killed $sgpr6
+  ; TopDown-NEXT:   $sgpr16_sgpr17 = S_MOV_B64 $exec
+  ; TopDown-NEXT:   $exec = S_WQM_B64 $exec, implicit-def $scc
+  ; TopDown-NEXT:   $vgpr2 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec
+  ; TopDown-NEXT:   $vgpr3 = LDS_PARAM_LOAD 0, 1, 0, implicit $m0, implicit $exec
+  ; TopDown-NEXT:   $sgpr8 = S_MOV_B32 killed $sgpr1
+  ; TopDown-NEXT:   renamable $sgpr0_sgpr1 = S_GETPC_B64
+  ; TopDown-NEXT:   $sgpr10 = S_MOV_B32 killed $sgpr3
+  ; TopDown-NEXT:   $sgpr9 = S_MOV_B32 killed $sgpr2
+  ; TopDown-NEXT:   $sgpr15 = S_MOV_B32 killed $sgpr1
+  ; TopDown-NEXT:   $exec = S_MOV_B64 killed renamable $sgpr16_sgpr17
+  ; TopDown-NEXT:   BUNDLE implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit-def $sgpr16, implicit-def $sgpr16_lo16, implicit-def $sgpr16_hi16, implicit-def $sgpr17, implicit-def $sgpr17_lo16, implicit-def $sgpr17_hi16, implicit-def $sgpr18, implicit-def $sgpr18_lo16, implicit-def $sgpr18_hi16, implicit-def $sgpr19, implicit-def $sgpr19_lo16, implicit-def $sgpr19_hi16, implicit-def $sgpr20, implicit-def $sgpr20_lo16, implicit-def $sgpr20_hi16, implicit-def $sgpr21, implicit-def $sgpr21_lo16, implicit-def $sgpr21_hi16, implicit-def $sgpr22, implicit-def $sgpr22_lo16, implicit-def $sgpr22_hi16, implicit-def $sgpr23, implicit-def $sgpr23_lo16, implicit-def $sgpr23_hi16, implicit-def $sgpr16_sgpr17, implicit-def $sgpr16_sgpr17_sgpr18, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21, implicit-def $sgpr18_sgpr19, implicit-def $sgpr20_sgpr21, implicit-def $sgpr20_sgpr21_sgpr22, implicit-def $sgpr20_sgpr21_sgpr22_sgpr23, implicit-def $sgpr22_sgpr23, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $sgpr0, implicit-def $sgpr0_lo16, implicit-def $sgpr0_hi16, implicit-def $sgpr1, implicit-def $sgpr1_lo16, implicit-def $sgpr1_hi16, implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit-def $sgpr0_sgpr1, implicit-def $sgpr0_sgpr1_sgpr2, implicit-def $sgpr2_sgpr3, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr24, implicit-def $sgpr24_lo16, implicit-def $sgpr24_hi16, implicit-def $sgpr25, implicit-def $sgpr25_lo16, implicit-def $sgpr25_hi16, implicit-def $sgpr26, implicit-def $sgpr26_lo16, implicit-def $sgpr26_hi16, implicit-def $sgpr27, implicit-def $sgpr27_lo16, implicit-def $sgpr27_hi16, implicit-def $sgpr28, implicit-def $sgpr28_lo16, implicit-def $sgpr28_hi16, implicit-def $sgpr29, implicit-def $sgpr29_lo16, implicit-def $sgpr29_hi16, implicit-def $sgpr30, implicit-def $sgpr30_lo16, implicit-def $sgpr30_hi16, implicit-def $sgpr31, implicit-def $sgpr31_lo16, implicit-def $sgpr31_hi16, implicit-def $sgpr24_sgpr25, implicit-def $sgpr24_sgpr25_sgpr26, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29, implicit-def $sgpr26_sgpr27, implicit-def $sgpr28_sgpr29, implicit-def $sgpr28_sgpr29_sgpr30, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr30_sgpr31, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30, implicit-def $sgpr36_sgpr37_sgpr38_sgpr39, implicit-def $sgpr36, implicit-def $sgpr36_lo16, implicit-def $sgpr36_hi16, implicit-def $sgpr37, implicit-def $sgpr37_lo16, implicit-def $sgpr37_hi16, implicit-def $sgpr38, implicit-def $sgpr38_lo16, implicit-def $sgpr38_hi16, implicit-def $sgpr39, implicit-def $sgpr39_lo16, implicit-def $sgpr39_hi16, implicit-def $sgpr36_sgpr37, implicit-def $sgpr36_sgpr37_sgpr38, implicit-def $sgpr38_sgpr39, implicit killed $sgpr14_sgpr15 {
+  ; TopDown-NEXT:     renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX8_IMM renamable $sgpr14_sgpr15, 0, 0 :: (invariant load (s256))
+  ; TopDown-NEXT:     renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM renamable $sgpr14_sgpr15, 128, 0 :: (invariant load (s128))
+  ; TopDown-NEXT:     renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr14_sgpr15, 96, 0 :: (invariant load (s256))
+  ; TopDown-NEXT:     renamable $sgpr36_sgpr37_sgpr38_sgpr39 = S_LOAD_DWORDX4_IMM killed renamable $sgpr14_sgpr15, 32, 0 :: (invariant load (s128))
+  ; TopDown-NEXT:   }
+  ; TopDown-NEXT:   renamable $vgpr5 = V_INTERP_P10_F32_inreg 0, $vgpr2, 0, $vgpr0, 0, $vgpr2, 0, 7, implicit $m0, implicit $exec, implicit $mode
+  ; TopDown-NEXT:   renamable $vgpr6 = V_INTERP_P10_F32_inreg 0, $vgpr3, 0, killed $vgpr0, 0, $vgpr3, 0, 7, implicit $m0, implicit $exec, implicit $mode
+  ; TopDown-NEXT:   renamable $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+  ; TopDown-NEXT:   renamable $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+  ; TopDown-NEXT:   renamable $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+  ; TopDown-NEXT:   renamable $vgpr9 = V_INTERP_P2_F32_inreg 0, killed $vgpr2, 0, $vgpr1, 0, killed $vgpr5, 0, 7, implicit $m0, implicit $exec, implicit $mode
+  ; TopDown-NEXT:   renamable $vgpr10 = V_INTERP_P2_F32_inreg 0, killed $vgpr3, 0, killed $vgpr1, 0, killed $vgpr6, 0, 7, implicit $m0, implicit $exec, implicit $mode
+  ; TopDown-NEXT:   BUNDLE implicit-def $vgpr11, implicit killed $vgpr9_vgpr10, implicit killed $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec, implicit killed $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed $sgpr36_sgpr37_sgpr38_sgpr39 {
+  ; TopDown-NEXT:     renamable $vgpr11 = IMAGE_SAMPLE_V1_V2_gfx11 $vgpr9_vgpr10, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
+  ; TopDown-NEXT:     renamable $vgpr5_vgpr6_vgpr7_vgpr8 = IMAGE_SAMPLE_V4_V2_gfx11 killed $vgpr9_vgpr10, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr36_sgpr37_sgpr38_sgpr39, 15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8)
+  ; TopDown-NEXT:   }
+  ; TopDown-NEXT:   nofpexcept V_CMP_GT_F32_e32 1065353216, killed $vgpr11, implicit-def $vcc_lo, implicit $mode, implicit $exec
+  ; TopDown-NEXT:   renamable $sgpr0_sgpr1 = COPY $vcc
+  ; TopDown-NEXT:   nofpexcept V_CMP_GT_F32_e32 1065353216, killed $vgpr8, implicit-def $vcc_lo, implicit $mode, implicit $exec
+  ; TopDown-NEXT:   renamable $sgpr2_sgpr3 = S_AND_B64 killed renamable $sgpr0_sgpr1, killed renamable $vcc, implicit-def dead $scc
+  ; TopDown-NEXT:   $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; TopDown-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; TopDown-NEXT: {{  $}}
+  ; TopDown-NEXT: bb.1:
+  ; TopDown-NEXT:   successors: %bb.2(0x80000000)
+  ; TopDown-NEXT: {{  $}}
+  ; TopDown-NEXT:   S_BRANCH %bb.2
+  ; TopDown-NEXT: {{  $}}
+  ; TopDown-NEXT: bb.2:
+  ; TopDown-NEXT:   S_ENDPGM 0
+  ;
+  ; BottomUp-LABEL: name: sched_image_sample_post_ra
+  ; BottomUp: bb.0.entry:
+  ; BottomUp-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; BottomUp-NEXT:   liveins: $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
+  ; BottomUp-NEXT: {{  $}}
+  ; BottomUp-NEXT:   $sgpr12_sgpr13 = S_MOV_B64 $exec
+  ; BottomUp-NEXT:   $exec = S_WQM_B64 $exec, implicit-def $scc
+  ; BottomUp-NEXT:   $sgpr16_sgpr17 = S_MOV_B64 $exec
+  ; BottomUp-NEXT:   $m0 = S_MOV_B32 killed $sgpr6
+  ; BottomUp-NEXT:   $sgpr14 = S_MOV_B32 killed $sgpr5
+  ; BottomUp-NEXT:   $exec = S_WQM_B64 $exec, implicit-def $scc
+  ; BottomUp-NEXT:   $vgpr2 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec
+  ; BottomUp-NEXT:   $vgpr3 = LDS_PARAM_LOAD 0, 1, 0, implicit $m0, implicit $exec
+  ; BottomUp-NEXT:   $sgpr8 = S_MOV_B32 killed $sgpr1
+  ; BottomUp-NEXT:   renamable $sgpr0_sgpr1 = S_GETPC_B64
+  ; BottomUp-NEXT:   $sgpr9 = S_MOV_B32 killed $sgpr2
+  ; BottomUp-NEXT:   $sgpr10 = S_MOV_B32 killed $sgpr3
+  ; BottomUp-NEXT:   $sgpr15 = S_MOV_B32 killed $sgpr1
+  ; BottomUp-NEXT:   $exec = S_MOV_B64 killed renamable $sgpr16_sgpr17
+  ; BottomUp-NEXT:   BUNDLE implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit-def $sgpr16, implicit-def $sgpr16_lo16, implicit-def $sgpr16_hi16, implicit-def $sgpr17, implicit-def $sgpr17_lo16, implicit-def $sgpr17_hi16, implicit-def $sgpr18, implicit-def $sgpr18_lo16, implicit-def $sgpr18_hi16, implicit-def $sgpr19, implicit-def $sgpr19_lo16, implicit-def $sgpr19_hi16, implicit-def $sgpr20, implicit-def $sgpr20_lo16, implicit-def $sgpr20_hi16, implicit-def $sgpr21, implicit-def $sgpr21_lo16, implicit-def $sgpr21_hi16, implicit-def $sgpr22, implicit-def $sgpr22_lo16, implicit-def $sgpr22_hi16, implicit-def $sgpr23, implicit-def $sgpr23_lo16, implicit-def $sgpr23_hi16, implicit-def $sgpr16_sgpr17, implicit-def $sgpr16_sgpr17_sgpr18, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21, implicit-def $sgpr18_sgpr19, implicit-def $sgpr20_sgpr21, implicit-def $sgpr20_sgpr21_sgpr22, implicit-def $sgpr20_sgpr21_sgpr22_sgpr23, implicit-def $sgpr22_sgpr23, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $sgpr0, implicit-def $sgpr0_lo16, implicit-def $sgpr0_hi16, implicit-def $sgpr1, implicit-def $sgpr1_lo16, implicit-def $sgpr1_hi16, implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit-def $sgpr0_sgpr1, implicit-def $sgpr0_sgpr1_sgpr2, implicit-def $sgpr2_sgpr3, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr24, implicit-def $sgpr24_lo16, implicit-def $sgpr24_hi16, implicit-def $sgpr25, implicit-def $sgpr25_lo16, implicit-def $sgpr25_hi16, implicit-def $sgpr26, implicit-def $sgpr26_lo16, implicit-def $sgpr26_hi16, implicit-def $sgpr27, implicit-def $sgpr27_lo16, implicit-def $sgpr27_hi16, implicit-def $sgpr28, implicit-def $sgpr28_lo16, implicit-def $sgpr28_hi16, implicit-def $sgpr29, implicit-def $sgpr29_lo16, implicit-def $sgpr29_hi16, implicit-def $sgpr30, implicit-def $sgpr30_lo16, implicit-def $sgpr30_hi16, implicit-def $sgpr31, implicit-def $sgpr31_lo16, implicit-def $sgpr31_hi16, implicit-def $sgpr24_sgpr25, implicit-def $sgpr24_sgpr25_sgpr26, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29, implicit-def $sgpr26_sgpr27, implicit-def $sgpr28_sgpr29, implicit-def $sgpr28_sgpr29_sgpr30, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr30_sgpr31, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30, implicit-def $sgpr36_sgpr37_sgpr38_sgpr39, implicit-def $sgpr36, implicit-def $sgpr36_lo16, implicit-def $sgpr36_hi16, implicit-def $sgpr37, implicit-def $sgpr37_lo16, implicit-def $sgpr37_hi16, implicit-def $sgpr38, implicit-def $sgpr38_lo16, implicit-def $sgpr38_hi16, implicit-def $sgpr39, implicit-def $sgpr39_lo16, implicit-def $sgpr39_hi16, implicit-def $sgpr36_sgpr37, implicit-def $sgpr36_sgpr37_sgpr38, implicit-def $sgpr38_sgpr39, implicit killed $sgpr14_sgpr15 {
+  ; BottomUp-NEXT:     renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX8_IMM renamable $sgpr14_sgpr15, 0, 0 :: (invariant load (s256))
+  ; BottomUp-NEXT:     renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM renamable $sgpr14_sgpr15, 128, 0 :: (invariant load (s128))
+  ; BottomUp-NEXT:     renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr14_sgpr15, 96, 0 :: (invariant load (s256))
+  ; BottomUp-NEXT:     renamable $sgpr36_sgpr37_sgpr38_sgpr39 = S_LOAD_DWORDX4_IMM killed renamable $sgpr14_sgpr15, 32, 0 :: (invariant load (s128))
+  ; BottomUp-NEXT:   }
+  ; BottomUp-NEXT:   renamable $vgpr6 = V_INTERP_P10_F32_inreg 0, $vgpr3, 0, $vgpr0, 0, $vgpr3, 0, 7, implicit $m0, implicit $exec, implicit $mode
+  ; BottomUp-NEXT:   renamable $vgpr5 = V_INTERP_P10_F32_inreg 0, $vgpr2, 0, killed $vgpr0, 0, $vgpr2, 0, 7, implicit $m0, implicit $exec, implicit $mode
+  ; BottomUp-NEXT:   renamable $vgpr10 = V_INTERP_P2_F32_inreg 0, killed $vgpr3, 0, $vgpr1, 0, killed $vgpr6, 0, 7, implicit $m0, implicit $exec, implicit $mode
+  ; BottomUp-NEXT:   renamable $vgpr9 = V_INTERP_P2_F32_inreg 0, killed $vgpr2, 0, killed $vgpr1, 0, killed $vgpr5, 0, 7, implicit $m0, implicit $exec, implicit $mode
+  ; BottomUp-NEXT:   BUNDLE implicit-def $vgpr11, implicit killed $vgpr9_vgpr10, implicit killed $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec, implicit killed $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed $sgpr36_sgpr37_sgpr38_sgpr39 {
+  ; BottomUp-NEXT:     renamable $vgpr11 = IMAGE_SAMPLE_V1_V2_gfx11 $vgpr9_vgpr10, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
+  ; BottomUp-NEXT:     renamable $vgpr5_vgpr6_vgpr7_vgpr8 = IMAGE_SAMPLE_V4_V2_gfx11 killed $vgpr9_vgpr10, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr36_sgpr37_sgpr38_sgpr39, 15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8)
+  ; BottomUp-NEXT:   }
+  ; BottomUp-NEXT:   renamable $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+  ; BottomUp-NEXT:   nofpexcept V_CMP_GT_F32_e32 1065353216, killed $vgpr11, implicit-def $vcc_lo, implicit $mode, implicit $exec
+  ; BottomUp-NEXT:   renamable $sgpr0_sgpr1 = COPY $vcc
+  ; BottomUp-NEXT:   nofpexcept V_CMP_GT_F32_e32 1065353216, killed $vgpr8, implicit-def $vcc_lo, implicit $mode, implicit $exec
+  ; BottomUp-NEXT:   renamable $sgpr2_sgpr3 = S_AND_B64 killed renamable $sgpr0_sgpr1, killed renamable $vcc, implicit-def dead $scc
+  ; BottomUp-NEXT:   renamable $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+  ; BottomUp-NEXT:   renamable $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+  ; BottomUp-NEXT:   $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; BottomUp-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; BottomUp-NEXT: {{  $}}
+  ; BottomUp-NEXT: bb.1:
+  ; BottomUp-NEXT:   successors: %bb.2(0x80000000)
+  ; BottomUp-NEXT: {{  $}}
+  ; BottomUp-NEXT:   S_BRANCH %bb.2
+  ; BottomUp-NEXT: {{  $}}
+  ; BottomUp-NEXT: bb.2:
+  ; BottomUp-NEXT:   S_ENDPGM 0
+  bb.0.entry:
+    successors: %bb.1(0x40000000), %bb.34(0x40000000); %bb.1(50.00%), %bb.34(50.00%)
+    liveins: $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
+    $sgpr12_sgpr13 = S_MOV_B64 $exec
+    $exec = S_WQM_B64 $exec, implicit-def $scc
+    $sgpr14 = S_MOV_B32 $sgpr5
+    $m0 = S_MOV_B32 $sgpr6
+    $sgpr16_sgpr17 = S_MOV_B64 $exec
+    $exec = S_WQM_B64 $exec, implicit-def $scc
+    $sgpr10 = S_MOV_B32 $sgpr3
+    $sgpr9  = S_MOV_B32 $sgpr2
+    $sgpr8  = S_MOV_B32 $sgpr1
+    $vgpr2 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec
+    $vgpr3 = LDS_PARAM_LOAD 0, 1, 0, implicit $m0, implicit $exec
+    renamable $sgpr0_sgpr1 = S_GETPC_B64
+    $sgpr15 = S_MOV_B32 killed $sgpr1
+    $exec = S_MOV_B64 killed renamable $sgpr16_sgpr17
+    BUNDLE implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit-def $sgpr16, implicit-def $sgpr16_lo16, implicit-def $sgpr16_hi16, implicit-def $sgpr17, implicit-def $sgpr17_lo16, implicit-def $sgpr17_hi16, implicit-def $sgpr18, implicit-def $sgpr18_lo16, implicit-def $sgpr18_hi16, implicit-def $sgpr19, implicit-def $sgpr19_lo16, implicit-def $sgpr19_hi16, implicit-def $sgpr20, implicit-def $sgpr20_lo16, implicit-def $sgpr20_hi16, implicit-def $sgpr21, implicit-def $sgpr21_lo16, implicit-def $sgpr21_hi16, implicit-def $sgpr22, implicit-def $sgpr22_lo16, implicit-def $sgpr22_hi16, implicit-def $sgpr23, implicit-def $sgpr23_lo16, implicit-def $sgpr23_hi16, implicit-def $sgpr16_sgpr17, implicit-def $sgpr16_sgpr17_sgpr18, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21, implicit-def $sgpr18_sgpr19, implicit-def $sgpr20_sgpr21, implicit-def $sgpr20_sgpr21_sgpr22, implicit-def $sgpr20_sgpr21_sgpr22_sgpr23, implicit-def $sgpr22_sgpr23, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $sgpr0, implicit-def $sgpr0_lo16, implicit-def $sgpr0_hi16, implicit-def $sgpr1, implicit-def $sgpr1_lo16, implicit-def $sgpr1_hi16, implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit-def $sgpr0_sgpr1, implicit-def $sgpr0_sgpr1_sgpr2, implicit-def $sgpr2_sgpr3, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr24, implicit-def $sgpr24_lo16, implicit-def $sgpr24_hi16, implicit-def $sgpr25, implicit-def $sgpr25_lo16, implicit-def $sgpr25_hi16, implicit-def $sgpr26, implicit-def $sgpr26_lo16, implicit-def $sgpr26_hi16, implicit-def $sgpr27, implicit-def $sgpr27_lo16, implicit-def $sgpr27_hi16, implicit-def $sgpr28, implicit-def $sgpr28_lo16, implicit-def $sgpr28_hi16, implicit-def $sgpr29, implicit-def $sgpr29_lo16, implicit-def $sgpr29_hi16, implicit-def $sgpr30, implicit-def $sgpr30_lo16, implicit-def $sgpr30_hi16, implicit-def $sgpr31, implicit-def $sgpr31_lo16, implicit-def $sgpr31_hi16, implicit-def $sgpr24_sgpr25, implicit-def $sgpr24_sgpr25_sgpr26, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29, implicit-def $sgpr26_sgpr27, implicit-def $sgpr28_sgpr29, implicit-def $sgpr28_sgpr29_sgpr30, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr30_sgpr31, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30, implicit-def $sgpr36_sgpr37_sgpr38_sgpr39, implicit-def $sgpr36, implicit-def $sgpr36_lo16, implicit-def $sgpr36_hi16, implicit-def $sgpr37, implicit-def $sgpr37_lo16, implicit-def $sgpr37_hi16, implicit-def $sgpr38, implicit-def $sgpr38_lo16, implicit-def $sgpr38_hi16, implicit-def $sgpr39, implicit-def $sgpr39_lo16, implicit-def $sgpr39_hi16, implicit-def $sgpr36_sgpr37, implicit-def $sgpr36_sgpr37_sgpr38, implicit-def $sgpr38_sgpr39, implicit $sgpr14_sgpr15 {
+      renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX8_IMM renamable $sgpr14_sgpr15, 0, 0 :: (invariant load (s256))
+      renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM renamable $sgpr14_sgpr15, 128, 0 :: (invariant load (s128))
+      renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr14_sgpr15, 96, 0 :: (invariant load (s256))
+      renamable $sgpr36_sgpr37_sgpr38_sgpr39 = S_LOAD_DWORDX4_IMM renamable $sgpr14_sgpr15, 32, 0 :: (invariant load (s128))
+    }
+    renamable $vgpr5  = V_INTERP_P10_F32_inreg 0, $vgpr2, 0, $vgpr0, 0, $vgpr2, 0, 7, implicit $m0, implicit $exec, implicit $mode
+    renamable $vgpr6  = V_INTERP_P10_F32_inreg 0, $vgpr3, 0, $vgpr0, 0, $vgpr3, 0, 7, implicit $m0, implicit $exec, implicit $mode
+    renamable $vgpr9  = V_INTERP_P2_F32_inreg 0, $vgpr2, 0, $vgpr1, 0, killed $vgpr5, 0, 7, implicit $m0, implicit $exec, implicit $mode
+    renamable $vgpr10 = V_INTERP_P2_F32_inreg 0, $vgpr3, 0, $vgpr1, 0, killed $vgpr6, 0, 7, implicit $m0, implicit $exec, implicit $mode
+    BUNDLE implicit-def $vgpr11, implicit $vgpr9_vgpr10, implicit killed $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec, implicit killed $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed $sgpr36_sgpr37_sgpr38_sgpr39 {
+      renamable $vgpr11 = IMAGE_SAMPLE_V1_V2_gfx11 $vgpr9_vgpr10, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
+      renamable $vgpr5_vgpr6_vgpr7_vgpr8 = IMAGE_SAMPLE_V4_V2_gfx11 $vgpr9_vgpr10, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr36_sgpr37_sgpr38_sgpr39, 15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8)
+    }
+    nofpexcept V_CMP_GT_F32_e32 1065353216, $vgpr11, implicit-def $vcc, implicit $mode, implicit $exec
+    renamable $sgpr0_sgpr1 = COPY $vcc
+    nofpexcept V_CMP_GT_F32_e32 1065353216, $vgpr8,  implicit-def $vcc, implicit $mode, implicit $exec
+    renamable $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    renamable $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    renamable $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+    renamable $sgpr2_sgpr3 = S_AND_B64 killed renamable $sgpr0_sgpr1, killed renamable $vcc, implicit-def dead $scc
+    $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
+    S_CBRANCH_EXECZ %bb.34, implicit $exec
+
+  bb.1:
+    successors: %bb.34(0x80000000)
+    S_BRANCH %bb.34
+
+  bb.34:
+    S_ENDPGM 0
+...

>From ee44249f9e7ce8754a17c3117e0610bc7bf4fe54 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Tue, 27 May 2025 17:36:14 +0800
Subject: [PATCH 5/5] [AMDGPU] Rmove unnecssary RecedeCycle.

---
 .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 71 +------------------
 1 file changed, 3 insertions(+), 68 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 7369b4ad60e90..4a1583a40babd 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -280,39 +280,6 @@ void GCNHazardRecognizer::processBundle() {
   CurrCycleInstr = nullptr;
 }
 
-void GCNHazardRecognizer::processBundleBottomUp() {
-  // Walk through the instructions in this bundle in bottom-up order.
-  // We only use this during post-RA scheduling, so hazard recognizer mode
-  // should never be active here (it always runs top-down).
-  assert(!IsHazardRecognizerMode &&
-         "Bottom-up scheduling shouldn't run in hazard recognizer mode");
-
-  // Step through each instruction in the bundle in bottom-up order.
-  MachineBasicBlock::instr_iterator MI =
-      std::next(CurrCycleInstr->getIterator());
-  MachineBasicBlock::instr_iterator E =
-      CurrCycleInstr->getParent()->instr_end();
-
-  // Evict stale entries to maintain a fixed lookahead window.
-  // TODO: Hazard detection is not yet implemented. This scheduling
-  // is intended for GFX11 and newer.
-  for (; MI != E && MI->isInsideBundle(); ++MI) {
-    CurrCycleInstr = &*MI;
-
-    // Remove up to (MaxLookAhead - 1) oldest entries.
-    for (unsigned I = 0, E = MaxLookAhead - 1; I < E && !EmittedInstrs.empty();
-         ++I)
-      EmittedInstrs.pop_back();
-
-    EmittedInstrs.push_back(CurrCycleInstr);
-
-    // Keep only the most recent MaxLookAhead entries
-    EmittedInstrs.resize(MaxLookAhead);
-  }
-
-  CurrCycleInstr = nullptr;
-}
-
 void GCNHazardRecognizer::runOnInstruction(MachineInstr *MI) {
   assert(IsHazardRecognizerMode);
 
@@ -450,41 +417,9 @@ void GCNHazardRecognizer::AdvanceCycle() {
 }
 
 void GCNHazardRecognizer::RecedeCycle() {
-  // If no instruction was issued this cycle, pop the oldest placeholder.
-  if (!CurrCycleInstr) {
-    if (!EmittedInstrs.empty())
-      EmittedInstrs.pop_back();
-    return;
-  }
-
-  // If this is a bundle header, handle the entire bundle here.
-  if (CurrCycleInstr->isBundle()) {
-    processBundleBottomUp();
-    return;
-  }
-
-  unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
-  if (!NumWaitStates) {
-    CurrCycleInstr = nullptr;
-    return;
-  }
-
-  // Add current instruction to the emitted list.
-  EmittedInstrs.push_back(CurrCycleInstr);
-
-  // Model remaining wait states by removing older placeholders.
-  for (unsigned I = 1, E = std::min(NumWaitStates, getMaxLookAhead()); I < E;
-       ++I) {
-    if (!EmittedInstrs.empty())
-      EmittedInstrs.pop_back();
-  }
-
-  // getMaxLookahead() is the largest number of wait states we will ever need
-  // to insert, so there is no point in keeping track of more than that many
-  // wait states.
-  EmittedInstrs.resize(getMaxLookAhead());
-
-  CurrCycleInstr = nullptr;
+  if (IsHazardRecognizerMode || ST.getGeneration() < AMDGPUSubtarget::GFX11)
+    llvm_unreachable(
+        "hazard recognizer does not support bottom-up scheduling.");
 }
 
 //===----------------------------------------------------------------------===//



More information about the llvm-commits mailing list