[llvm] [AMDGPU] Support bottom-up postRA scheduling. (PR #135295)
Harrison Hao via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 10 20:01:13 PDT 2025
https://github.com/harrisonGPU updated https://github.com/llvm/llvm-project/pull/135295
>From 8574e1fce7a559cfbbc54b7285a88d52900a23ce Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Wed, 9 Apr 2025 17:52:33 +0800
Subject: [PATCH] [AMDGPU] Support bottom-up postRA scheduling.
---
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 46 ++++++++++++++++-
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 1 +
.../CodeGen/AMDGPU/sched-barrier-post-RA.mir | 50 ++++++++++++++++++-
3 files changed, 95 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index aaefe27b1324f..43d04cbeb9451 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -284,6 +284,25 @@ void GCNHazardRecognizer::processBundle() {
CurrCycleInstr = nullptr;
}
+void GCNHazardRecognizer::reverseProcessBundle() {
+ MachineBasicBlock::instr_iterator MI =
+ std::next(CurrCycleInstr->getIterator());
+ MachineBasicBlock::instr_iterator E =
+ CurrCycleInstr->getParent()->instr_end();
+
+ for (; MI != E && MI->isInsideBundle(); ++MI) {
+ CurrCycleInstr = &*MI;
+ for (unsigned I = 0, E = MaxLookAhead - 1; I < E; ++I) {
+ if (!EmittedInstrs.empty())
+ EmittedInstrs.pop_back();
+ }
+
+ EmittedInstrs.push_back(CurrCycleInstr);
+ EmittedInstrs.resize(MaxLookAhead);
+ }
+ CurrCycleInstr = nullptr;
+}
+
void GCNHazardRecognizer::runOnInstruction(MachineInstr *MI) {
assert(IsHazardRecognizerMode);
@@ -423,7 +442,32 @@ void GCNHazardRecognizer::AdvanceCycle() {
}
void GCNHazardRecognizer::RecedeCycle() {
- llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
+ if (!CurrCycleInstr) {
+ if (!EmittedInstrs.empty())
+ EmittedInstrs.pop_back();
+ return;
+ }
+
+ if (CurrCycleInstr->isBundle()) {
+ reverseProcessBundle();
+ return;
+ }
+
+ unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
+ if (!NumWaitStates) {
+ CurrCycleInstr = nullptr;
+ return;
+ }
+
+ EmittedInstrs.push_back(CurrCycleInstr);
+ for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead()); i < e;
+ ++i) {
+ if (!EmittedInstrs.empty())
+ EmittedInstrs.pop_back();
+ }
+
+ EmittedInstrs.resize(getMaxLookAhead());
+ CurrCycleInstr = nullptr;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index bbc55851bf967..eed2561bad231 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -69,6 +69,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
// Advance over a MachineInstr bundle. Look for hazards in the bundled
// instructions.
void processBundle();
+ void reverseProcessBundle();
// Run on an individual instruction in hazard recognizer mode. This can be
// used on a newly inserted instruction before returning from PreEmitNoops.
diff --git a/llvm/test/CodeGen/AMDGPU/sched-barrier-post-RA.mir b/llvm/test/CodeGen/AMDGPU/sched-barrier-post-RA.mir
index 7bdb8f5b35ec5..02ebffca84bda 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-barrier-post-RA.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-barrier-post-RA.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -misched-cluster=false -run-pass=postmisched -verify-misched -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -misched-cluster=false -run-pass=postmisched -verify-misched -o - %s | FileCheck -check-prefix=CHECK %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -misched-cluster=false -run-pass=postmisched -misched-postra-direction=bottomup -verify-misched -o - %s | FileCheck -check-prefix=CHECK-BOTTOMUP %s
--- |
define amdgpu_kernel void @no_sched_barrier(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) { ret void }
@@ -29,6 +30,21 @@ body: |
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: }
; CHECK-NEXT: S_ENDPGM 0
+ ;
+ ; CHECK-BOTTOMUP-LABEL: name: no_sched_barrier
+ ; CHECK-BOTTOMUP: renamable $vgpr0 = IMPLICIT_DEF
+ ; CHECK-BOTTOMUP-NEXT: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
+ ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
+ ; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-BOTTOMUP-NEXT: }
+ ; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = nsw V_MUL_LO_U32_e64 killed $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = nsw V_MUL_LO_U32_e64 killed $vgpr2, $vgpr2, implicit $exec
+ ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit killed $vgpr0, implicit killed $vgpr1, implicit killed $sgpr0_sgpr1, implicit $exec, implicit killed $vgpr2 {
+ ; CHECK-BOTTOMUP-NEXT: GLOBAL_STORE_DWORD_SADDR renamable $vgpr0, killed renamable $vgpr1, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ ; CHECK-BOTTOMUP-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ ; CHECK-BOTTOMUP-NEXT: }
+ ; CHECK-BOTTOMUP-NEXT: S_ENDPGM 0
renamable $sgpr0_sgpr1 = IMPLICIT_DEF
renamable $vgpr0 = IMPLICIT_DEF
BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
@@ -66,6 +82,22 @@ body: |
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: }
; CHECK-NEXT: S_ENDPGM 0
+ ;
+ ; CHECK-BOTTOMUP-LABEL: name: sched_barrier_mask_0
+ ; CHECK-BOTTOMUP: renamable $vgpr0 = IMPLICIT_DEF
+ ; CHECK-BOTTOMUP-NEXT: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
+ ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
+ ; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-BOTTOMUP-NEXT: }
+ ; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = nsw V_MUL_LO_U32_e64 killed $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-BOTTOMUP-NEXT: SCHED_BARRIER 0
+ ; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = nsw V_MUL_LO_U32_e64 killed $vgpr2, $vgpr2, implicit $exec
+ ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit killed $vgpr0, implicit killed $vgpr1, implicit killed $sgpr0_sgpr1, implicit $exec, implicit killed $vgpr2 {
+ ; CHECK-BOTTOMUP-NEXT: GLOBAL_STORE_DWORD_SADDR renamable $vgpr0, killed renamable $vgpr1, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ ; CHECK-BOTTOMUP-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ ; CHECK-BOTTOMUP-NEXT: }
+ ; CHECK-BOTTOMUP-NEXT: S_ENDPGM 0
renamable $sgpr0_sgpr1 = IMPLICIT_DEF
renamable $vgpr0 = IMPLICIT_DEF
BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
@@ -105,6 +137,22 @@ body: |
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: }
; CHECK-NEXT: S_ENDPGM 0
+ ;
+ ; CHECK-BOTTOMUP-LABEL: name: sched_barrier_mask_1
+ ; CHECK-BOTTOMUP: renamable $vgpr0 = IMPLICIT_DEF
+ ; CHECK-BOTTOMUP-NEXT: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
+ ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
+ ; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
+ ; CHECK-BOTTOMUP-NEXT: }
+ ; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = nsw V_MUL_LO_U32_e64 killed $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = nsw V_MUL_LO_U32_e64 killed $vgpr2, $vgpr2, implicit $exec
+ ; CHECK-BOTTOMUP-NEXT: SCHED_BARRIER 1
+ ; CHECK-BOTTOMUP-NEXT: BUNDLE implicit killed $vgpr0, implicit killed $vgpr1, implicit killed $sgpr0_sgpr1, implicit $exec, implicit killed $vgpr2 {
+ ; CHECK-BOTTOMUP-NEXT: GLOBAL_STORE_DWORD_SADDR renamable $vgpr0, killed renamable $vgpr1, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ ; CHECK-BOTTOMUP-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ ; CHECK-BOTTOMUP-NEXT: }
+ ; CHECK-BOTTOMUP-NEXT: S_ENDPGM 0
renamable $sgpr0_sgpr1 = IMPLICIT_DEF
renamable $vgpr0 = IMPLICIT_DEF
BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
More information about the llvm-commits
mailing list