[llvm] [AMDGPU] Cluster export instruction in PostRA Scheduler (PR #141399)

Carl Ritson via llvm-commits llvm-commits at lists.llvm.org
Sun May 25 01:17:07 PDT 2025


https://github.com/perlfu updated https://github.com/llvm/llvm-project/pull/141399

>From 759503271ce1a9777875109277e7ea8206911ff1 Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Sun, 25 May 2025 16:56:50 +0900
Subject: [PATCH 1/3] [AMDGPU] Add export clustering post-RA scheduler test
 (NFC)

This is a pre-commit test for #141399.
---
 .../CodeGen/AMDGPU/export-cluster-postra.mir  | 46 +++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/export-cluster-postra.mir

diff --git a/llvm/test/CodeGen/AMDGPU/export-cluster-postra.mir b/llvm/test/CodeGen/AMDGPU/export-cluster-postra.mir
new file mode 100644
index 0000000000000..089fc220ff9ed
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/export-cluster-postra.mir
@@ -0,0 +1,46 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=postmisched %s -o - | FileCheck %s
+
+--- |
+  define amdgpu_ps void @export_cluster_postra() {
+    ret void
+  }
+...
+---
+
+---
+name:            export_cluster_postra
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr4, $vgpr5, $vgpr6, $sgpr8_sgpr9_sgpr10_sgpr11
+
+    ; CHECK-LABEL: name: export_cluster_postra
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr4, $vgpr5, $vgpr6, $sgpr8_sgpr9_sgpr10_sgpr11
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: BUNDLE implicit-def $sgpr0, implicit-def $sgpr1, implicit killed $sgpr8_sgpr9_sgpr10_sgpr11 {
+    ; CHECK-NEXT:   $sgpr0 = S_BUFFER_LOAD_DWORD_IMM $sgpr8_sgpr9_sgpr10_sgpr11, 20, 0 :: (dereferenceable invariant load (s32))
+    ; CHECK-NEXT:   $sgpr1 = S_BUFFER_LOAD_DWORD_IMM killed $sgpr8_sgpr9_sgpr10_sgpr11, 28, 0 :: (dereferenceable invariant load (s32))
+    ; CHECK-NEXT: }
+    ; CHECK-NEXT: $vgpr6 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, killed $vgpr6, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: $vgpr5 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, killed $vgpr5, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: EXP 0, killed $vgpr0, killed $vgpr6, undef $vgpr0, undef $vgpr0, -1, 0, 3, implicit $exec
+    ; CHECK-NEXT: EXP 1, killed $vgpr1, killed $vgpr5, undef $vgpr0, undef $vgpr0, -1, 0, 3, implicit $exec
+    ; CHECK-NEXT: $vgpr7 = nnan nsz arcp contract afn reassoc nofpexcept V_MUL_F32_e32 killed $sgpr0, $vgpr2, implicit $mode, implicit $exec
+    ; CHECK-NEXT: $vgpr2 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, killed $vgpr2, 0, killed $sgpr1, 1, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: $vgpr3 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 killed $vgpr7, killed $vgpr3, implicit $mode, implicit $exec
+    ; CHECK-NEXT: $vgpr2 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 killed $vgpr4, killed $vgpr2, implicit $mode, implicit $exec
+    ; CHECK-NEXT: EXP_DONE 2, killed $vgpr3, killed $vgpr2, undef $vgpr0, undef $vgpr0, -1, 0, 3, implicit $exec
+    BUNDLE implicit-def $sgpr0, implicit-def $sgpr1, implicit $sgpr8_sgpr9_sgpr10_sgpr11 {
+      $sgpr0 = S_BUFFER_LOAD_DWORD_IMM $sgpr8_sgpr9_sgpr10_sgpr11, 20, 0 :: (dereferenceable invariant load (s32))
+      $sgpr1 = S_BUFFER_LOAD_DWORD_IMM $sgpr8_sgpr9_sgpr10_sgpr11, 28, 0 :: (dereferenceable invariant load (s32))
+    }
+    $vgpr6 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, $vgpr6, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
+    $vgpr5 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, $vgpr5, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
+    $vgpr7 = nnan nsz arcp contract afn reassoc nofpexcept V_MUL_F32_e32 $sgpr0, $vgpr2, implicit $mode, implicit $exec
+    $vgpr3 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 $vgpr7, $vgpr3, implicit $mode, implicit $exec
+    $vgpr2 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, $vgpr2, 0, $sgpr1, 1, 0, implicit $mode, implicit $exec
+    $vgpr2 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 $vgpr4, $vgpr2, implicit $mode, implicit $exec
+    EXP 0, $vgpr0, $vgpr6, undef $vgpr0, undef $vgpr0, -1, 0, 3, implicit $exec
+    EXP 1, $vgpr1, $vgpr5, undef $vgpr0, undef $vgpr0, -1, 0, 3, implicit $exec
+    EXP_DONE 2, $vgpr3, $vgpr2, undef $vgpr0, undef $vgpr0, -1, 0, 3, implicit $exec
+...

>From e7f8d165b9966ca08b32a2a36a235cdc16c95ef5 Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Sun, 25 May 2025 16:52:49 +0900
Subject: [PATCH 2/3] [AMDGPU] Cluster export instruction in PostRA Scheduler

The export clustering DAG mutation needs to be applied post-RA to
maintain the order established by the pre-RA scheduler.
---
 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index e24d8481408ad..7479703ce353a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1129,6 +1129,7 @@ GCNTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
        getOptLevel() >= CodeGenOptLevel::Less) &&
       EnableVOPD)
     DAG->addMutation(createVOPDPairingMutation());
+  DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
   return DAG;
 }
 //===----------------------------------------------------------------------===//

>From 0eb864ee8bd18c8c8b5b54c5c181b2dacbdafbca Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Sun, 25 May 2025 17:16:22 +0900
Subject: [PATCH 3/3] - Test changes

---
 llvm/test/CodeGen/AMDGPU/export-cluster-postra.mir | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/export-cluster-postra.mir b/llvm/test/CodeGen/AMDGPU/export-cluster-postra.mir
index 089fc220ff9ed..5dfec45245a83 100644
--- a/llvm/test/CodeGen/AMDGPU/export-cluster-postra.mir
+++ b/llvm/test/CodeGen/AMDGPU/export-cluster-postra.mir
@@ -23,12 +23,12 @@ body: |
     ; CHECK-NEXT: }
     ; CHECK-NEXT: $vgpr6 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, killed $vgpr6, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
     ; CHECK-NEXT: $vgpr5 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, killed $vgpr5, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
-    ; CHECK-NEXT: EXP 0, killed $vgpr0, killed $vgpr6, undef $vgpr0, undef $vgpr0, -1, 0, 3, implicit $exec
-    ; CHECK-NEXT: EXP 1, killed $vgpr1, killed $vgpr5, undef $vgpr0, undef $vgpr0, -1, 0, 3, implicit $exec
     ; CHECK-NEXT: $vgpr7 = nnan nsz arcp contract afn reassoc nofpexcept V_MUL_F32_e32 killed $sgpr0, $vgpr2, implicit $mode, implicit $exec
     ; CHECK-NEXT: $vgpr2 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, killed $vgpr2, 0, killed $sgpr1, 1, 0, implicit $mode, implicit $exec
     ; CHECK-NEXT: $vgpr3 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 killed $vgpr7, killed $vgpr3, implicit $mode, implicit $exec
     ; CHECK-NEXT: $vgpr2 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 killed $vgpr4, killed $vgpr2, implicit $mode, implicit $exec
+    ; CHECK-NEXT: EXP 0, killed $vgpr0, killed $vgpr6, undef $vgpr0, undef $vgpr0, -1, 0, 3, implicit $exec
+    ; CHECK-NEXT: EXP 1, killed $vgpr1, killed $vgpr5, undef $vgpr0, undef $vgpr0, -1, 0, 3, implicit $exec
     ; CHECK-NEXT: EXP_DONE 2, killed $vgpr3, killed $vgpr2, undef $vgpr0, undef $vgpr0, -1, 0, 3, implicit $exec
     BUNDLE implicit-def $sgpr0, implicit-def $sgpr1, implicit $sgpr8_sgpr9_sgpr10_sgpr11 {
       $sgpr0 = S_BUFFER_LOAD_DWORD_IMM $sgpr8_sgpr9_sgpr10_sgpr11, 20, 0 :: (dereferenceable invariant load (s32))



More information about the llvm-commits mailing list