[llvm] [AMDGPU] Don't allow nesting hard clauses. (PR #188460)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 25 04:32:35 PDT 2026


https://github.com/sstipano updated https://github.com/llvm/llvm-project/pull/188460

From e349486a7291d0611726757ca635a4010efaffab Mon Sep 17 00:00:00 2001
From: sstipano <sstipano7 at gmail.com>
Date: Wed, 25 Mar 2026 12:14:26 +0100
Subject: [PATCH] [AMDGPU] Don't allow nesting hard clauses.

---
 .../lib/Target/AMDGPU/SIInsertHardClauses.cpp |  15 +-
 .../si-insert-hard-clauses-no-nesting.mir     | 148 ++++++++++++++++++
 2 files changed, 162 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/si-insert-hard-clauses-no-nesting.mir

diff --git a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
index 0a68512668c7d..af2020d4304e9 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
@@ -209,8 +209,21 @@ class SIInsertHardClauses {
     bool Changed = false;
     for (auto &MBB : MF) {
       ClauseInfo CI;
+      unsigned ExistingClauseRemaining = 0;
       for (auto &MI : MBB) {
-        HardClauseType Type = getHardClauseType(MI);
+        HardClauseType Type;
+        if (ExistingClauseRemaining) {
+          if (!MI.isMetaInstruction())
+            ExistingClauseRemaining--;
+          Type = HARDCLAUSE_ILLEGAL;
+        } else if (MI.getOpcode() == AMDGPU::S_CLAUSE) {
+          // Respect existing explicit clauses. Re-clausing instructions that
+          // are already covered by an S_CLAUSE can create nested clauses.
+          ExistingClauseRemaining = (MI.getOperand(0).getImm() & 63) + 1;
+          Type = HARDCLAUSE_ILLEGAL;
+        } else {
+          Type = getHardClauseType(MI);
+        }
 
         int64_t Dummy1;
         bool Dummy2;
diff --git a/llvm/test/CodeGen/AMDGPU/si-insert-hard-clauses-no-nesting.mir b/llvm/test/CodeGen/AMDGPU/si-insert-hard-clauses-no-nesting.mir
new file mode 100644
index 0000000000000..b226a1bb4de2b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-insert-hard-clauses-no-nesting.mir
@@ -0,0 +1,148 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass si-insert-hard-clauses %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-insert-hard-clauses %s -o - | FileCheck %s
+
+---
+name: no_nested_clause_from_existing_clause
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    ; CHECK-LABEL: name: no_nested_clause_from_existing_clause
+    ; CHECK: liveins: $sgpr0_sgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit $sgpr0_sgpr1 {
+    ; CHECK-NEXT:   S_CLAUSE 1
+    ; CHECK-NEXT:   $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+    ; CHECK-NEXT:   KILL undef renamable $sgpr0
+    ; CHECK-NEXT:   $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
+    ; CHECK-NEXT: }
+    ; CHECK-NEXT: S_ENDPGM 0
+    BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit $sgpr0_sgpr1 {
+      S_CLAUSE 1
+      $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+      KILL undef renamable $sgpr0
+      $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
+    }
+    S_ENDPGM 0
+...
+
+---
+name: no_nested_clause_longer_existing_clause
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    ; CHECK-LABEL: name: no_nested_clause_longer_existing_clause
+    ; CHECK: liveins: $sgpr0_sgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit-def $sgpr4, implicit $sgpr0_sgpr1 {
+    ; CHECK-NEXT:   S_CLAUSE 2
+    ; CHECK-NEXT:   $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+    ; CHECK-NEXT:   KILL undef renamable $sgpr0
+    ; CHECK-NEXT:   $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
+    ; CHECK-NEXT:   KILL undef renamable $sgpr0
+    ; CHECK-NEXT:   $sgpr4 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 8, 0
+    ; CHECK-NEXT: }
+    ; CHECK-NEXT: S_ENDPGM 0
+    BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit-def $sgpr4, implicit $sgpr0_sgpr1 {
+      S_CLAUSE 2
+      $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+      KILL undef renamable $sgpr0
+      $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
+      KILL undef renamable $sgpr0
+      $sgpr4 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 8, 0
+    }
+    S_ENDPGM 0
+...
+
+---
+name: no_nested_clause_existing_flat_clause
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; CHECK-LABEL: name: no_nested_clause_existing_flat_clause
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: BUNDLE implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit $vgpr0_vgpr1, implicit $exec, implicit $flat_scr {
+    ; CHECK-NEXT:   S_CLAUSE 2
+    ; CHECK-NEXT:   $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+    ; CHECK-NEXT:   $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec, implicit $flat_scr
+    ; CHECK-NEXT:   $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 8, 0, implicit $exec, implicit $flat_scr
+    ; CHECK-NEXT: }
+    ; CHECK-NEXT: S_ENDPGM 0
+    BUNDLE implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit $vgpr0_vgpr1, implicit $exec, implicit $flat_scr {
+      S_CLAUSE 2
+      $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+      $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec, implicit $flat_scr
+      $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 8, 0, implicit $exec, implicit $flat_scr
+    }
+    S_ENDPGM 0
+...
+
+---
+name: no_nested_clause_but_new_clause_afterwards
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    ; CHECK-LABEL: name: no_nested_clause_but_new_clause_afterwards
+    ; CHECK: liveins: $sgpr0_sgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit $sgpr0_sgpr1 {
+    ; CHECK-NEXT:   S_CLAUSE 1
+    ; CHECK-NEXT:   $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+    ; CHECK-NEXT:   KILL undef renamable $sgpr0
+    ; CHECK-NEXT:   $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
+    ; CHECK-NEXT: }
+    ; CHECK-NEXT: BUNDLE implicit-def $sgpr4, implicit-def $sgpr5, implicit $sgpr0_sgpr1 {
+    ; CHECK-NEXT:   S_CLAUSE 1
+    ; CHECK-NEXT:   $sgpr4 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 8, 0
+    ; CHECK-NEXT:   $sgpr5 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 12, 0
+    ; CHECK-NEXT: }
+    ; CHECK-NEXT: S_ENDPGM 0
+    BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit $sgpr0_sgpr1 {
+      S_CLAUSE 1
+      $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+      KILL undef renamable $sgpr0
+      $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
+    }
+    $sgpr4 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 8, 0
+    $sgpr5 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 12, 0
+    S_ENDPGM 0
+...
+
+---
+name: new_clause_after_existing_clause_with_gap
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    ; CHECK-LABEL: name: new_clause_after_existing_clause_with_gap
+    ; CHECK: liveins: $sgpr0_sgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit $sgpr0_sgpr1 {
+    ; CHECK-NEXT:   S_CLAUSE 1
+    ; CHECK-NEXT:   $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+    ; CHECK-NEXT:   KILL undef renamable $sgpr0
+    ; CHECK-NEXT:   $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
+    ; CHECK-NEXT: }
+    ; CHECK-NEXT: $sgpr6 = S_MOV_B32 0
+    ; CHECK-NEXT: BUNDLE implicit-def $sgpr4, implicit-def $sgpr5, implicit $sgpr0_sgpr1 {
+    ; CHECK-NEXT:   S_CLAUSE 1
+    ; CHECK-NEXT:   $sgpr4 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 8, 0
+    ; CHECK-NEXT:   $sgpr5 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 12, 0
+    ; CHECK-NEXT: }
+    ; CHECK-NEXT: S_ENDPGM 0
+    BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit $sgpr0_sgpr1 {
+      S_CLAUSE 1
+      $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+      KILL undef renamable $sgpr0
+      $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
+    }
+    $sgpr6 = S_MOV_B32 0
+    $sgpr4 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 8, 0
+    $sgpr5 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 12, 0
+    S_ENDPGM 0
+...



More information about the llvm-commits mailing list