[llvm] [CodeGen] Register-coalescer remat fix subreg liveness (PR #165662)

Vigneshwar Jayakumar via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 3 09:42:16 PST 2025


https://github.com/VigneshwarJ updated https://github.com/llvm/llvm-project/pull/165662

>From 64bbd4b298187c375abb518761b7e25080eedf2d Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Thu, 30 Oct 2025 01:59:23 -0500
Subject: [PATCH 1/4] [CodeGen] Register-coalescer remat fix subreg liveness

This fixes a bug in rematerialization where the lane mask of a subregister
live range was incorrectly updated, causing a crash in the scheduler.
---
 llvm/lib/CodeGen/RegisterCoalescer.cpp        |  1 +
 .../AMDGPU/reg-coalescer-subreg-liveness.mir  | 55 +++++++++++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/reg-coalescer-subreg-liveness.mir

diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index e17a214b9a27d..acd189e4b1a8d 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -1625,6 +1625,7 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
           // dead def so that the interferences are properly modeled.
           if (!SR.liveAt(DefIndex))
             SR.createDeadDef(DefIndex, Alloc);
+          SR.LaneMask = DstMask & SR.LaneMask;
         }
       }
       if (UpdatedSubRanges)
diff --git a/llvm/test/CodeGen/AMDGPU/reg-coalescer-subreg-liveness.mir b/llvm/test/CodeGen/AMDGPU/reg-coalescer-subreg-liveness.mir
new file mode 100644
index 0000000000000..3afc1d343a728
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/reg-coalescer-subreg-liveness.mir
@@ -0,0 +1,55 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=register-coalescer -verify-coalescing -o - %s | FileCheck %s
+
+# This test is to check fix for failure with "Bad machine code: Defining instruction does not modify register" due to corrupt lane mask.
+
+---
+name:            reg_coalescer_subreg_liveness
+tracksRegLiveness: true
+liveins:
+body:             |
+  ; CHECK-LABEL: name: reg_coalescer_subreg_liveness
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr4_sgpr5
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   undef [[S_LOAD_DWORD_IMM:%[0-9]+]].sub2:sgpr_128 = S_LOAD_DWORD_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
+  ; CHECK-NEXT:   [[S_LOAD_DWORD_IMM:%[0-9]+]].sub1:sgpr_128 = S_LOAD_DWORD_IMM [[COPY]](p4), 24, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4)
+  ; CHECK-NEXT:   [[S_LOAD_DWORD_IMM:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 1
+  ; CHECK-NEXT:   undef [[S_MOV_B32_:%[0-9]+]].sub0:sgpr_256 = S_MOV_B32 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_NOP 0, implicit [[S_LOAD_DWORD_IMM]], implicit [[S_MOV_B32_]]
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $sgpr4_sgpr5
+
+    %5:sgpr_64(p4) = COPY killed $sgpr4_sgpr5
+    %8:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %5(p4), 0, 0 :: (dereferenceable invariant load (s32) , align 16, addrspace 4)
+    %10:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %5(p4), 24, 0 :: (dereferenceable invariant load (s32) , align 8, addrspace 4)
+    %7:sreg_32 = S_MOV_B32 1
+    undef %20.sub0:sgpr_128 = COPY %7
+    %0:sgpr_128 = COPY %20
+    %0.sub1:sgpr_128 = COPY killed %10
+    %27:sgpr_128 = COPY %0
+    %27.sub2:sgpr_128 = COPY killed %8
+    %29:sreg_32 = S_MOV_B32 0
+    undef %30.sub0:sgpr_256 = COPY %29
+    %37:sreg_32 = COPY %7
+  bb.1:
+
+    %1:sreg_32 = COPY killed %37
+    undef %33.sub0:sgpr_128 = COPY %7
+    %33.sub1:sgpr_128 = COPY killed %1
+    S_NOP 0, implicit %0, implicit %30
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...

>From 54060b35eb038fd7fc49ea89a0edd7be64bb7e77 Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Thu, 30 Oct 2025 10:28:17 -0500
Subject: [PATCH 2/4] fix tests

---
 llvm/test/CodeGen/SystemZ/regcoal_remat_empty_subrange.ll | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/SystemZ/regcoal_remat_empty_subrange.ll b/llvm/test/CodeGen/SystemZ/regcoal_remat_empty_subrange.ll
index 678d9a9073155..ff9b6a34c1d53 100644
--- a/llvm/test/CodeGen/SystemZ/regcoal_remat_empty_subrange.ll
+++ b/llvm/test/CodeGen/SystemZ/regcoal_remat_empty_subrange.ll
@@ -22,10 +22,10 @@ define void @main(i16 %in) {
 ; CHECK-NEXT:    locghile %r3, 1
 ; CHECK-NEXT:    o %r0, 0(%r1)
 ; CHECK-NEXT:    larl %r1, g_222
-; CHECK-NEXT:    lghi %r5, 0
 ; CHECK-NEXT:    dsgfr %r2, %r0
+; CHECK-NEXT:    lghi %r3, 0
 ; CHECK-NEXT:    stgrl %r2, g_39
-; CHECK-NEXT:    stc %r5, 19(%r1)
+; CHECK-NEXT:    stc %r3, 19(%r1)
 ; CHECK-NEXT:    br %r14
   %tmp = load i32, ptr @g_151, align 4
   %tmp3 = or i32 %tmp, 1

>From 74bdb76fb15de21102f27b7e52d3b132b508f797 Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Thu, 30 Oct 2025 17:47:18 -0500
Subject: [PATCH 3/4] review comments

---
 .../AMDGPU/reg-coalescer-subreg-liveness.mir  | 41 ++++++++++---------
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/reg-coalescer-subreg-liveness.mir b/llvm/test/CodeGen/AMDGPU/reg-coalescer-subreg-liveness.mir
index 3afc1d343a728..d9ef55c15b779 100644
--- a/llvm/test/CodeGen/AMDGPU/reg-coalescer-subreg-liveness.mir
+++ b/llvm/test/CodeGen/AMDGPU/reg-coalescer-subreg-liveness.mir
@@ -13,9 +13,9 @@ body:             |
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $sgpr4_sgpr5
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   undef [[S_LOAD_DWORD_IMM:%[0-9]+]].sub2:sgpr_128 = S_LOAD_DWORD_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
-  ; CHECK-NEXT:   [[S_LOAD_DWORD_IMM:%[0-9]+]].sub1:sgpr_128 = S_LOAD_DWORD_IMM [[COPY]](p4), 24, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4)
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   undef [[S_LOAD_DWORD_IMM:%[0-9]+]].sub2:sgpr_128 = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
+  ; CHECK-NEXT:   [[S_LOAD_DWORD_IMM:%[0-9]+]].sub1:sgpr_128 = S_LOAD_DWORD_IMM [[COPY]], 24, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4)
   ; CHECK-NEXT:   [[S_LOAD_DWORD_IMM:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 1
   ; CHECK-NEXT:   undef [[S_MOV_B32_:%[0-9]+]].sub0:sgpr_256 = S_MOV_B32 0
   ; CHECK-NEXT: {{  $}}
@@ -28,26 +28,29 @@ body:             |
   ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT:   S_ENDPGM 0
   bb.0:
+    successors: %bb.1(0x80000000)
     liveins: $sgpr4_sgpr5
 
-    %5:sgpr_64(p4) = COPY killed $sgpr4_sgpr5
-    %8:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %5(p4), 0, 0 :: (dereferenceable invariant load (s32) , align 16, addrspace 4)
-    %10:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %5(p4), 24, 0 :: (dereferenceable invariant load (s32) , align 8, addrspace 4)
-    %7:sreg_32 = S_MOV_B32 1
-    undef %20.sub0:sgpr_128 = COPY %7
-    %0:sgpr_128 = COPY %20
-    %0.sub1:sgpr_128 = COPY killed %10
-    %27:sgpr_128 = COPY %0
-    %27.sub2:sgpr_128 = COPY killed %8
-    %29:sreg_32 = S_MOV_B32 0
-    undef %30.sub0:sgpr_256 = COPY %29
-    %37:sreg_32 = COPY %7
+    %0:sgpr_64 = COPY killed $sgpr4_sgpr5
+    %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
+    %2:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0, 24, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4)
+    %3:sreg_32 = S_MOV_B32 1
+    undef %4.sub0:sgpr_128 = COPY %3
+    %5:sgpr_128 = COPY %4
+    %5.sub1:sgpr_128 = COPY killed %2
+    %6:sgpr_128 = COPY %5
+    %6.sub2:sgpr_128 = COPY killed %1
+    %7:sreg_32 = S_MOV_B32 0
+    undef %8.sub0:sgpr_256 = COPY %7
+    %9:sreg_32 = COPY %3
+
   bb.1:
+    successors: %bb.2(0x80000000)
 
-    %1:sreg_32 = COPY killed %37
-    undef %33.sub0:sgpr_128 = COPY %7
-    %33.sub1:sgpr_128 = COPY killed %1
-    S_NOP 0, implicit %0, implicit %30
+    %10:sreg_32 = COPY killed %9
+    undef %11.sub0:sgpr_128 = COPY %3
+    %11.sub1:sgpr_128 = COPY killed %10
+    S_NOP 0, implicit %5, implicit %8
     S_BRANCH %bb.2
 
   bb.2:

>From 4df78bb73a53b5bf13ed06fe6ae62dadb55f986e Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Mon, 3 Nov 2025 11:41:58 -0600
Subject: [PATCH 4/4] updated tests

---
 .../AMDGPU/reg-coalescer-subreg-liveness.mir  | 62 ++++++++++++-------
 1 file changed, 41 insertions(+), 21 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/reg-coalescer-subreg-liveness.mir b/llvm/test/CodeGen/AMDGPU/reg-coalescer-subreg-liveness.mir
index d9ef55c15b779..adbeff2182065 100644
--- a/llvm/test/CodeGen/AMDGPU/reg-coalescer-subreg-liveness.mir
+++ b/llvm/test/CodeGen/AMDGPU/reg-coalescer-subreg-liveness.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=register-coalescer -verify-coalescing -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=register-coalescer -verify-coalescing -o - %s | FileCheck %s
 
 # This test is to check fix for failure with "Bad machine code: Defining instruction does not modify register" due to corrupt lane mask.
 
@@ -14,15 +14,25 @@ body:             |
   ; CHECK-NEXT:   liveins: $sgpr4_sgpr5
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   undef [[S_LOAD_DWORD_IMM:%[0-9]+]].sub2:sgpr_128 = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
-  ; CHECK-NEXT:   [[S_LOAD_DWORD_IMM:%[0-9]+]].sub1:sgpr_128 = S_LOAD_DWORD_IMM [[COPY]], 24, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4)
+  ; CHECK-NEXT:   undef [[S_LOAD_DWORD_IMM:%[0-9]+]].sub1:sgpr_128 = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
+  ; CHECK-NEXT:   undef [[S_MOV_B32_:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 1
+  ; CHECK-NEXT:   undef [[S_MOV_B32_1:%[0-9]+]].sub0:sgpr_256 = S_MOV_B32 0
+  ; CHECK-NEXT:   TENSOR_LOAD_TO_LDS_D2 [[S_MOV_B32_]], [[S_MOV_B32_1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
   ; CHECK-NEXT:   [[S_LOAD_DWORD_IMM:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 1
-  ; CHECK-NEXT:   undef [[S_MOV_B32_:%[0-9]+]].sub0:sgpr_256 = S_MOV_B32 0
+  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub0
+  ; CHECK-NEXT:   [[S_LOAD_DWORD_IMM:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   S_NOP 0, implicit [[S_LOAD_DWORD_IMM]], implicit [[S_MOV_B32_]]
+  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]].sub2:sgpr_128 = COPY [[S_MOV_B32_]].sub0
+  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]].sub3:sgpr_128 = COPY [[S_MOV_B32_]].sub0
+  ; CHECK-NEXT:   TENSOR_LOAD_TO_LDS_D2 [[S_MOV_B32_]], [[S_MOV_B32_1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
+  ; CHECK-NEXT:   TENSOR_LOAD_TO_LDS_D2 [[S_LOAD_DWORD_IMM]], [[S_MOV_B32_1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
+  ; CHECK-NEXT:   $vcc_lo = COPY $exec_lo
+  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = S_MOV_B32 0
+  ; CHECK-NEXT:   [[S_LOAD_DWORD_IMM:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 1
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc_lo, implicit $vcc_lo, implicit $vcc_lo
   ; CHECK-NEXT:   S_BRANCH %bb.2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
@@ -32,25 +42,35 @@ body:             |
     liveins: $sgpr4_sgpr5
 
     %0:sgpr_64 = COPY killed $sgpr4_sgpr5
-    %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
-    %2:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0, 24, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4)
-    %3:sreg_32 = S_MOV_B32 1
-    undef %4.sub0:sgpr_128 = COPY %3
-    %5:sgpr_128 = COPY %4
-    %5.sub1:sgpr_128 = COPY killed %2
-    %6:sgpr_128 = COPY %5
-    %6.sub2:sgpr_128 = COPY killed %1
-    %7:sreg_32 = S_MOV_B32 0
-    undef %8.sub0:sgpr_256 = COPY %7
-    %9:sreg_32 = COPY %3
+    %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0, 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
+    %2:sreg_32 = S_MOV_B32 1
+    undef %3.sub0:sgpr_128 = COPY %2
+    %4:sreg_32 = S_MOV_B32 0
+    undef %5.sub0:sgpr_256 = COPY %4
+    TENSOR_LOAD_TO_LDS_D2 %3, %5, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
+    %6:sgpr_128 = COPY killed %3
+    %6.sub1:sgpr_128 = COPY killed %1
+    %7:sreg_32 = COPY $exec_lo
+    %8:sreg_32 = COPY %2
+    %9:sreg_32 = COPY %4
 
   bb.1:
-    successors: %bb.2(0x80000000)
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
 
-    %10:sreg_32 = COPY killed %9
-    undef %11.sub0:sgpr_128 = COPY %3
+    %10:sreg_32 = COPY killed %8
+    undef %11.sub0:sgpr_128 = COPY %2
     %11.sub1:sgpr_128 = COPY killed %10
-    S_NOP 0, implicit %5, implicit %8
+    %11.sub2:sgpr_128 = COPY %2
+    %11.sub3:sgpr_128 = COPY %2
+    TENSOR_LOAD_TO_LDS_D2 killed %11, %5, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
+    %12:sreg_32 = COPY killed %9
+    %13:sgpr_128 = COPY %6
+    %13.sub2:sgpr_128 = COPY killed %12
+    TENSOR_LOAD_TO_LDS_D2 killed %13, %5, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
+    $vcc_lo = COPY %7
+    %8:sreg_32 = COPY %4
+    %9:sreg_32 = COPY %2
+    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc_lo, implicit $vcc_lo, implicit $vcc
     S_BRANCH %bb.2
 
   bb.2:



More information about the llvm-commits mailing list