[llvm] b5f2001 - [CodeGen] Register-coalescer remat fix subreg liveness (#165662)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 4 20:40:45 PST 2025
Author: Vigneshwar Jayakumar
Date: 2025-11-04T22:40:40-06:00
New Revision: b5f200129ad96f87bce11e5c8f0eafeb00b70b9c
URL: https://github.com/llvm/llvm-project/commit/b5f200129ad96f87bce11e5c8f0eafeb00b70b9c
DIFF: https://github.com/llvm/llvm-project/commit/b5f200129ad96f87bce11e5c8f0eafeb00b70b9c.diff
LOG: [CodeGen] Register-coalescer remat fix subreg liveness (#165662)
This is a bugfix in rematerialization where the liveness of the subreg lane mask
was incorrectly updated, causing a crash in the scheduler.
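For context, the sketch below shows the lane-refinement pattern the fix relies on:
refine the destination interval's subranges to the lanes written by the
rematerialized def, then give any lane that was not already live at that point a
dead def. This is a minimal sketch assuming the surrounding RegisterCoalescer
state (LIS, TRI, DstInt, DstMask, DefIndex); the helper name refineRematDefLanes
is illustrative and not part of the commit.

    #include "llvm/CodeGen/LiveInterval.h"
    #include "llvm/CodeGen/LiveIntervals.h"
    #include "llvm/CodeGen/SlotIndexes.h"
    #include "llvm/CodeGen/TargetRegisterInfo.h"

    using namespace llvm;

    // Illustrative helper (not in the commit): ensure every lane in DstMask
    // has a def at DefIndex after rematerialization.
    static void refineRematDefLanes(LiveIntervals *LIS,
                                    const TargetRegisterInfo *TRI,
                                    LiveInterval &DstInt, LaneBitmask DstMask,
                                    SlotIndex DefIndex) {
      VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
      // Split existing subranges so the lanes covered by DstMask get their own
      // subranges; the callback then runs on each refined subrange within DstMask.
      DstInt.refineSubRanges(
          Alloc, DstMask,
          [&](LiveInterval::SubRange &SR) {
            // The remat may widen the def: a lane in DstMask might not have
            // been live at DefIndex before. Record a dead def so interference
            // on that lane is modeled correctly.
            if (!SR.liveAt(DefIndex))
              SR.createDeadDef(DefIndex, Alloc);
          },
          *LIS->getSlotIndexes(), *TRI);
    }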
Added:
llvm/test/CodeGen/AMDGPU/reg-coalescer-subreg-liveness.mir
Modified:
llvm/lib/CodeGen/RegisterCoalescer.cpp
llvm/test/CodeGen/SystemZ/regcoal_remat_empty_subrange.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 38f6deb39ddf3..99f76936a180f 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -1600,6 +1600,22 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
SlotIndex DefIndex =
CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
+
+ // Refine the subranges that are now defined by the remat.
+ // This will split existing subranges if necessary.
+ DstInt.refineSubRanges(
+ Alloc, DstMask,
+ [&DefIndex, &Alloc](LiveInterval::SubRange &SR) {
+ // We know that this lane is defined by this instruction,
+ // but at this point it might not be live because it was not defined
+ // by the original instruction. This happens when the
+ // rematerialization widens the defined register. Assign that lane a
+ // dead def so that the interferences are properly modeled.
+ if (!SR.liveAt(DefIndex))
+ SR.createDeadDef(DefIndex, Alloc);
+ },
+ *LIS->getSlotIndexes(), *TRI);
+
for (LiveInterval::SubRange &SR : DstInt.subranges()) {
if ((SR.LaneMask & DstMask).none()) {
LLVM_DEBUG(dbgs()
@@ -1617,14 +1633,6 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
// updateRegDefUses. The original subrange def may have only undefed
// some lanes.
UpdatedSubRanges = true;
- } else {
- // We know that this lane is defined by this instruction,
- // but at this point it might not be live because it was not defined
- // by the original instruction. This happens when the
- // rematerialization widens the defined register. Assign that lane a
- // dead def so that the interferences are properly modeled.
- if (!SR.liveAt(DefIndex))
- SR.createDeadDef(DefIndex, Alloc);
}
}
if (UpdatedSubRanges)
diff --git a/llvm/test/CodeGen/AMDGPU/reg-coalescer-subreg-liveness.mir b/llvm/test/CodeGen/AMDGPU/reg-coalescer-subreg-liveness.mir
new file mode 100644
index 0000000000000..381cb8c9d1047
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/reg-coalescer-subreg-liveness.mir
@@ -0,0 +1,131 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=register-coalescer -verify-coalescing -o - %s | FileCheck %s
+
+# This test checks the fix for the failure "Bad machine code: Defining instruction does not modify register" caused by a corrupt lane mask.
+
+---
+name: reg_coalescer_subreg_liveness
+tracksRegLiveness: true
+liveins:
+body: |
+ ; CHECK-LABEL: name: reg_coalescer_subreg_liveness
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: undef [[S_LOAD_DWORD_IMM:%[0-9]+]].sub1:sgpr_128 = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
+ ; CHECK-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 1
+ ; CHECK-NEXT: undef [[S_MOV_B32_1:%[0-9]+]].sub0:sgpr_256 = S_MOV_B32 0
+ ; CHECK-NEXT: TENSOR_LOAD_TO_LDS_D2 [[S_MOV_B32_]], [[S_MOV_B32_1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 1
+ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub0
+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub2:sgpr_128 = COPY [[S_MOV_B32_]].sub0
+ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub3:sgpr_128 = COPY [[S_MOV_B32_]].sub0
+ ; CHECK-NEXT: TENSOR_LOAD_TO_LDS_D2 [[S_MOV_B32_]], [[S_MOV_B32_1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
+ ; CHECK-NEXT: TENSOR_LOAD_TO_LDS_D2 [[S_LOAD_DWORD_IMM]], [[S_MOV_B32_1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
+ ; CHECK-NEXT: $vcc_lo = COPY $exec_lo
+ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = S_MOV_B32 0
+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 1
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc_lo, implicit $vcc_lo, implicit $vcc_lo
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x80000000)
+ liveins: $sgpr4_sgpr5
+
+ %0:sgpr_64 = COPY killed $sgpr4_sgpr5
+ %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0, 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
+ %2:sreg_32 = S_MOV_B32 1
+ undef %3.sub0:sgpr_128 = COPY %2
+ %4:sreg_32 = S_MOV_B32 0
+ undef %5.sub0:sgpr_256 = COPY %4
+ TENSOR_LOAD_TO_LDS_D2 %3, %5, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
+ %6:sgpr_128 = COPY killed %3
+ %6.sub1:sgpr_128 = COPY killed %1
+ %7:sreg_32 = COPY $exec_lo
+ %8:sreg_32 = COPY %2
+ %9:sreg_32 = COPY %4
+
+ bb.1:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+
+ %10:sreg_32 = COPY killed %8
+ undef %11.sub0:sgpr_128 = COPY %2
+ %11.sub1:sgpr_128 = COPY killed %10
+ %11.sub2:sgpr_128 = COPY %2
+ %11.sub3:sgpr_128 = COPY %2
+ TENSOR_LOAD_TO_LDS_D2 killed %11, %5, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
+ %12:sreg_32 = COPY killed %9
+ %13:sgpr_128 = COPY %6
+ %13.sub2:sgpr_128 = COPY killed %12
+ TENSOR_LOAD_TO_LDS_D2 killed %13, %5, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
+ $vcc_lo = COPY %7
+ %8:sreg_32 = COPY %4
+ %9:sreg_32 = COPY %2
+ S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc_lo, implicit $vcc_lo, implicit $vcc
+ S_BRANCH %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+---
+name: reg_coalescer_subreg_liveness_2
+tracksRegLiveness: true
+liveins:
+body: |
+ ; CHECK-LABEL: name: reg_coalescer_subreg_liveness_2
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: undef [[S_LOAD_DWORD_IMM:%[0-9]+]].sub2:sgpr_128 = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub1:sgpr_128 = S_LOAD_DWORD_IMM [[COPY]], 24, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 1
+ ; CHECK-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sgpr_256 = S_MOV_B32 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0, implicit [[S_LOAD_DWORD_IMM]], implicit [[S_MOV_B32_]]
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x80000000)
+ liveins: $sgpr4_sgpr5
+
+ %0:sgpr_64 = COPY killed $sgpr4_sgpr5
+ %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
+ %2:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0, 24, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4)
+ %3:sreg_32 = S_MOV_B32 1
+ undef %4.sub0:sgpr_128 = COPY %3
+ %5:sgpr_128 = COPY %4
+ %5.sub1:sgpr_128 = COPY killed %2
+ %6:sgpr_128 = COPY %5
+ %6.sub2:sgpr_128 = COPY killed %1
+ %7:sreg_32 = S_MOV_B32 0
+ undef %8.sub0:sgpr_256 = COPY %7
+ %9:sreg_32 = COPY %3
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ %10:sreg_32 = COPY killed %9
+ undef %11.sub0:sgpr_128 = COPY %3
+ %11.sub1:sgpr_128 = COPY killed %10
+ S_NOP 0, implicit %5, implicit %8
+ S_BRANCH %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/SystemZ/regcoal_remat_empty_subrange.ll b/llvm/test/CodeGen/SystemZ/regcoal_remat_empty_subrange.ll
index 678d9a9073155..ff9b6a34c1d53 100644
--- a/llvm/test/CodeGen/SystemZ/regcoal_remat_empty_subrange.ll
+++ b/llvm/test/CodeGen/SystemZ/regcoal_remat_empty_subrange.ll
@@ -22,10 +22,10 @@ define void @main(i16 %in) {
; CHECK-NEXT: locghile %r3, 1
; CHECK-NEXT: o %r0, 0(%r1)
; CHECK-NEXT: larl %r1, g_222
-; CHECK-NEXT: lghi %r5, 0
; CHECK-NEXT: dsgfr %r2, %r0
+; CHECK-NEXT: lghi %r3, 0
; CHECK-NEXT: stgrl %r2, g_39
-; CHECK-NEXT: stc %r5, 19(%r1)
+; CHECK-NEXT: stc %r3, 19(%r1)
; CHECK-NEXT: br %r14
%tmp = load i32, ptr @g_151, align 4
%tmp3 = or i32 %tmp, 1