[llvm] RegAllocGreedy: Check if copied lanes are live in trySplitAroundHintReg (PR #160424)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 26 18:35:11 PDT 2025
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/160424
>From 5f93f5cfd3577a04185cc55849405ac48f1e1519 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Tue, 23 Sep 2025 21:32:23 +0900
Subject: [PATCH] RegAllocGreedy: Check if copied lanes are live in
trySplitAroundHintReg
For subregister copies, do a subregister live check instead of checking
the main range. Doesn't do much yet, the split analysis still does not
track live ranges.
---
llvm/lib/CodeGen/RegAllocGreedy.cpp | 24 ++++++-
llvm/test/CodeGen/SystemZ/fp-cmp-04.ll | 4 +-
llvm/test/CodeGen/X86/fshl.ll | 81 ++++++++++++-----------
llvm/test/CodeGen/X86/fshr.ll | 90 +++++++++++++-------------
4 files changed, 109 insertions(+), 90 deletions(-)
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 8e6cf3e6b51b3..6e0585b2e9e55 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -1406,8 +1406,28 @@ bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint,
continue;
// Check if VirtReg interferes with OtherReg after this COPY instruction.
- if (!IsDef && VirtReg.liveAt(LIS->getInstructionIndex(Instr).getRegSlot()))
- continue;
+ if (Opnd.readsReg()) {
+ SlotIndex Index = LIS->getInstructionIndex(Instr).getRegSlot();
+
+ if (SubReg) {
+ LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg);
+ if (IsDef)
+ Mask = ~Mask;
+
+ if (any_of(VirtReg.subranges(), [=](const LiveInterval::SubRange &S) {
+ if ((S.LaneMask & Mask).any()) {
+ if (S.liveAt(Index))
+ return true;
+ }
+ return false;
+ })) {
+ continue;
+ }
+ } else {
+ if (VirtReg.liveAt(Index))
+ continue;
+ }
+ }
MCRegister OtherPhysReg =
OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
diff --git a/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll b/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll
index d3d641357ae58..eb7c1b632dba9 100644
--- a/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll
@@ -235,7 +235,7 @@ define half @f12_half(half %dummy, half %val, ptr %dest) {
; CHECK-NEXT: blah %f0
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: brasl %r14, __extendhfsf2 at PLT
-; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: ltebr %f1, %f0
; CHECK-NEXT: jl .LBB11_2
; CHECK-NEXT:# %bb.1:
; CHECK-NEXT: lgdr %r0, %f8
@@ -344,7 +344,7 @@ define half @f15_half(half %val, half %dummy, ptr %dest) {
; CHECK-NEXT: blah %f2
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: brasl %r14, __extendhfsf2 at PLT
-; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: ltebr %f1, %f0
; CHECK-NEXT: jl .LBB15_2
; CHECK-NEXT:# %bb.1:
; CHECK-NEXT: lgdr %r0, %f8
diff --git a/llvm/test/CodeGen/X86/fshl.ll b/llvm/test/CodeGen/X86/fshl.ll
index ec1b8a3c8d6d9..f998128af95f8 100644
--- a/llvm/test/CodeGen/X86/fshl.ll
+++ b/llvm/test/CodeGen/X86/fshl.ll
@@ -335,84 +335,83 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SLOW-NEXT: pushl %esi
; X86-SLOW-NEXT: andl $-16, %esp
; X86-SLOW-NEXT: subl $32, %esp
-; X86-SLOW-NEXT: movl 24(%ebp), %esi
+; X86-SLOW-NEXT: movl 24(%ebp), %edi
; X86-SLOW-NEXT: movl 28(%ebp), %eax
; X86-SLOW-NEXT: movl 48(%ebp), %edx
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: testb $64, %cl
-; X86-SLOW-NEXT: movl 52(%ebp), %edi
+; X86-SLOW-NEXT: movl 52(%ebp), %ebx
; X86-SLOW-NEXT: jne .LBB6_1
; X86-SLOW-NEXT: # %bb.2:
; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %esi, %edx
-; X86-SLOW-NEXT: movl 32(%ebp), %esi
-; X86-SLOW-NEXT: movl %edi, %ecx
-; X86-SLOW-NEXT: movl %eax, %edi
+; X86-SLOW-NEXT: movl %edi, %edx
+; X86-SLOW-NEXT: movl 32(%ebp), %edi
+; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %eax, %ebx
; X86-SLOW-NEXT: movl 36(%ebp), %eax
; X86-SLOW-NEXT: jmp .LBB6_3
; X86-SLOW-NEXT: .LBB6_1:
; X86-SLOW-NEXT: movl 40(%ebp), %ecx
; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: movl 44(%ebp), %ecx
+; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: .LBB6_3:
-; X86-SLOW-NEXT: movl 56(%ebp), %ebx
-; X86-SLOW-NEXT: testb $32, %bl
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
+; X86-SLOW-NEXT: testb $32, %cl
; X86-SLOW-NEXT: jne .LBB6_4
; X86-SLOW-NEXT: # %bb.5:
-; X86-SLOW-NEXT: movl %ecx, %ebx
; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %edx, %edi
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: jmp .LBB6_6
; X86-SLOW-NEXT: .LBB6_4:
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ecx, %edx
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %edx, %ebx
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-SLOW-NEXT: .LBB6_6:
-; X86-SLOW-NEXT: movl %edx, %esi
+; X86-SLOW-NEXT: movl %edi, %eax
+; X86-SLOW-NEXT: shll %cl, %eax
+; X86-SLOW-NEXT: shrl %esi
+; X86-SLOW-NEXT: movl %ecx, %edx
+; X86-SLOW-NEXT: notb %dl
+; X86-SLOW-NEXT: movl %edx, %ecx
+; X86-SLOW-NEXT: shrl %cl, %esi
+; X86-SLOW-NEXT: orl %eax, %esi
+; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %ebx, %eax
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: movl %ebx, %edi
+; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: shll %cl, %eax
; X86-SLOW-NEXT: shrl %edi
-; X86-SLOW-NEXT: movl %ecx, %ebx
-; X86-SLOW-NEXT: notb %bl
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-SLOW-NEXT: movl %edx, %ecx
; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: orl %esi, %edi
+; X86-SLOW-NEXT: orl %eax, %edi
; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-SLOW-NEXT: movl %esi, %eax
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-SLOW-NEXT: shll %cl, %eax
-; X86-SLOW-NEXT: shrl %edx
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shrl %cl, %edx
-; X86-SLOW-NEXT: orl %eax, %edx
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-SLOW-NEXT: movl %ebx, %eax
+; X86-SLOW-NEXT: shrl %ebx
+; X86-SLOW-NEXT: movl %edx, %ecx
+; X86-SLOW-NEXT: shrl %cl, %ebx
+; X86-SLOW-NEXT: orl %eax, %ebx
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-SLOW-NEXT: shll %cl, %eax
; X86-SLOW-NEXT: shrl %esi
-; X86-SLOW-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-SLOW-NEXT: movl %edx, %ecx
; X86-SLOW-NEXT: shrl %cl, %esi
; X86-SLOW-NEXT: orl %eax, %esi
-; X86-SLOW-NEXT: movl 56(%ebp), %ecx
-; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-SLOW-NEXT: shll %cl, %eax
-; X86-SLOW-NEXT: shrl %ebx
-; X86-SLOW-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X86-SLOW-NEXT: shrl %cl, %ebx
-; X86-SLOW-NEXT: orl %eax, %ebx
; X86-SLOW-NEXT: movl 8(%ebp), %eax
-; X86-SLOW-NEXT: movl %ebx, 12(%eax)
-; X86-SLOW-NEXT: movl %esi, 8(%eax)
-; X86-SLOW-NEXT: movl %edx, 4(%eax)
-; X86-SLOW-NEXT: movl %edi, (%eax)
+; X86-SLOW-NEXT: movl %esi, 12(%eax)
+; X86-SLOW-NEXT: movl %ebx, 8(%eax)
+; X86-SLOW-NEXT: movl %edi, 4(%eax)
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-SLOW-NEXT: movl %ecx, (%eax)
; X86-SLOW-NEXT: leal -12(%ebp), %esp
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/fshr.ll b/llvm/test/CodeGen/X86/fshr.ll
index 544ab7fc77374..c307833e488c9 100644
--- a/llvm/test/CodeGen/X86/fshr.ll
+++ b/llvm/test/CodeGen/X86/fshr.ll
@@ -322,79 +322,79 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SLOW-NEXT: subl $16, %esp
; X86-SLOW-NEXT: movl 24(%ebp), %edx
; X86-SLOW-NEXT: movl 28(%ebp), %esi
-; X86-SLOW-NEXT: movl 48(%ebp), %ebx
+; X86-SLOW-NEXT: movl 48(%ebp), %edi
; X86-SLOW-NEXT: movl 56(%ebp), %eax
; X86-SLOW-NEXT: testb $64, %al
-; X86-SLOW-NEXT: movl 52(%ebp), %edi
+; X86-SLOW-NEXT: movl 52(%ebp), %eax
; X86-SLOW-NEXT: je .LBB6_1
; X86-SLOW-NEXT: # %bb.2:
-; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: movl %edx, %ebx
+; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl %edx, %edi
; X86-SLOW-NEXT: movl 32(%ebp), %edx
-; X86-SLOW-NEXT: movl %edi, %eax
-; X86-SLOW-NEXT: movl %esi, %edi
+; X86-SLOW-NEXT: movl %eax, %ecx
+; X86-SLOW-NEXT: movl %esi, %eax
; X86-SLOW-NEXT: movl 36(%ebp), %esi
; X86-SLOW-NEXT: jmp .LBB6_3
; X86-SLOW-NEXT: .LBB6_1:
-; X86-SLOW-NEXT: movl 40(%ebp), %eax
-; X86-SLOW-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: movl 44(%ebp), %eax
+; X86-SLOW-NEXT: movl 40(%ebp), %ecx
+; X86-SLOW-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl 44(%ebp), %ecx
; X86-SLOW-NEXT: .LBB6_3:
-; X86-SLOW-NEXT: movl 56(%ebp), %ecx
-; X86-SLOW-NEXT: testb $32, %cl
+; X86-SLOW-NEXT: movl 56(%ebp), %ebx
+; X86-SLOW-NEXT: testb $32, %bl
; X86-SLOW-NEXT: je .LBB6_4
; X86-SLOW-NEXT: # %bb.5:
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %ecx, %ebx
; X86-SLOW-NEXT: jmp .LBB6_6
; X86-SLOW-NEXT: .LBB6_4:
; X86-SLOW-NEXT: movl %edx, %esi
+; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %eax, %ebx
-; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-SLOW-NEXT: movl %ecx, %edi
+; X86-SLOW-NEXT: movl (%esp), %ebx # 4-byte Reload
; X86-SLOW-NEXT: .LBB6_6:
-; X86-SLOW-NEXT: shrl %cl, %eax
-; X86-SLOW-NEXT: movl %eax, %edx
-; X86-SLOW-NEXT: movl %ecx, %eax
-; X86-SLOW-NEXT: notb %al
-; X86-SLOW-NEXT: movl %ebx, %edi
-; X86-SLOW-NEXT: addl %ebx, %ebx
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shll %cl, %ebx
-; X86-SLOW-NEXT: orl %edx, %ebx
-; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
-; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-SLOW-NEXT: leal (%ebx,%ebx), %edx
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shll %cl, %edx
-; X86-SLOW-NEXT: orl %edi, %edx
+; X86-SLOW-NEXT: shrl %cl, %ebx
+; X86-SLOW-NEXT: movl %ecx, %edx
+; X86-SLOW-NEXT: notb %dl
+; X86-SLOW-NEXT: movl %edi, %eax
+; X86-SLOW-NEXT: addl %edi, %edi
+; X86-SLOW-NEXT: movl %edx, %ecx
+; X86-SLOW-NEXT: shll %cl, %edi
+; X86-SLOW-NEXT: orl %ebx, %edi
+; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-SLOW-NEXT: shrl %cl, %ebx
-; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: shrl %cl, %eax
; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-SLOW-NEXT: leal (%edi,%edi), %ebx
-; X86-SLOW-NEXT: movl %eax, %ecx
+; X86-SLOW-NEXT: movl %edx, %ecx
; X86-SLOW-NEXT: shll %cl, %ebx
-; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-SLOW-NEXT: orl %eax, %ebx
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-SLOW-NEXT: shrl %cl, %edi
+; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-SLOW-NEXT: leal (%eax,%eax), %edi
+; X86-SLOW-NEXT: movl %edx, %ecx
+; X86-SLOW-NEXT: shll %cl, %edi
+; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
+; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: shrl %cl, %eax
; X86-SLOW-NEXT: addl %esi, %esi
-; X86-SLOW-NEXT: movl %eax, %ecx
+; X86-SLOW-NEXT: movl %edx, %ecx
; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: orl %edi, %esi
-; X86-SLOW-NEXT: movl 8(%ebp), %ecx
-; X86-SLOW-NEXT: movl %esi, 12(%ecx)
-; X86-SLOW-NEXT: movl %ebx, 8(%ecx)
-; X86-SLOW-NEXT: movl %edx, 4(%ecx)
-; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
-; X86-SLOW-NEXT: movl %eax, (%ecx)
-; X86-SLOW-NEXT: movl %ecx, %eax
+; X86-SLOW-NEXT: orl %eax, %esi
+; X86-SLOW-NEXT: movl 8(%ebp), %eax
+; X86-SLOW-NEXT: movl %esi, 12(%eax)
+; X86-SLOW-NEXT: movl %edi, 8(%eax)
+; X86-SLOW-NEXT: movl %ebx, 4(%eax)
+; X86-SLOW-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X86-SLOW-NEXT: movl %ecx, (%eax)
; X86-SLOW-NEXT: leal -12(%ebp), %esp
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: popl %edi
More information about the llvm-commits
mailing list