[llvm] Greedy: Make trySplitAroundHintReg try to match hints with subreg copies (PR #160294)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 23 17:47:13 PDT 2025


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/160294

>From 35f1e158c17351fcd02abfb0b50e32fc2c630c28 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 18 Sep 2025 23:39:05 +0900
Subject: [PATCH 1/3] Greedy: Make trySplitAroundHintReg try to match hints
 with subreg copies

This is essentially the same patch as 116ca9522e89f1e4e02676b5bbe505e80c4d4933:
when trying to match a physreg hint, try to find a compatible physreg if there is
a subregister copy. The slight difference is that this uses getSubReg on the hint
instead of getMatchingSuperReg (the other use should also switch to getSubReg,
since it is faster).

At the moment this turns out to have very little effect. The adjacent code also
needs better handling of subregisters, so that support will continue to be added
piecemeal. The X86 test shows a net reduction in real instructions, plus a few
new "# kill" comment lines.
---
 llvm/lib/CodeGen/RegAllocGreedy.cpp      | 30 +++++++++++++-----
 llvm/test/CodeGen/X86/atomic-bit-test.ll | 40 +++++++++++++-----------
 2 files changed, 44 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index d004815d2c17a..76ed9dad3456d 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -1383,21 +1383,35 @@ bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint,
   // Compute the cost of assigning a non Hint physical register to VirtReg.
   // We define it as the total frequency of broken COPY instructions to/from
   // Hint register, and after split, they can be deleted.
-  for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
-    if (!TII->isFullCopyInstr(Instr))
+
+  // FIXME: This is miscounting the costs with subregisters. In particular, this
+  // should support recognizing SplitKit formed copy bundles instead of direct
+  // copy instructions.
+  for (const MachineOperand &Opnd : MRI->reg_nodbg_operands(Reg)) {
+    const MachineInstr &Instr = *Opnd.getParent();
+    if (!Instr.isCopy() || Opnd.isImplicit())
       continue;
-    Register OtherReg = Instr.getOperand(1).getReg();
-    if (OtherReg == Reg) {
-      OtherReg = Instr.getOperand(0).getReg();
-      if (OtherReg == Reg)
-        continue;
+
+    // Look for the other end of the copy.
+    const bool IsDef = Opnd.isDef();
+    const MachineOperand &OtherOpnd = Instr.getOperand(IsDef);
+    Register OtherReg = OtherOpnd.getReg();
+    assert(Reg == Opnd.getReg());
+    if (OtherReg == Reg)
+      continue;
+
+    unsigned SubReg = Opnd.getSubReg();
+    if (!IsDef) {
       // Check if VirtReg interferes with OtherReg after this COPY instruction.
       if (VirtReg.liveAt(LIS->getInstructionIndex(Instr).getRegSlot()))
         continue;
     }
+
     MCRegister OtherPhysReg =
         OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
-    if (OtherPhysReg == Hint)
+    MCRegister ThisHint =
+        SubReg ? TRI->getSubReg(Hint, SubReg) : MCRegister(Hint);
+    if (OtherPhysReg == ThisHint)
       Cost += MBFI->getBlockFreq(Instr.getParent());
   }
 
diff --git a/llvm/test/CodeGen/X86/atomic-bit-test.ll b/llvm/test/CodeGen/X86/atomic-bit-test.ll
index 8f91f4120842b..b06bef44a5e9e 100644
--- a/llvm/test/CodeGen/X86/atomic-bit-test.ll
+++ b/llvm/test/CodeGen/X86/atomic-bit-test.ll
@@ -469,52 +469,56 @@ entry:
 define i16 @use_in_diff_bb() nounwind {
 ; X86-LABEL: use_in_diff_bb:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzwl v16, %esi
+; X86-NEXT:    movzwl v16, %eax
 ; X86-NEXT:    .p2align 4
 ; X86-NEXT:  .LBB17_1: # %atomicrmw.start
 ; X86-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    orl $1, %ecx
-; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    lock cmpxchgw %cx, v16
-; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    # kill: def $ax killed $ax def $eax
 ; X86-NEXT:    jne .LBB17_1
 ; X86-NEXT:  # %bb.2: # %atomicrmw.end
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    testb %al, %al
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    testb %cl, %cl
 ; X86-NEXT:    jne .LBB17_4
 ; X86-NEXT:  # %bb.3:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl %eax, %esi
 ; X86-NEXT:    calll foo@PLT
-; X86-NEXT:  .LBB17_4:
-; X86-NEXT:    andl $1, %esi
 ; X86-NEXT:    movl %esi, %eax
 ; X86-NEXT:    popl %esi
+; X86-NEXT:  .LBB17_4:
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: use_in_diff_bb:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rbx
-; X64-NEXT:    movzwl v16(%rip), %ebx
+; X64-NEXT:    movzwl v16(%rip), %eax
 ; X64-NEXT:    .p2align 4
 ; X64-NEXT:  .LBB17_1: # %atomicrmw.start
 ; X64-NEXT:    # =>This Inner Loop Header: Depth=1
-; X64-NEXT:    movl %ebx, %ecx
+; X64-NEXT:    movl %eax, %ecx
 ; X64-NEXT:    orl $1, %ecx
-; X64-NEXT:    movl %ebx, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    lock cmpxchgw %cx, v16(%rip)
-; X64-NEXT:    movl %eax, %ebx
+; X64-NEXT:    # kill: def $ax killed $ax def $eax
 ; X64-NEXT:    jne .LBB17_1
 ; X64-NEXT:  # %bb.2: # %atomicrmw.end
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    testb %al, %al
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testb %cl, %cl
 ; X64-NEXT:    jne .LBB17_4
 ; X64-NEXT:  # %bb.3:
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    movl %eax, %ebx
 ; X64-NEXT:    callq foo@PLT
-; X64-NEXT:  .LBB17_4:
-; X64-NEXT:    andl $1, %ebx
 ; X64-NEXT:    movl %ebx, %eax
 ; X64-NEXT:    popq %rbx
+; X64-NEXT:  .LBB17_4:
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 entry:
   %0 = atomicrmw or ptr @v16, i16 1 monotonic, align 2

>From 48c24cfe77c3ff17428943436d24a10986a52c0d Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 24 Sep 2025 09:33:30 +0900
Subject: [PATCH 2/3] Review comments

---
 llvm/lib/CodeGen/RegAllocGreedy.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 76ed9dad3456d..e2ce47ba81275 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -1401,11 +1401,13 @@ bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint,
       continue;
 
     unsigned SubReg = Opnd.getSubReg();
-    if (!IsDef) {
-      // Check if VirtReg interferes with OtherReg after this COPY instruction.
-      if (VirtReg.liveAt(LIS->getInstructionIndex(Instr).getRegSlot()))
-        continue;
-    }
+    unsigned OtherSubReg = OtherOpnd.getSubReg();
+    if (SubReg && OtherSubReg && SubReg != OtherSubReg)
+      continue;
+
+    // Check if VirtReg interferes with OtherReg after this COPY instruction.
+    if (!IsDef && VirtReg.liveAt(LIS->getInstructionIndex(Instr).getRegSlot()))
+      continue;
 
     MCRegister OtherPhysReg =
         OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);

>From f1053fca3a2161e9140d7eb0c39ef8a0959521e4 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 24 Sep 2025 09:37:45 +0900
Subject: [PATCH 3/3] refine comment

---
 llvm/lib/CodeGen/RegAllocGreedy.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index e2ce47ba81275..8e6cf3e6b51b3 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -1386,7 +1386,7 @@ bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint,
 
   // FIXME: This is miscounting the costs with subregisters. In particular, this
   // should support recognizing SplitKit formed copy bundles instead of direct
-  // copy instructions.
+  // copy instructions, which will appear in the same block.
   for (const MachineOperand &Opnd : MRI->reg_nodbg_operands(Reg)) {
     const MachineInstr &Instr = *Opnd.getParent();
     if (!Instr.isCopy() || Opnd.isImplicit())



More information about the llvm-commits mailing list