[llvm] [MachineCSE] Trivially coalesce subreg copies to expose CSE (PR #153120)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 11 18:52:38 PDT 2025
https://github.com/AZero13 created https://github.com/llvm/llvm-project/pull/153120
None
From 2af22bd273cf334d0310efb6b9e1bf5d6d1265bd Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Mon, 11 Aug 2025 21:52:17 -0400
Subject: [PATCH] [MachineCSE] Trivially coalesce subreg copies to expose CSE
---
llvm/lib/CodeGen/MachineCSE.cpp | 54 ++++++++++++++-----
llvm/test/CodeGen/AArch64/zext-to-tbl.ll | 16 +++---
.../test/CodeGen/X86/cse-add-with-overflow.ll | 3 +-
3 files changed, 50 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp
index 780ed92bc8581..a1d236c7955ec 100644
--- a/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/llvm/lib/CodeGen/MachineCSE.cpp
@@ -186,20 +186,48 @@ bool MachineCSEImpl::PerformTrivialCopyPropagation(MachineInstr *MI,
Register SrcReg = DefMI->getOperand(1).getReg();
if (!SrcReg.isVirtual())
continue;
- // FIXME: We should trivially coalesce subregister copies to expose CSE
- // opportunities on instructions with truncated operands (see
- // cse-add-with-overflow.ll). This can be done here as follows:
- // if (SrcSubReg)
- // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
- // SrcSubReg);
- // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
- //
- // The 2-addr pass has been updated to handle coalesced subregs. However,
- // some machine-specific code still can't handle it.
- // To handle it properly we also need a way find a constrained subregister
- // class given a super-reg class and subreg index.
- if (DefMI->getOperand(1).getSubReg())
+ unsigned SrcSubReg = DefMI->getOperand(1).getSubReg();
+ if (SrcSubReg) {
+ const TargetRegisterClass *UseRC = MRI->getRegClassOrNull(Reg);
+ const TargetRegisterClass *SrcRC = MRI->getRegClassOrNull(SrcReg);
+ if (!UseRC || !SrcRC)
+ continue;
+
+ const TargetRegisterClass *NewSuperRC =
+ TRI->getMatchingSuperRegClass(SrcRC, UseRC, SrcSubReg);
+ if (!NewSuperRC)
+ continue;
+
+ if (!MRI->constrainRegClass(SrcReg, NewSuperRC))
+ continue;
+
+ // Note: We don't call constrainRegAttrs(SrcReg, Reg) here because
+ // we're replacing uses of Reg with SrcReg:SrcSubReg, not merging
+ // their constraints. The substVirtReg call will handle the substitution.
+
+ LLVM_DEBUG(dbgs() << "Coalescing (subreg): " << *DefMI);
+ LLVM_DEBUG(dbgs() << "*** to: " << *MI);
+
+ // Propagate SrcReg:SrcSubReg of copies to MI.
+ MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
+ MRI->clearKillFlags(SrcReg);
+
+ if (OnlyOneUse) {
+ SmallVector<MachineOperand *, 4> DbgUses;
+ for (auto &U : MRI->use_operands(Reg)) {
+ MachineInstr *UDI = U.getParent();
+ if (UDI->isDebugValue())
+ DbgUses.push_back(&U);
+ }
+ for (MachineOperand *U : DbgUses)
+ U->substVirtReg(SrcReg, SrcSubReg, *TRI);
+
+ DefMI->eraseFromParent();
+ ++NumCoalesces;
+ }
+ Changed = true;
continue;
+ }
if (!MRI->constrainRegAttrs(SrcReg, Reg))
continue;
LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
diff --git a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
index 74a717f1635a3..8f5974881c489 100644
--- a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
@@ -1247,12 +1247,12 @@ define void @zext_v16i4_to_v16i32_in_loop(ptr %src, ptr %dst) {
; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: ubfx x12, x9, #48, #4
; CHECK-NEXT: lsr x10, x9, #52
-; CHECK-NEXT: ubfx x13, x9, #32, #4
+; CHECK-NEXT: ubfx x14, x9, #32, #4
; CHECK-NEXT: ubfx w15, w9, #16, #4
; CHECK-NEXT: lsr x11, x9, #36
-; CHECK-NEXT: lsr w14, w9, #20
+; CHECK-NEXT: lsr w13, w9, #20
; CHECK-NEXT: fmov s1, w12
-; CHECK-NEXT: fmov s2, w13
+; CHECK-NEXT: fmov s2, w14
; CHECK-NEXT: lsr w12, w9, #4
; CHECK-NEXT: fmov s3, w15
; CHECK-NEXT: mov.h v1[1], w10
@@ -1260,7 +1260,7 @@ define void @zext_v16i4_to_v16i32_in_loop(ptr %src, ptr %dst) {
; CHECK-NEXT: mov.h v2[1], w11
; CHECK-NEXT: fmov s4, w10
; CHECK-NEXT: lsr x11, x9, #56
-; CHECK-NEXT: mov.h v3[1], w14
+; CHECK-NEXT: mov.h v3[1], w13
; CHECK-NEXT: lsr x10, x9, #40
; CHECK-NEXT: mov.h v4[1], w12
; CHECK-NEXT: lsr w12, w9, #24
@@ -1301,14 +1301,14 @@ define void @zext_v16i4_to_v16i32_in_loop(ptr %src, ptr %dst) {
; CHECK-BE-NEXT: add x8, x8, #16
; CHECK-BE-NEXT: cmp x8, #128
; CHECK-BE-NEXT: ubfx w11, w9, #12, #4
-; CHECK-BE-NEXT: lsr w14, w9, #28
+; CHECK-BE-NEXT: lsr w13, w9, #28
; CHECK-BE-NEXT: lsr w10, w9, #8
; CHECK-BE-NEXT: ubfx x15, x9, #44, #4
; CHECK-BE-NEXT: lsr w12, w9, #24
-; CHECK-BE-NEXT: lsr x13, x9, #40
+; CHECK-BE-NEXT: lsr x14, x9, #40
; CHECK-BE-NEXT: fmov s1, w11
; CHECK-BE-NEXT: lsr x11, x9, #60
-; CHECK-BE-NEXT: fmov s2, w14
+; CHECK-BE-NEXT: fmov s2, w13
; CHECK-BE-NEXT: fmov s3, w15
; CHECK-BE-NEXT: fmov s4, w11
; CHECK-BE-NEXT: lsr w11, w9, #20
@@ -1316,7 +1316,7 @@ define void @zext_v16i4_to_v16i32_in_loop(ptr %src, ptr %dst) {
; CHECK-BE-NEXT: lsr x10, x9, #56
; CHECK-BE-NEXT: mov v2.h[1], w12
; CHECK-BE-NEXT: lsr w12, w9, #4
-; CHECK-BE-NEXT: mov v3.h[1], w13
+; CHECK-BE-NEXT: mov v3.h[1], w14
; CHECK-BE-NEXT: mov v4.h[1], w10
; CHECK-BE-NEXT: lsr x10, x9, #36
; CHECK-BE-NEXT: mov v1.h[2], w12
diff --git a/llvm/test/CodeGen/X86/cse-add-with-overflow.ll b/llvm/test/CodeGen/X86/cse-add-with-overflow.ll
index 40214267e1743..ff71472a65f7d 100644
--- a/llvm/test/CodeGen/X86/cse-add-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/cse-add-with-overflow.ll
@@ -1,12 +1,11 @@
; RUN: llc < %s -mtriple=x86_64-darwin -mcpu=generic | FileCheck %s
-; XFAIL: *
; rdar:15661073 simple example of redundant adds
;
; MachineCSE should coalesce trivial subregister copies.
;
; The extra movl+addl should be removed during MachineCSE.
; CHECK-LABEL: redundantadd
-; CHECK: cmpq
+; CHECK: cmpl
; CHECK: movq
; CHECK-NOT: movl
; CHECK: addl
More information about the llvm-commits
mailing list