[llvm] PeepholeOpt: Try to constrain uses to support subregister (PR #161338)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 30 02:22:24 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-arm
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
This allows removing a special case hack in ARM. ARM's implementation
of getExtractSubregLikeInputs has the strange property that it reports
a register with a class that does not support the reported subregister
index. We can however reconstrain the register to support this usage.
This is an alternative to #<!-- -->159600. I've included the test, but
the output is different. In this version, the VMOVSR is
replaced with an ordinary subregister extract copy.
---
Full diff: https://github.com/llvm/llvm-project/pull/161338.diff
6 Files Affected:
- (modified) llvm/lib/CodeGen/PeepholeOptimizer.cpp (+24)
- (modified) llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp (-14)
- (modified) llvm/lib/Target/ARM/ARMBaseRegisterInfo.h (-5)
- (added) llvm/test/CodeGen/ARM/issue159343.ll (+55)
- (added) llvm/test/CodeGen/ARM/pr159343.mir (+31)
- (modified) llvm/test/CodeGen/ARM/shouldRewriteCopySrc.ll (+2-2)
``````````diff
diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index fb3e6482bb096..729a57ef23b1e 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -1203,6 +1203,18 @@ bool PeepholeOptimizer::optimizeCoalescableCopyImpl(Rewriter &&CpyRewriter) {
if (!NewSrc.Reg)
continue;
+ if (NewSrc.SubReg) {
+ // Verify the register class supports the subregister index. ARM's
+ // copy-like queries return register:subreg pairs where the register's
+ // current class does not directly support the subregister index.
+ const TargetRegisterClass *RC = MRI->getRegClass(NewSrc.Reg);
+ const TargetRegisterClass *WithSubRC =
+ TRI->getSubClassWithSubReg(RC, NewSrc.SubReg);
+ if (!MRI->constrainRegClass(NewSrc.Reg, WithSubRC))
+ continue;
+ Changed = true;
+ }
+
// Rewrite source.
if (CpyRewriter.RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) {
// We may have extended the live-range of NewSrc, account for that.
@@ -1275,6 +1287,18 @@ MachineInstr &PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,
const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
Register NewVReg = MRI->createVirtualRegister(DefRC);
+ if (NewSrc.SubReg) {
+ const TargetRegisterClass *NewSrcRC = MRI->getRegClass(NewSrc.Reg);
+ const TargetRegisterClass *WithSubRC =
+ TRI->getSubClassWithSubReg(NewSrcRC, NewSrc.SubReg);
+
+ // The new source may not directly support the subregister, but we should be
+ // able to assume it is constrainable to support the subregister (otherwise
+ // ValueTracker was lying and reported a useless value).
+ if (!MRI->constrainRegClass(NewSrc.Reg, WithSubRC))
+ llvm_unreachable("replacement register cannot support subregister");
+ }
+
MachineInstr *NewCopy =
BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(),
TII->get(TargetOpcode::COPY), NewVReg)
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index e94220af05a0d..2e8a676269a74 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -960,17 +960,3 @@ bool ARMBaseRegisterInfo::shouldCoalesce(MachineInstr *MI,
}
return false;
}
-
-bool ARMBaseRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
- unsigned DefSubReg,
- const TargetRegisterClass *SrcRC,
- unsigned SrcSubReg) const {
- // We can't extract an SPR from an arbitary DPR (as opposed to a DPR_VFP2).
- if (DefRC == &ARM::SPRRegClass && DefSubReg == 0 &&
- SrcRC == &ARM::DPRRegClass &&
- (SrcSubReg == ARM::ssub_0 || SrcSubReg == ARM::ssub_1))
- return false;
-
- return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg,
- SrcRC, SrcSubReg);
-}
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index 5b67b34089d7e..03b0fa0d1ee08 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -158,11 +158,6 @@ class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
const TargetRegisterClass *NewRC,
LiveIntervals &LIS) const override;
- bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
- unsigned DefSubReg,
- const TargetRegisterClass *SrcRC,
- unsigned SrcSubReg) const override;
-
int getSEHRegNum(unsigned i) const { return getEncodingValue(i); }
};
diff --git a/llvm/test/CodeGen/ARM/issue159343.ll b/llvm/test/CodeGen/ARM/issue159343.ll
new file mode 100644
index 0000000000000..03292582918a9
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/issue159343.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s | FileCheck %s
+
+; Make sure there's no assertion from peephole-opt introducing illegal
+; subregister index uses.
+
+target triple = "thumbv7-unknown-linux-android29"
+
+define void @_ZN11VersionEdit10DecodeFromEv(i1 %call4, ptr %__profc__ZN11VersionEdit10DecodeFromEv) nounwind {
+; CHECK-LABEL: _ZN11VersionEdit10DecodeFromEv:
+; CHECK: @ %bb.0: @ %land.rhs.lr.ph
+; CHECK-NEXT: lsls r0, r0, #31
+; CHECK-NEXT: beq .LBB0_2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: adr r0, .LCPI0_0
+; CHECK-NEXT: vld1.64 {d0, d1}, [r0:128]
+; CHECK-NEXT: b .LBB0_3
+; CHECK-NEXT: .LBB0_2: @ %select.false
+; CHECK-NEXT: vmov.i32 q0, #0x0
+; CHECK-NEXT: .LBB0_3: @ %select.end
+; CHECK-NEXT: vldr s5, .LCPI0_1
+; CHECK-NEXT: vldr s4, .LCPI0_2
+; CHECK-NEXT: vmov.f32 s6, s0
+; CHECK-NEXT: vmov.f32 s7, s1
+; CHECK-NEXT: vst1.64 {d2, d3}, [r1]
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: @ %bb.4:
+; CHECK-NEXT: .LCPI0_0:
+; CHECK-NEXT: .long 1 @ 0x1
+; CHECK-NEXT: .long 0 @ 0x0
+; CHECK-NEXT: .long 1 @ 0x1
+; CHECK-NEXT: .long 0 @ 0x0
+; CHECK-NEXT: .LCPI0_1:
+; CHECK-NEXT: .long 0x00000000 @ float 0
+; CHECK-NEXT: .LCPI0_2:
+; CHECK-NEXT: .long 0x00000001 @ float 1.40129846E-45
+land.rhs.lr.ph:
+ br i1 %call4, label %sw.bb, label %while.cond.while.end_crit_edge.split.loop.exit43
+
+while.cond.while.end_crit_edge.split.loop.exit43: ; preds = %land.rhs.lr.ph
+ %ext0 = extractelement <4 x i64> zeroinitializer, i64 0
+ br label %while.cond.while.end_crit_edge
+
+while.cond.while.end_crit_edge: ; preds = %sw.bb, %while.cond.while.end_crit_edge.split.loop.exit43
+ %pgocount5374.ph = phi i64 [ %ext1, %sw.bb ], [ %ext0, %while.cond.while.end_crit_edge.split.loop.exit43 ]
+ %ins = insertelement <2 x i64> splat (i64 1), i64 %pgocount5374.ph, i64 1
+ store <2 x i64> %ins, ptr %__profc__ZN11VersionEdit10DecodeFromEv, align 8
+ ret void
+
+sw.bb: ; preds = %land.rhs.lr.ph
+ %ext1 = extractelement <4 x i64> splat (i64 1), i64 0
+ br label %while.cond.while.end_crit_edge
+}
+
diff --git a/llvm/test/CodeGen/ARM/pr159343.mir b/llvm/test/CodeGen/ARM/pr159343.mir
new file mode 100644
index 0000000000000..9b71b1ad94b2f
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/pr159343.mir
@@ -0,0 +1,31 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -run-pass=peephole-opt -verify-machineinstrs -mtriple=thumbv7-unknown-linux-android29 %s -o - | FileCheck %s
+---
+name: Test_shouldRewriteCopySrc_Invalid_SubReg
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $r0, $r1
+
+ ; CHECK-LABEL: name: Test_shouldRewriteCopySrc_Invalid_SubReg
+ ; CHECK: liveins: $r0, $r1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:dpair = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr_vfp2 = COPY [[DEF]].dsub_0
+ ; CHECK-NEXT: [[VMOVRRD:%[0-9]+]]:gpr, [[VMOVRRD1:%[0-9]+]]:gpr = VMOVRRD [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]].ssub_1
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:spr = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:spr = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:spr = IMPLICIT_DEF
+ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:mqpr = REG_SEQUENCE killed [[DEF2]], %subreg.ssub_0, killed [[DEF1]], %subreg.ssub_1, killed [[DEF3]], %subreg.ssub_2, [[COPY]].ssub_1, %subreg.ssub_3
+ ; CHECK-NEXT: VST1q64 $r1, 0, killed [[REG_SEQUENCE]], 14 /* CC::al */, $noreg
+ %0:dpair = IMPLICIT_DEF
+ %1:dpr = COPY %0.dsub_0
+ %2:gpr, %3:gpr = VMOVRRD killed %1, 14 /* CC::al */, $noreg
+ %4:spr = VMOVSR killed %3, 14 /* CC::al */, $noreg
+ %5:spr = IMPLICIT_DEF
+ %6:spr = IMPLICIT_DEF
+ %7:spr = IMPLICIT_DEF
+ %8:mqpr = REG_SEQUENCE killed %6, %subreg.ssub_0, killed %5, %subreg.ssub_1, killed %7, %subreg.ssub_2, killed %4, %subreg.ssub_3
+ VST1q64 $r1, 0, killed %8, 14 /* CC::al */, $noreg
+...
diff --git a/llvm/test/CodeGen/ARM/shouldRewriteCopySrc.ll b/llvm/test/CodeGen/ARM/shouldRewriteCopySrc.ll
index e653aaa316fed..2bf8f29eccb40 100644
--- a/llvm/test/CodeGen/ARM/shouldRewriteCopySrc.ll
+++ b/llvm/test/CodeGen/ARM/shouldRewriteCopySrc.ll
@@ -12,8 +12,8 @@ define float @shouldRewriteCopySrc(double %arg) #0 {
; CHECK-NEXT: @APP
; CHECK-NEXT: nop
; CHECK-NEXT: @NO_APP
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vmov.f64 d0, d16
+; CHECK-NEXT: @ kill: def $s0 killed $s0 killed $d0
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: bx lr
bb:
``````````
</details>
https://github.com/llvm/llvm-project/pull/161338
More information about the llvm-commits
mailing list