[llvm] PeepholeOpt: Immediately check if a reg_sequence compose supports a subregister (PR #128279)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 21 19:45:11 PST 2025
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/128279
This is a quick fix for EXPENSIVE_CHECKS bot failures. I still think we could
defer looking for a compatible subregister further up the use-def chain, and
should be able to check compatibility with the ultimately found source.
>From 6bbfb60112740293c710d21bf302123bbb9e492f Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sat, 22 Feb 2025 10:27:26 +0700
Subject: [PATCH] PeepholeOpt: Immediately check if a reg_sequence compose
supports a subregister
This is a quick fix for EXPENSIVE_CHECKS bot failures. I still think we could
defer looking for a compatible subregister further up the use-def chain, and
should be able to check compatibility with the ultimately found source.
---
llvm/lib/CodeGen/PeepholeOptimizer.cpp | 15 +++++---
...-sequence-compose-supports-subreg-index.ll | 34 +++++++++++++++++++
2 files changed, 45 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/CodeGen/Thumb2/peephole-opt-check-reg-sequence-compose-supports-subreg-index.ll
diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 5416cdd39aaf3..44aec9bd0f157 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -1991,10 +1991,6 @@ ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
// If we did not find an exact match, see if we can do a composition to
// extract a sub-subregister.
for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
- // We don't check if the resulting class supports the subregister index
- // yet. This will occur before any rewrite when looking for an eligible
- // source.
-
LaneBitmask DefMask = TRI->getSubRegIndexLaneMask(DefSubReg);
LaneBitmask ThisOpRegMask = TRI->getSubRegIndexLaneMask(RegSeqInput.SubIdx);
@@ -2012,6 +2008,17 @@ ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
unsigned ComposedDefInSrcReg1 =
TRI->composeSubRegIndices(RegSeqInput.SubReg, ReverseDefCompose);
+
+ // TODO: We should be able to defer checking if the result register class
+ // supports the index to continue looking for a rewritable source.
+ //
+ // TODO: Should we modify the register class to support the index?
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(RegSeqInput.Reg);
+ const TargetRegisterClass *SrcWithSubRC =
+ TRI->getSubClassWithSubReg(SrcRC, ComposedDefInSrcReg1);
+ if (SrcRC != SrcWithSubRC)
+ return ValueTrackerResult();
+
return ValueTrackerResult(RegSeqInput.Reg, ComposedDefInSrcReg1);
}
diff --git a/llvm/test/CodeGen/Thumb2/peephole-opt-check-reg-sequence-compose-supports-subreg-index.ll b/llvm/test/CodeGen/Thumb2/peephole-opt-check-reg-sequence-compose-supports-subreg-index.ll
new file mode 100644
index 0000000000000..eefa511bb5907
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/peephole-opt-check-reg-sequence-compose-supports-subreg-index.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mattr=+mve.fp,+fp64 -verify-machineinstrs < %s | FileCheck %s
+
+; Check that peephole-opt doesn't introduce an invalid subregister use
+
+target triple = "thumbv8.1m.main-none-none-eabi"
+
+define <4 x float> @reg_sequence_subreg_compose_failure(<4 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: reg_sequence_subreg_compose_failure:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov d0, r0, r1
+; CHECK-NEXT: mov r0, sp
+; CHECK-NEXT: vmov d1, r2, r3
+; CHECK-NEXT: vldrw.u32 q1, [r0]
+; CHECK-NEXT: vldr s0, .LCPI0_0
+; CHECK-NEXT: vmov.f32 s8, s1
+; CHECK-NEXT: vmov.f32 s9, s3
+; CHECK-NEXT: vmul.f32 q1, q2, q1
+; CHECK-NEXT: vmov.f32 s2, s0
+; CHECK-NEXT: vmov.f32 s1, s4
+; CHECK-NEXT: vmov.f32 s3, s5
+; CHECK-NEXT: vmov r0, r1, d0
+; CHECK-NEXT: vmov r2, r3, d1
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI0_0:
+; CHECK-NEXT: .long 0x00000000 @ float 0
+entry:
+ %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
+ %mul = fmul <2 x float> %a.imag, %b
+ %interleaved.vec = shufflevector <2 x float> zeroinitializer, <2 x float> %mul, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+ ret <4 x float> %interleaved.vec
+}
More information about the llvm-commits
mailing list