[llvm] c53eb93 - PeepholeOpt: Immediately check if a reg_sequence compose supports a subregister (#128279)

via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 25 19:15:21 PST 2025


Author: Matt Arsenault
Date: 2025-02-26T10:15:17+07:00
New Revision: c53eb93dd7e93988b8456d317e3ebffa0c809fb9

URL: https://github.com/llvm/llvm-project/commit/c53eb93dd7e93988b8456d317e3ebffa0c809fb9
DIFF: https://github.com/llvm/llvm-project/commit/c53eb93dd7e93988b8456d317e3ebffa0c809fb9.diff

LOG: PeepholeOpt: Immediately check if a reg_sequence compose supports a subregister (#128279)

This is a quick fix for EXPENSIVE_CHECKS bot failures. I still think we
could defer looking for a compatible subregister further up the use-def
chain, and should be able to check compatibility with the ultimate found
source.

Added: 
    llvm/test/CodeGen/Thumb2/peephole-opt-check-reg-sequence-compose-supports-subreg-index.ll

Modified: 
    llvm/lib/CodeGen/PeepholeOptimizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 5416cdd39aaf3..44aec9bd0f157 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -1991,10 +1991,6 @@ ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
   // If we did not find an exact match, see if we can do a composition to
   // extract a sub-subregister.
   for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
-    // We don't check if the resulting class supports the subregister index
-    // yet. This will occur before any rewrite when looking for an eligible
-    // source.
-
     LaneBitmask DefMask = TRI->getSubRegIndexLaneMask(DefSubReg);
     LaneBitmask ThisOpRegMask = TRI->getSubRegIndexLaneMask(RegSeqInput.SubIdx);
 
@@ -2012,6 +2008,17 @@ ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
 
     unsigned ComposedDefInSrcReg1 =
         TRI->composeSubRegIndices(RegSeqInput.SubReg, ReverseDefCompose);
+
+    // TODO: We should be able to defer checking if the result register class
+    // supports the index to continue looking for a rewritable source.
+    //
+    // TODO: Should we modify the register class to support the index?
+    const TargetRegisterClass *SrcRC = MRI.getRegClass(RegSeqInput.Reg);
+    const TargetRegisterClass *SrcWithSubRC =
+        TRI->getSubClassWithSubReg(SrcRC, ComposedDefInSrcReg1);
+    if (SrcRC != SrcWithSubRC)
+      return ValueTrackerResult();
+
     return ValueTrackerResult(RegSeqInput.Reg, ComposedDefInSrcReg1);
   }
 

diff  --git a/llvm/test/CodeGen/Thumb2/peephole-opt-check-reg-sequence-compose-supports-subreg-index.ll b/llvm/test/CodeGen/Thumb2/peephole-opt-check-reg-sequence-compose-supports-subreg-index.ll
new file mode 100644
index 0000000000000..eefa511bb5907
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/peephole-opt-check-reg-sequence-compose-supports-subreg-index.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mattr=+mve.fp,+fp64 -verify-machineinstrs < %s | FileCheck %s
+
+; Check that peephole-opt doesn't introduce an invalid subregister use
+
+target triple = "thumbv8.1m.main-none-none-eabi"
+
+define <4 x float> @reg_sequence_subreg_compose_failure(<4 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: reg_sequence_subreg_compose_failure:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov d0, r0, r1
+; CHECK-NEXT:    mov r0, sp
+; CHECK-NEXT:    vmov d1, r2, r3
+; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    vldr s0, .LCPI0_0
+; CHECK-NEXT:    vmov.f32 s8, s1
+; CHECK-NEXT:    vmov.f32 s9, s3
+; CHECK-NEXT:    vmul.f32 q1, q2, q1
+; CHECK-NEXT:    vmov.f32 s2, s0
+; CHECK-NEXT:    vmov.f32 s1, s4
+; CHECK-NEXT:    vmov.f32 s3, s5
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI0_0:
+; CHECK-NEXT:    .long 0x00000000 @ float 0
+entry:
+  %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
+  %mul = fmul <2 x float> %a.imag, %b
+  %interleaved.vec = shufflevector <2 x float> zeroinitializer, <2 x float> %mul, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  ret <4 x float> %interleaved.vec
+}


        


More information about the llvm-commits mailing list