[llvm] 311339e - [DAG] SimplifyDemandedBits - ISD::AND - only request DemandedElts when looking for a splat constant
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu May 16 05:06:00 PDT 2024
Author: Simon Pilgrim
Date: 2024-05-16T13:05:35+01:00
New Revision: 311339e25cd4d431902f93b1cbc6b67ac3fd2abf
URL: https://github.com/llvm/llvm-project/commit/311339e25cd4d431902f93b1cbc6b67ac3fd2abf
DIFF: https://github.com/llvm/llvm-project/commit/311339e25cd4d431902f93b1cbc6b67ac3fd2abf.diff
LOG: [DAG] SimplifyDemandedBits - ISD::AND - only request DemandedElts when looking for a splat constant
Limit the isConstOrConstSplat call to the vector elements we care about.
Noticed while investigating regressions in #92096.
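The distinction matters for partially-demanded vectors: a constant operand whose lanes differ only in elements nobody demands still behaves as a splat for this fold. A minimal standalone sketch of the idea, with splatOverDemandedElts as a hypothetical stand-in for isConstOrConstSplat(Op1, DemandedElts), modelled on plain integers rather than SelectionDAG nodes:

#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

// Returns the shared constant if every *demanded* lane holds the same value;
// lanes outside DemandedElts are ignored, mirroring the DemandedElts overload.
std::optional<std::uint64_t>
splatOverDemandedElts(const std::vector<std::uint64_t> &Lanes,
                      std::uint64_t DemandedElts) {
  std::optional<std::uint64_t> Splat;
  for (std::size_t I = 0; I != Lanes.size(); ++I) {
    if (!(DemandedElts & (std::uint64_t(1) << I)))
      continue; // lane not demanded, so it cannot break the splat
    if (Splat && *Splat != Lanes[I])
      return std::nullopt; // two demanded lanes disagree
    Splat = Lanes[I];
  }
  return Splat;
}

int main() {
  // <0xff, 0x00, 0xff, 0x00> is not a full splat, but lanes 0 and 2 agree.
  std::vector<std::uint64_t> Lanes = {0xff, 0x00, 0xff, 0x00};
  bool Full = splatOverDemandedElts(Lanes, 0b1111).has_value(); // false
  bool Part = splatOverDemandedElts(Lanes, 0b0101).has_value(); // true
  return (!Full && Part) ? 0 : 1;
}

Querying only the demanded lanes finds the splat in more cases, which appears to be what lets the Thumb2 MVE tests below shed vector-register spills.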
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 9ddb14e11dab2..38583de03d9c8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1380,7 +1380,7 @@ bool TargetLowering::SimplifyDemandedBits(
// using the bits from the RHS. Below, we use knowledge about the RHS to
// simplify the LHS, here we're using information from the LHS to simplify
// the RHS.
- if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
+ if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
// Do not increment Depth here; that can cause an infinite loop.
KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
// If the LHS already has zeros where RHSC does, this 'and' is dead.
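For context, the guarded fold is easy to state with scalar masks: if the LHS is already known to be zero in every bit position where the splat constant RHSC has a zero, the AND cannot clear anything and is dead. A minimal sketch with plain integers (andIsDead is a hypothetical name, not an LLVM API):

#include <cstdint>

// The AND is dead iff LHS is already known zero wherever Mask is zero,
// i.e. every bit the AND would clear is already clear.
bool andIsDead(std::uint32_t LHSKnownZero, std::uint32_t Mask) {
  return (~Mask & ~LHSKnownZero) == 0;
}

int main() {
  // LHS is a zero-extended byte, so bits 8..31 are known zero;
  // masking it with 0xFF is therefore a no-op.
  std::uint32_t LHSKnownZero = 0xFFFFFF00u;
  return andIsDead(LHSKnownZero, 0x000000FFu) ? 0 : 1;
}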
diff --git a/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll b/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll
index 203ce1f881189..c2511a4992cf5 100644
--- a/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll
@@ -6,39 +6,36 @@ define arm_aapcs_vfpcc <4 x i32> @loads_i32(ptr %A, ptr %B, ptr %C) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
-; CHECK-NEXT: .vsave {d8, d9}
-; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: vldrw.u32 q1, [r1]
-; CHECK-NEXT: vmov.i64 q2, #0xffffffff
-; CHECK-NEXT: vmov.f32 s0, s6
-; CHECK-NEXT: vmov.f32 s2, s7
-; CHECK-NEXT: vand q0, q0, q2
-; CHECK-NEXT: vmov.f32 s6, s5
-; CHECK-NEXT: vmov r4, r5, d0
-; CHECK-NEXT: vmov r3, r1, d1
+; CHECK-NEXT: vldrw.u32 q2, [r1]
+; CHECK-NEXT: vmov.i64 q1, #0xffffffff
+; CHECK-NEXT: vmov.f32 s0, s10
+; CHECK-NEXT: vmov.f32 s2, s11
+; CHECK-NEXT: vand q0, q0, q1
+; CHECK-NEXT: vmov.f32 s10, s9
+; CHECK-NEXT: vmov r3, r4, d0
+; CHECK-NEXT: vand q2, q2, q1
+; CHECK-NEXT: vmov r5, r1, d1
; CHECK-NEXT: vldrw.u32 q0, [r0]
+; CHECK-NEXT: vldrw.u32 q1, [r2]
+; CHECK-NEXT: vmov lr, r12, d5
; CHECK-NEXT: vmov.f32 s12, s2
; CHECK-NEXT: vmov.f32 s2, s3
; CHECK-NEXT: vmov r0, s12
-; CHECK-NEXT: vand q3, q1, q2
-; CHECK-NEXT: vldrw.u32 q1, [r2]
-; CHECK-NEXT: vmov lr, r12, d7
-; CHECK-NEXT: vmov.f32 s16, s6
-; CHECK-NEXT: vmov.f32 s18, s7
-; CHECK-NEXT: vand q2, q4, q2
+; CHECK-NEXT: vmov.f32 s12, s6
+; CHECK-NEXT: vmov.f32 s6, s7
; CHECK-NEXT: asrs r2, r0, #31
-; CHECK-NEXT: adds r0, r0, r4
-; CHECK-NEXT: adcs r5, r2
-; CHECK-NEXT: vmov r2, s8
-; CHECK-NEXT: asrl r0, r5, r2
+; CHECK-NEXT: adds r0, r0, r3
+; CHECK-NEXT: adc.w r3, r2, r4
+; CHECK-NEXT: vmov r2, s12
+; CHECK-NEXT: asrl r0, r3, r2
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov.f32 s2, s1
-; CHECK-NEXT: asrs r4, r2, #31
-; CHECK-NEXT: adds r2, r2, r3
-; CHECK-NEXT: adcs r1, r4
-; CHECK-NEXT: vmov r3, s10
+; CHECK-NEXT: asrs r3, r2, #31
+; CHECK-NEXT: adds r2, r2, r5
+; CHECK-NEXT: adcs r1, r3
+; CHECK-NEXT: vmov r3, s6
; CHECK-NEXT: asrl r2, r1, r3
-; CHECK-NEXT: vmov r4, r5, d6
+; CHECK-NEXT: vmov r4, r5, d4
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov.f32 s2, s5
; CHECK-NEXT: adds.w r6, r1, lr
@@ -54,7 +51,6 @@ define arm_aapcs_vfpcc <4 x i32> @loads_i32(ptr %A, ptr %B, ptr %C) {
; CHECK-NEXT: asrl r4, r1, r3
; CHECK-NEXT: vmov q0[2], q0[0], r4, r0
; CHECK-NEXT: vmov q0[3], q0[1], r6, r2
-; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r6, pc}
entry:
%a = load <4 x i32>, ptr %A, align 4
@@ -138,62 +134,58 @@ entry:
define arm_aapcs_vfpcc void @load_store_i32(ptr %A, ptr %B, ptr %C, ptr %D) {
; CHECK-LABEL: load_store_i32:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, lr}
-; CHECK-NEXT: .pad #4
-; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: .vsave {d8, d9, d10, d11}
-; CHECK-NEXT: vpush {d8, d9, d10, d11}
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vmov.i64 q4, #0xffffffff
-; CHECK-NEXT: vmov.f32 s4, s2
-; CHECK-NEXT: vmov.f32 s2, s1
-; CHECK-NEXT: vmov.f32 s6, s3
-; CHECK-NEXT: vand q2, q0, q4
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: .vsave {d8}
+; CHECK-NEXT: vpush {d8}
+; CHECK-NEXT: vldrw.u32 q2, [r1]
+; CHECK-NEXT: vmov.i64 q0, #0xffffffff
+; CHECK-NEXT: vmov.f32 s4, s10
+; CHECK-NEXT: vmov.f32 s6, s11
+; CHECK-NEXT: vmov.f32 s10, s9
+; CHECK-NEXT: vand q1, q1, q0
+; CHECK-NEXT: vand q2, q2, q0
; CHECK-NEXT: vldrw.u32 q0, [r0]
-; CHECK-NEXT: vand q1, q1, q4
-; CHECK-NEXT: vmov r5, r1, d3
+; CHECK-NEXT: vmov r6, r4, d3
; CHECK-NEXT: vmov.f32 s12, s2
; CHECK-NEXT: vmov.f32 s2, s3
-; CHECK-NEXT: vmov r0, r12, d2
+; CHECK-NEXT: vmov lr, r12, d2
; CHECK-NEXT: vldrw.u32 q1, [r2]
-; CHECK-NEXT: vmov r4, lr, d5
-; CHECK-NEXT: vmov.f32 s20, s6
-; CHECK-NEXT: vmov.f32 s6, s1
-; CHECK-NEXT: vmov.f32 s22, s7
-; CHECK-NEXT: vand q4, q5, q4
-; CHECK-NEXT: vmov r6, s2
+; CHECK-NEXT: vmov r5, r1, d5
+; CHECK-NEXT: vmov.f32 s16, s6
+; CHECK-NEXT: vmov.f32 s6, s7
+; CHECK-NEXT: vmov.f32 s10, s1
+; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov.f32 s2, s5
-; CHECK-NEXT: adds r2, r6, r5
-; CHECK-NEXT: vmov r5, s18
-; CHECK-NEXT: asr.w r7, r6, #31
-; CHECK-NEXT: adcs r1, r7
-; CHECK-NEXT: asrl r2, r1, r5
-; CHECK-NEXT: vmov r7, s2
-; CHECK-NEXT: vmov r1, s6
-; CHECK-NEXT: adds r4, r4, r1
-; CHECK-NEXT: asr.w r5, r1, #31
-; CHECK-NEXT: adc.w r1, r5, lr
-; CHECK-NEXT: asrl r4, r1, r7
-; CHECK-NEXT: vmov r6, r5, d4
+; CHECK-NEXT: adds.w r8, r0, r6
+; CHECK-NEXT: asr.w r2, r0, #31
+; CHECK-NEXT: adc.w r7, r2, r4
+; CHECK-NEXT: vmov r2, s6
+; CHECK-NEXT: asrl r8, r7, r2
+; CHECK-NEXT: vmov r2, s10
+; CHECK-NEXT: asrs r4, r2, #31
+; CHECK-NEXT: adds r2, r2, r5
+; CHECK-NEXT: adcs r1, r4
+; CHECK-NEXT: vmov r4, s2
+; CHECK-NEXT: asrl r2, r1, r4
+; CHECK-NEXT: vmov r5, r7, d4
; CHECK-NEXT: vmov r1, s12
-; CHECK-NEXT: adds r0, r0, r1
-; CHECK-NEXT: asr.w r7, r1, #31
-; CHECK-NEXT: adc.w r1, r7, r12
-; CHECK-NEXT: vmov r7, s16
-; CHECK-NEXT: asrl r0, r1, r7
+; CHECK-NEXT: adds.w r6, r1, lr
+; CHECK-NEXT: asr.w r4, r1, #31
+; CHECK-NEXT: adc.w r1, r4, r12
+; CHECK-NEXT: vmov r4, s16
+; CHECK-NEXT: asrl r6, r1, r4
; CHECK-NEXT: vmov r1, s0
-; CHECK-NEXT: adds r6, r6, r1
-; CHECK-NEXT: asr.w r7, r1, #31
-; CHECK-NEXT: adc.w r1, r7, r5
+; CHECK-NEXT: adds r0, r1, r5
+; CHECK-NEXT: asr.w r4, r1, #31
+; CHECK-NEXT: adc.w r1, r4, r7
; CHECK-NEXT: vmov r7, s4
-; CHECK-NEXT: asrl r6, r1, r7
-; CHECK-NEXT: vmov q0[2], q0[0], r6, r0
-; CHECK-NEXT: vmov q0[3], q0[1], r4, r2
+; CHECK-NEXT: asrl r0, r1, r7
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r6
+; CHECK-NEXT: vmov q0[3], q0[1], r2, r8
; CHECK-NEXT: vstrw.32 q0, [r3]
-; CHECK-NEXT: vpop {d8, d9, d10, d11}
-; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT: vpop {d8}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
entry:
%a = load <4 x i32>, ptr %A, align 4
%b = load <4 x i32>, ptr %B, align 4