[llvm] 6161a8d - DAG: Pull fneg out of select feeding fadd into fsub
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 19 09:00:47 PST 2022
Author: Matt Arsenault
Date: 2022-12-19T11:38:30-05:00
New Revision: 6161a8dd5c5a1f190fd964576fb743620e972cc7
URL: https://github.com/llvm/llvm-project/commit/6161a8dd5c5a1f190fd964576fb743620e972cc7
DIFF: https://github.com/llvm/llvm-project/commit/6161a8dd5c5a1f190fd964576fb743620e972cc7.diff
LOG: DAG: Pull fneg out of select feeding fadd into fsub
Enables folding fadd x, (select c, (fneg a), (fneg b))
-> fsub x, (select c, a, b)
Avoids some regressions in a future AMDGPU change.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/ARM/fadd-select-fneg-combine.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 257fbda0122e5..84cf6e9c02772 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -7005,6 +7005,41 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
OptForSize, Cost, Depth))
return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
break;
+ case ISD::SELECT:
+ case ISD::VSELECT: {
+ // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
+ // iff at least one cost is cheaper and the other is neutral/cheaper
+ SDValue LHS = Op.getOperand(1);
+ NegatibleCost CostLHS = NegatibleCost::Expensive;
+ SDValue NegLHS =
+ getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
+ if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
+ RemoveDeadNode(NegLHS);
+ break;
+ }
+
+ // Prevent this node from being deleted by the next call.
+ Handles.emplace_back(NegLHS);
+
+ SDValue RHS = Op.getOperand(2);
+ NegatibleCost CostRHS = NegatibleCost::Expensive;
+ SDValue NegRHS =
+ getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
+
+ // We're done with the handles.
+ Handles.clear();
+
+ if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
+ (CostLHS != NegatibleCost::Cheaper &&
+ CostRHS != NegatibleCost::Cheaper)) {
+ RemoveDeadNode(NegLHS);
+ RemoveDeadNode(NegRHS);
+ break;
+ }
+
+ Cost = std::min(CostLHS, CostRHS);
+ return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
+ }
}
return SDValue();
diff --git a/llvm/test/CodeGen/ARM/fadd-select-fneg-combine.ll b/llvm/test/CodeGen/ARM/fadd-select-fneg-combine.ll
index 957daa72b91da..656bce616ea04 100644
--- a/llvm/test/CodeGen/ARM/fadd-select-fneg-combine.ll
+++ b/llvm/test/CodeGen/ARM/fadd-select-fneg-combine.ll
@@ -4,14 +4,12 @@
define float @fadd_select_fneg_fneg_f32(i32 %arg0, float %x, float %y, float %z) {
; CHECK-LABEL: fadd_select_fneg_fneg_f32:
; CHECK: @ %bb.0:
-; CHECK-NEXT: eor r2, r2, #-2147483648
-; CHECK-NEXT: eor r1, r1, #-2147483648
; CHECK-NEXT: vmov s0, r3
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: vmov s2, r2
; CHECK-NEXT: vmov s4, r1
; CHECK-NEXT: vseleq.f32 s2, s4, s2
-; CHECK-NEXT: vadd.f32 s0, s2, s0
+; CHECK-NEXT: vsub.f32 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
%cmp = icmp eq i32 %arg0, 0
@@ -28,11 +26,9 @@ define half @fadd_select_fneg_fneg_f16(i32 %arg0, half %x, half %y, half %z) {
; CHECK-NEXT: vmov.f16 s0, r2
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: vmov.f16 s2, r1
-; CHECK-NEXT: vneg.f16 s0, s0
-; CHECK-NEXT: vneg.f16 s2, s2
; CHECK-NEXT: vseleq.f16 s0, s2, s0
; CHECK-NEXT: vmov.f16 s2, r3
-; CHECK-NEXT: vadd.f16 s0, s0, s2
+; CHECK-NEXT: vsub.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
%cmp = icmp eq i32 %arg0, 0
@@ -253,12 +249,11 @@ define half @fadd_select_fneg_negk_f16(i32 %arg0, half %x, half %y) {
; CHECK-LABEL: fadd_select_fneg_negk_f16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f16 s0, r1
-; CHECK-NEXT: vmov.f16 s2, #-4.000000e+00
-; CHECK-NEXT: vneg.f16 s0, s0
+; CHECK-NEXT: vmov.f16 s2, #4.000000e+00
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: vseleq.f16 s0, s0, s2
; CHECK-NEXT: vmov.f16 s2, r2
-; CHECK-NEXT: vadd.f16 s0, s0, s2
+; CHECK-NEXT: vsub.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
%cmp = icmp eq i32 %arg0, 0
@@ -272,12 +267,11 @@ define half @fadd_select_fneg_posk_f16(i32 %arg0, half %x, half %y) {
; CHECK-LABEL: fadd_select_fneg_posk_f16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f16 s0, r1
-; CHECK-NEXT: vmov.f16 s2, #4.000000e+00
-; CHECK-NEXT: vneg.f16 s0, s0
+; CHECK-NEXT: vmov.f16 s2, #-4.000000e+00
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: vseleq.f16 s0, s0, s2
; CHECK-NEXT: vmov.f16 s2, r2
-; CHECK-NEXT: vadd.f16 s0, s0, s2
+; CHECK-NEXT: vsub.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
%cmp = icmp eq i32 %arg0, 0
@@ -290,41 +284,40 @@ define half @fadd_select_fneg_posk_f16(i32 %arg0, half %x, half %y) {
define <8 x half> @fadd_vselect_fneg_posk_v8f16(<8 x i32> %arg0, <8 x half> %x, <8 x half> %y) {
; CHECK-LABEL: fadd_vselect_fneg_posk_v8f16:
; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r4, r5, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: vmov d0, r0, r1
-; CHECK-NEXT: add r0, sp, #16
+; CHECK-NEXT: vmov.i8 q1, #0xff
; CHECK-NEXT: vmov d1, r2, r3
-; CHECK-NEXT: vldrw.u32 q3, [r0]
+; CHECK-NEXT: add r0, sp, #16
; CHECK-NEXT: vcmp.i32 eq, q0, zr
; CHECK-NEXT: vmov.i8 q0, #0x0
-; CHECK-NEXT: vmov.i8 q1, #0xff
-; CHECK-NEXT: add r0, sp, #32
; CHECK-NEXT: vpsel q2, q1, q0
+; CHECK-NEXT: vldrw.u32 q3, [r0]
+; CHECK-NEXT: vmov r2, r1, d4
+; CHECK-NEXT: add r12, sp, #32
+; CHECK-NEXT: vmov r4, r5, d5
+; CHECK-NEXT: vmov.16 q2[0], r2
+; CHECK-NEXT: vmov.16 q2[1], r1
; CHECK-NEXT: vcmp.i32 eq, q3, zr
; CHECK-NEXT: vpsel q1, q1, q0
-; CHECK-NEXT: vldrw.u32 q0, [r0]
-; CHECK-NEXT: vmov r1, r0, d4
-; CHECK-NEXT: vmov r4, r5, d5
-; CHECK-NEXT: vmov.16 q2[0], r1
-; CHECK-NEXT: vmov.16 q2[1], r0
-; CHECK-NEXT: vmov r2, r3, d2
; CHECK-NEXT: vmov.16 q2[2], r4
-; CHECK-NEXT: vmov lr, r12, d3
+; CHECK-NEXT: vmov r3, r0, d2
; CHECK-NEXT: vmov.16 q2[3], r5
-; CHECK-NEXT: vneg.f16 q0, q0
-; CHECK-NEXT: vmov.16 q2[4], r2
-; CHECK-NEXT: vmov.i16 q1, #0x4400
-; CHECK-NEXT: vmov.16 q2[5], r3
+; CHECK-NEXT: vmov.16 q2[4], r3
+; CHECK-NEXT: vmov r6, lr, d3
+; CHECK-NEXT: vmov.16 q2[5], r0
+; CHECK-NEXT: vldrw.u32 q1, [r12]
+; CHECK-NEXT: vmov.16 q2[6], r6
+; CHECK-NEXT: vmov.i16 q0, #0xc400
+; CHECK-NEXT: vmov.16 q2[7], lr
; CHECK-NEXT: add r0, sp, #48
-; CHECK-NEXT: vmov.16 q2[6], lr
-; CHECK-NEXT: vmov.16 q2[7], r12
; CHECK-NEXT: vcmp.i16 ne, q2, zr
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: vldrw.u32 q1, [r0]
-; CHECK-NEXT: vadd.f16 q0, q0, q1
+; CHECK-NEXT: vsub.f16 q0, q1, q0
; CHECK-NEXT: vmov r0, r1, d0
; CHECK-NEXT: vmov r2, r3, d1
-; CHECK-NEXT: pop {r4, r5, r11, pc}
+; CHECK-NEXT: pop {r4, r5, r6, pc}
%cmp = icmp eq <8 x i32> %arg0, zeroinitializer
%neg.x = fneg <8 x half> %x
%select = select <8 x i1> %cmp, <8 x half> %neg.x, <8 x half> <half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0>
More information about the llvm-commits
mailing list