[llvm-branch-commits] [llvm] DAG: Call SimplifyDemandedBits on fcopysign sign value (PR #97151)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat Jun 29 00:46:12 PDT 2024
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/97151
Math library code has quite a few places with complex bit logic that is
ultimately fed into a copysign. This helps avoid some regressions in a
future patch.
This assumes the sign bit is the most significant bit of the float type,
which should at least be valid for IEEE types. I'm not sure whether we
need to guard against ppc_fp128 or anything else weird.
There appears to be some value in simplifying the value operand
as well, but I'll address that separately.
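As a rough illustration (a hypothetical example, not one of the patch's
test cases), the pattern this targets looks like the IR below: the 'and'
keeps only the sign bit, which is the only bit FCOPYSIGN reads from its
second operand, so once SimplifyDemandedBits runs on the sign operand
the masking becomes dead and can be dropped, much like the removed
v_and_b32 of 0x80000000 in the AMDGPU tests further down.

  declare float @llvm.copysign.f32(float, float)

  define float @copysign_of_masked_sign(float %mag, float %s) {
    ; -2147483648 is 0x80000000: clear everything but the sign bit of %s.
    %s.bits = bitcast float %s to i32
    %s.sign = and i32 %s.bits, -2147483648
    %s.f = bitcast i32 %s.sign to float
    ; copysign only consumes the sign bit of its second operand, so the
    ; 'and' above becomes redundant with this combine.
    %r = call float @llvm.copysign.f32(float %mag, float %s.f)
    ret float %r
  }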
From 1cc5dcac0c7c83adaef02ad2d092764de1922e4b Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sat, 29 Jun 2024 07:42:27 +0200
Subject: [PATCH] DAG: Call SimplifyDemandedBits on fcopysign sign value
Math library code has quite a few places with complex bit logic that is
ultimately fed into a copysign. This helps avoid some regressions in a
future patch.
This assumes the sign bit is the most significant bit of the float type,
which should at least be valid for IEEE types. I'm not sure whether we
need to guard against ppc_fp128 or anything else weird.
There appears to be some value in simplifying the value operand
as well, but I'll address that separately.
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 ++
.../AMDGPU/copysign-simplify-demanded-bits.ll | 22 +++-----
.../PowerPC/fp128-bitcast-after-operation.ll | 55 +++++++++----------
llvm/test/CodeGen/RISCV/double-arith.ll | 4 +-
.../RISCV/double-bitmanip-dagcombines.ll | 12 ++--
.../RISCV/float-bitmanip-dagcombines.ll | 10 ++--
6 files changed, 47 insertions(+), 62 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 254d63abdf805..eaab2b3421190 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17565,6 +17565,12 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
  if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));

+  // We only take the sign bit from the sign operand.
+  EVT SignVT = N1.getValueType();
+  if (SimplifyDemandedBits(N1,
+                           APInt::getSignMask(SignVT.getScalarSizeInBits())))
+    return SDValue(N, 0);
+
  return SDValue();
}
diff --git a/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll b/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll
index 1eccb55202f02..af4f236c783c6 100644
--- a/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll
+++ b/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll
@@ -11,11 +11,10 @@ define half @test_pown_reduced_fast_f16_known_odd(half %x, i32 %y.arg) #0 {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_or_b32_e32 v1, 1, v1
; GFX9-NEXT: v_cvt_f32_i32_e32 v1, v1
-; GFX9-NEXT: v_and_b32_e32 v2, 0xffff8000, v0
; GFX9-NEXT: s_movk_i32 s4, 0x7fff
; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX9-NEXT: v_mul_f16_e64 v0, |v0|, v1
-; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v2
+; GFX9-NEXT: v_mul_f16_e64 v1, |v0|, v1
+; GFX9-NEXT: v_bfi_b32 v0, s4, v1, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%y = or i32 %y.arg, 1
%fabs = call half @llvm.fabs.f16(half %x)
@@ -37,10 +36,9 @@ define <2 x half> @test_pown_reduced_fast_v2f16_known_odd(<2 x half> %x, <2 x i3
; GFX9-NEXT: v_cvt_f32_i32_e32 v2, v2
; GFX9-NEXT: v_cvt_f32_i32_e32 v1, v1
; GFX9-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v0
-; GFX9-NEXT: v_and_b32_e32 v0, 0x80008000, v0
+; GFX9-NEXT: s_movk_i32 s4, 0x7fff
; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX9-NEXT: s_movk_i32 s4, 0x7fff
; GFX9-NEXT: v_pack_b32_f16 v1, v1, v2
; GFX9-NEXT: v_pk_mul_f16 v1, v3, v1
; GFX9-NEXT: v_bfi_b32 v2, s4, v1, v0
@@ -67,10 +65,9 @@ define float @test_pown_reduced_fast_f32_known_odd(float %x, i32 %y.arg) #0 {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_or_b32_e32 v1, 1, v1
; GFX9-NEXT: v_cvt_f32_i32_e32 v1, v1
-; GFX9-NEXT: v_and_b32_e32 v2, 0x80000000, v0
; GFX9-NEXT: s_brev_b32 s4, -2
-; GFX9-NEXT: v_mul_f32_e64 v0, |v0|, v1
-; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v2
+; GFX9-NEXT: v_mul_f32_e64 v1, |v0|, v1
+; GFX9-NEXT: v_bfi_b32 v0, s4, v1, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%y = or i32 %y.arg, 1
%fabs = call float @llvm.fabs.f32(float %x)
@@ -94,8 +91,6 @@ define <2 x float> @test_pown_reduced_fast_v2f32_known_odd(<2 x float> %x, <2 x
; GFX9-NEXT: s_brev_b32 s4, -2
; GFX9-NEXT: v_mul_f32_e64 v3, |v1|, v3
; GFX9-NEXT: v_mul_f32_e64 v2, |v0|, v2
-; GFX9-NEXT: v_and_b32_e32 v1, 0x80000000, v1
-; GFX9-NEXT: v_and_b32_e32 v0, 0x80000000, v0
; GFX9-NEXT: v_bfi_b32 v0, s4, v2, v0
; GFX9-NEXT: v_bfi_b32 v1, s4, v3, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -118,8 +113,7 @@ define double @test_pown_reduced_fast_f64_known_odd(double %x, i32 %y.arg) #0 {
; GFX9-NEXT: v_cvt_f64_i32_e32 v[2:3], v2
; GFX9-NEXT: s_brev_b32 s4, -2
; GFX9-NEXT: v_mul_f64 v[2:3], |v[0:1]|, v[2:3]
-; GFX9-NEXT: v_and_b32_e32 v0, 0x80000000, v1
-; GFX9-NEXT: v_bfi_b32 v1, s4, v3, v0
+; GFX9-NEXT: v_bfi_b32 v1, s4, v3, v1
; GFX9-NEXT: v_mov_b32_e32 v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
%y = or i32 %y.arg, 1
@@ -144,10 +138,8 @@ define <2 x double> @test_pown_reduced_fast_v2f64_known_odd(<2 x double> %x, <2
; GFX9-NEXT: s_brev_b32 s4, -2
; GFX9-NEXT: v_mul_f64 v[4:5], |v[0:1]|, v[4:5]
; GFX9-NEXT: v_mul_f64 v[6:7], |v[2:3]|, v[6:7]
-; GFX9-NEXT: v_and_b32_e32 v0, 0x80000000, v3
-; GFX9-NEXT: v_and_b32_e32 v1, 0x80000000, v1
; GFX9-NEXT: v_bfi_b32 v1, s4, v5, v1
-; GFX9-NEXT: v_bfi_b32 v3, s4, v7, v0
+; GFX9-NEXT: v_bfi_b32 v3, s4, v7, v3
; GFX9-NEXT: v_mov_b32_e32 v0, v4
; GFX9-NEXT: v_mov_b32_e32 v2, v6
; GFX9-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
index aedbb3f6a985a..ebec8c1c4d654 100644
--- a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
+++ b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
@@ -152,41 +152,36 @@ define i128 @test_copysign(ppc_fp128 %x, ppc_fp128 %y) nounwind {
; PPC32-LABEL: test_copysign:
; PPC32: # %bb.0: # %entry
; PPC32-NEXT: mflr 0
-; PPC32-NEXT: stwu 1, -96(1)
-; PPC32-NEXT: stw 0, 100(1)
-; PPC32-NEXT: stfd 1, 40(1)
-; PPC32-NEXT: lwz 3, 44(1)
-; PPC32-NEXT: stfd 2, 32(1)
-; PPC32-NEXT: stw 3, 60(1)
-; PPC32-NEXT: lwz 3, 40(1)
-; PPC32-NEXT: stfd 3, 72(1)
-; PPC32-NEXT: stw 3, 56(1)
+; PPC32-NEXT: stwu 1, -80(1)
+; PPC32-NEXT: stw 0, 84(1)
+; PPC32-NEXT: stfd 1, 32(1)
; PPC32-NEXT: lwz 3, 36(1)
-; PPC32-NEXT: stfd 4, 64(1)
+; PPC32-NEXT: stfd 2, 24(1)
; PPC32-NEXT: stw 3, 52(1)
; PPC32-NEXT: lwz 3, 32(1)
-; PPC32-NEXT: lfd 1, 56(1)
+; PPC32-NEXT: stfd 3, 56(1)
; PPC32-NEXT: stw 3, 48(1)
-; PPC32-NEXT: lwz 3, 76(1)
-; PPC32-NEXT: lfd 2, 48(1)
-; PPC32-NEXT: stw 3, 92(1)
-; PPC32-NEXT: lwz 3, 72(1)
-; PPC32-NEXT: stw 3, 88(1)
-; PPC32-NEXT: lwz 3, 68(1)
-; PPC32-NEXT: lfd 3, 88(1)
-; PPC32-NEXT: stw 3, 84(1)
-; PPC32-NEXT: lwz 3, 64(1)
-; PPC32-NEXT: stw 3, 80(1)
-; PPC32-NEXT: lfd 4, 80(1)
+; PPC32-NEXT: lwz 3, 28(1)
+; PPC32-NEXT: lfd 4, 64(1)
+; PPC32-NEXT: stw 3, 44(1)
+; PPC32-NEXT: lwz 3, 24(1)
+; PPC32-NEXT: lfd 1, 48(1)
+; PPC32-NEXT: stw 3, 40(1)
+; PPC32-NEXT: lwz 3, 60(1)
+; PPC32-NEXT: lfd 2, 40(1)
+; PPC32-NEXT: stw 3, 76(1)
+; PPC32-NEXT: lwz 3, 56(1)
+; PPC32-NEXT: stw 3, 72(1)
+; PPC32-NEXT: lfd 3, 72(1)
; PPC32-NEXT: bl copysignl
-; PPC32-NEXT: stfd 1, 16(1)
-; PPC32-NEXT: stfd 2, 24(1)
-; PPC32-NEXT: lwz 3, 16(1)
-; PPC32-NEXT: lwz 4, 20(1)
-; PPC32-NEXT: lwz 5, 24(1)
-; PPC32-NEXT: lwz 6, 28(1)
-; PPC32-NEXT: lwz 0, 100(1)
-; PPC32-NEXT: addi 1, 1, 96
+; PPC32-NEXT: stfd 1, 8(1)
+; PPC32-NEXT: stfd 2, 16(1)
+; PPC32-NEXT: lwz 3, 8(1)
+; PPC32-NEXT: lwz 4, 12(1)
+; PPC32-NEXT: lwz 5, 16(1)
+; PPC32-NEXT: lwz 6, 20(1)
+; PPC32-NEXT: lwz 0, 84(1)
+; PPC32-NEXT: addi 1, 1, 80
; PPC32-NEXT: mtlr 0
; PPC32-NEXT: blr
entry:
diff --git a/llvm/test/CodeGen/RISCV/double-arith.ll b/llvm/test/CodeGen/RISCV/double-arith.ll
index a2093f5b5e43a..ced6ff66ef678 100644
--- a/llvm/test/CodeGen/RISCV/double-arith.ll
+++ b/llvm/test/CodeGen/RISCV/double-arith.ll
@@ -320,9 +320,7 @@ define double @fsgnjn_d(double %a, double %b) nounwind {
;
; RV64IZFINXZDINX-LABEL: fsgnjn_d:
; RV64IZFINXZDINX: # %bb.0:
-; RV64IZFINXZDINX-NEXT: li a2, -1
-; RV64IZFINXZDINX-NEXT: slli a2, a2, 63
-; RV64IZFINXZDINX-NEXT: xor a1, a1, a2
+; RV64IZFINXZDINX-NEXT: not a1, a1
; RV64IZFINXZDINX-NEXT: fsgnj.d a0, a0, a1
; RV64IZFINXZDINX-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll b/llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll
index 99835ff59493f..70d3291d3a840 100644
--- a/llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll
+++ b/llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll
@@ -156,20 +156,16 @@ define double @fcopysign_fneg(double %a, double %b) nounwind {
;
; RV64IFD-LABEL: fcopysign_fneg:
; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: li a2, -1
-; RV64IFD-NEXT: slli a2, a2, 63
-; RV64IFD-NEXT: xor a1, a1, a2
-; RV64IFD-NEXT: fmv.d.x fa5, a1
+; RV64IFD-NEXT: fmv.d.x fa5, a0
+; RV64IFD-NEXT: not a0, a1
; RV64IFD-NEXT: fmv.d.x fa4, a0
-; RV64IFD-NEXT: fsgnj.d fa5, fa4, fa5
+; RV64IFD-NEXT: fsgnj.d fa5, fa5, fa4
; RV64IFD-NEXT: fmv.x.d a0, fa5
; RV64IFD-NEXT: ret
;
; RV64IZFINXZDINX-LABEL: fcopysign_fneg:
; RV64IZFINXZDINX: # %bb.0:
-; RV64IZFINXZDINX-NEXT: li a2, -1
-; RV64IZFINXZDINX-NEXT: slli a2, a2, 63
-; RV64IZFINXZDINX-NEXT: xor a1, a1, a2
+; RV64IZFINXZDINX-NEXT: not a1, a1
; RV64IZFINXZDINX-NEXT: fsgnj.d a0, a0, a1
; RV64IZFINXZDINX-NEXT: ret
%1 = fneg double %b
diff --git a/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll b/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll
index 2495b1ad300c6..53df588bf1c37 100644
--- a/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll
+++ b/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll
@@ -119,18 +119,16 @@ define float @fcopysign_fneg(float %a, float %b) nounwind {
;
; RV32IF-LABEL: fcopysign_fneg:
; RV32IF: # %bb.0:
-; RV32IF-NEXT: lui a2, 524288
-; RV32IF-NEXT: xor a1, a1, a2
-; RV32IF-NEXT: fmv.w.x fa5, a1
+; RV32IF-NEXT: fmv.w.x fa5, a0
+; RV32IF-NEXT: not a0, a1
; RV32IF-NEXT: fmv.w.x fa4, a0
-; RV32IF-NEXT: fsgnj.s fa5, fa4, fa5
+; RV32IF-NEXT: fsgnj.s fa5, fa5, fa4
; RV32IF-NEXT: fmv.x.w a0, fa5
; RV32IF-NEXT: ret
;
; RV32IZFINX-LABEL: fcopysign_fneg:
; RV32IZFINX: # %bb.0:
-; RV32IZFINX-NEXT: lui a2, 524288
-; RV32IZFINX-NEXT: xor a1, a1, a2
+; RV32IZFINX-NEXT: not a1, a1
; RV32IZFINX-NEXT: fsgnj.s a0, a0, a1
; RV32IZFINX-NEXT: ret
;