[llvm] f3c008c - DAG: Relax foldBitcastedFPLogic conditions

Tue Feb 14 03:59:16 PST 2023

Author: Matt Arsenault
Date: 2023-02-14T07:59:10-04:00
New Revision: f3c008ca776bd17c085ba5be34cdf7569971628a

URL: https://github.com/llvm/llvm-project/commit/f3c008ca776bd17c085ba5be34cdf7569971628a
DIFF: https://github.com/llvm/llvm-project/commit/f3c008ca776bd17c085ba5be34cdf7569971628a.diff

LOG: DAG: Relax foldBitcastedFPLogic conditions

Requiring a bitcast to exist was unhelpful. The most basic cases
are always going to be a CopyFromReg or load, so they would need
a new cast inserted. Don't require a bitcast if the fneg/fabs is a
free operation. I don't think this logic makes much sense (it seems
to be imparting a special interpretation to bitcast), but this needs
to be in sync with foldSignChangeInBitcast.

We should also get rid of this hasBitPreservingFPLogic hook. fabs/fneg
are either bit-preserving or incorrectly implemented, so this should
just be a regular legality check.

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
    llvm/test/CodeGen/AMDGPU/fneg.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 56bf9ae6f34f2..65faaae95acd3 100644

--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14433,6 +14433,16 @@ static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
     return SDValue();
   }
 
+  // This needs to be the inverse of logic in foldSignChangeInBitcast.
+  // FIXME: I don't think looking for bitcast intrinsically makes sense, but
+  // removing this would require more changes.
+  auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
+    if (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).getValueType() == VT)
+      return true;
+
+    return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
+  };
+
   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
@@ -14440,9 +14450,9 @@ static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
   SDValue LogicOp0 = N0.getOperand(0);
   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
-      LogicOp0.getOpcode() == ISD::BITCAST &&
-      LogicOp0.getOperand(0).getValueType() == VT) {
-    SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
+      IsBitCastOrFree(LogicOp0, VT)) {
+    SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
+    SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
     NumFPLogicOpsConv++;
     if (N0.getOpcode() == ISD::OR)
       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);

diff  --git a/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll b/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
index fff2d8d51a185..45a1b844c6f80 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
@@ -484,10 +484,9 @@ define double @fneg_xor_select_i64_user_with_srcmods(i1 %cond, i64 %arg0, i64 %a
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
-; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
-; GCN-NEXT:    v_cndmask_b32_e32 v1, v4, v2, vcc
-; GCN-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
-; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], 2.0
+; GCN-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
+; GCN-NEXT:    v_add_f64 v[0:1], -v[1:2], 2.0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: fneg_xor_select_i64_user_with_srcmods:
@@ -497,10 +496,8 @@ define double @fneg_xor_select_i64_user_with_srcmods(i1 %cond, i64 %arg0, i64 %a
 ; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX11-NEXT:    v_dual_cndmask_b32 v0, v3, v1 :: v_dual_cndmask_b32 v1, v4, v2
-; GFX11-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_add_f64 v[0:1], v[0:1], 2.0
+; GFX11-NEXT:    v_dual_cndmask_b32 v1, v3, v1 :: v_dual_cndmask_b32 v2, v4, v2
+; GFX11-NEXT:    v_add_f64 v[0:1], -v[1:2], 2.0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %select = select i1 %cond, i64 %arg0, i64 %arg1
   %fneg = xor i64 %select, 9223372036854775808

diff  --git a/llvm/test/CodeGen/AMDGPU/fneg.ll b/llvm/test/CodeGen/AMDGPU/fneg.ll
index 1e8459df7517e..fdee6a27c0343 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg.ll
@@ -120,8 +120,7 @@ define i32 @v_fneg_i32(i32 %in) {
 
 ; FUNC-LABEL: {{^}}s_fneg_i32_fp_use:
 ; GCN: s_load_dword [[IN:s[0-9]+]]
-; GCN: s_xor_b32 [[FNEG:s[0-9]+]], [[IN]], 0x80000000
-; GCN: v_add_f32_e64 v{{[0-9]+}}, [[FNEG]], 2.0
+; GCN: v_sub_f32_e64 v{{[0-9]+}}, 2.0, [[IN]]
 define amdgpu_kernel void @s_fneg_i32_fp_use(ptr addrspace(1) %out, i32 %in) {
   %fneg = xor i32 %in, -2147483648
   %bitcast = bitcast i32 %fneg to float
@@ -132,8 +131,7 @@ define amdgpu_kernel void @s_fneg_i32_fp_use(ptr addrspace(1) %out, i32 %in) {
 
 ; FUNC-LABEL: {{^}}v_fneg_i32_fp_use:
 ; GCN: s_waitcnt
-; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
-; GCN-NEXT: v_add_f32_e32 v0, 2.0, v0
+; GCN-NEXT: v_sub_f32_e32 v0, 2.0, v0
 ; GCN-NEXT: s_setpc_b64
 define float @v_fneg_i32_fp_use(i32 %in) {
   %fneg = xor i32 %in, -2147483648
@@ -160,8 +158,7 @@ define i64 @v_fneg_i64(i64 %in) {
 }
 
 ; FUNC-LABEL: {{^}}s_fneg_i64_fp_use:
-; GCN: s_xor_b32 s[[NEG_HI:[0-9]+]], s{{[0-9]+}}, 0x80000000
-; GCN: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 2.0
+; GCN: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, -s{{\[[0-9]+:[0-9]+\]}}, 2.0
 define amdgpu_kernel void @s_fneg_i64_fp_use(ptr addrspace(1) %out, i64 %in) {
   %fneg = xor i64 %in, -9223372036854775808
   %bitcast = bitcast i64 %fneg to double
@@ -172,8 +169,7 @@ define amdgpu_kernel void @s_fneg_i64_fp_use(ptr addrspace(1) %out, i64 %in) {
 
 ; FUNC-LABEL: {{^}}v_fneg_i64_fp_use:
 ; GCN: s_waitcnt
-; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
-; GCN-NEXT: v_add_f64 v[0:1], v[0:1], 2.0
+; GCN-NEXT: v_add_f64 v[0:1], -v[0:1], 2.0
 ; GCN-NEXT: s_setpc_b64
 define double @v_fneg_i64_fp_use(i64 %in) {
   %fneg = xor i64 %in, -9223372036854775808
@@ -197,9 +193,7 @@ define i16 @v_fneg_i16(i16 %in) {
 ; SI: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], [[ADD]]
 
 ; VI: s_load_dword [[IN:s[0-9]+]]
-; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0xffff8000
-; VI: v_xor_b32_e32 [[NEG:v[0-9]+]], [[IN]], [[K]]
-; VI: v_add_f16_e32 v{{[0-9]+}}, 2.0, [[NEG]]
+; VI: v_sub_f16_e64 v{{[0-9]+}}, 2.0, [[IN]]
 define amdgpu_kernel void @s_fneg_i16_fp_use(ptr addrspace(1) %out, i16 %in) {
   %fneg = xor i16 %in, -32768
   %bitcast = bitcast i16 %fneg to half
@@ -215,8 +209,7 @@ define amdgpu_kernel void @s_fneg_i16_fp_use(ptr addrspace(1) %out, i16 %in) {
 ; SI-NEXT: s_setpc_b64
 
 ; VI: s_waitcnt
-; VI-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
-; VI-NEXT: v_add_f16_e32 v0, 2.0, v0
+; VI-NEXT: v_sub_f16_e32 v0, 2.0, v0
 ; VI-NEXT: s_setpc_b64
 define half @v_fneg_i16_fp_use(i16 %in) {
   %fneg = xor i16 %in, -32768
@@ -291,10 +284,9 @@ define amdgpu_kernel void @s_fneg_v2i16_fp_use(ptr addrspace(1) %out, i32 %arg)
 ; SI: v_add_f32_e32 v1, 2.0, v1
 
 ; VI: s_waitcnt
-; VI: v_xor_b32_e32 v0, 0x80008000, v0
 ; VI: v_mov_b32_e32 v1, 0x4000
-; VI: v_add_f16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI: v_add_f16_e32 v0, 2.0, v0
+; VI: v_sub_f16_sdwa v1, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI: v_sub_f16_e32 v0, 2.0, v0
 ; VI: v_or_b32_e32 v0, v0, v1
 ; VI: s_setpc_b64
 define <2 x half> @v_fneg_v2i16_fp_use(i32 %arg) {