[llvm] 54d755a - DAG: Fix incorrect folding of fmul -1 to fneg
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 14 18:25:08 PDT 2021
Author: Matt Arsenault
Date: 2021-09-14T21:25:02-04:00
New Revision: 54d755a034362814bd7a0b90f172cbba39729cf4
URL: https://github.com/llvm/llvm-project/commit/54d755a034362814bd7a0b90f172cbba39729cf4
DIFF: https://github.com/llvm/llvm-project/commit/54d755a034362814bd7a0b90f172cbba39729cf4.diff
LOG: DAG: Fix incorrect folding of fmul -1 to fneg
The fmul is a canonicalizing operation, and fneg is not, so this would
break denormals that need flushing and also would not quiet signaling
NaNs. Fold to fsub instead, which is also canonicalizing.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/arm64-fmadd.ll
llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll
llvm/test/CodeGen/AMDGPU/fneg-combines.ll
llvm/test/CodeGen/ARM/fnegs.ll
llvm/test/CodeGen/Hexagon/opt-fneg.ll
llvm/test/CodeGen/PowerPC/combine-fneg.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6f27b44d0e331..5a49f33e46fe2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14004,10 +14004,13 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (N1CFP && N1CFP->isExactlyValue(+2.0))
return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
- // fold (fmul X, -1.0) -> (fneg X)
- if (N1CFP && N1CFP->isExactlyValue(-1.0))
- if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
- return DAG.getNode(ISD::FNEG, DL, VT, N0);
+ // fold (fmul X, -1.0) -> (fsub -0.0, X)
+ if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
+ return DAG.getNode(ISD::FSUB, DL, VT,
+ DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
+ }
+ }
// -N0 * -N1 --> N0 * N1
TargetLowering::NegatibleCost CostN0 =
diff --git a/llvm/test/CodeGen/AArch64/arm64-fmadd.ll b/llvm/test/CodeGen/AArch64/arm64-fmadd.ll
index 6079bc4f07bc5..00bd0ec68b686 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fmadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fmadd.ll
@@ -82,7 +82,7 @@ define double @fms64(double %a, double %b, double %c) nounwind readnone ssp {
; CHECK-NEXT: fmsub d0, d0, d1, d2
; CHECK-NEXT: ret
entry:
- %mul = fmul double %b, -1.000000e+00
+ %mul = fneg double %b
%0 = tail call double @llvm.fma.f64(double %a, double %mul, double %c)
ret double %0
}
@@ -93,7 +93,7 @@ define double @fms64_com(double %a, double %b, double %c) nounwind readnone ssp
; CHECK-NEXT: fmsub d0, d1, d0, d2
; CHECK-NEXT: ret
entry:
- %mul = fmul double %b, -1.000000e+00
+ %mul = fneg double %b
%0 = tail call double @llvm.fma.f64(double %mul, double %a, double %c)
ret double %0
}
@@ -104,7 +104,7 @@ define double @fnms64(double %a, double %b, double %c) nounwind readnone ssp {
; CHECK-NEXT: fnmsub d0, d0, d1, d2
; CHECK-NEXT: ret
entry:
- %mul = fmul double %c, -1.000000e+00
+ %mul = fneg double %c
%0 = tail call double @llvm.fma.f64(double %a, double %b, double %mul)
ret double %0
}
diff --git a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll
index f3ad71b9c4455..ba8e616eeeb05 100644
--- a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll
+++ b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll
@@ -22,7 +22,7 @@ define half @fms16(half %a, half %b, half %c) nounwind readnone ssp {
; CHECK-LABEL: fms16:
; CHECK: fmsub h0, h0, h1, h2
entry:
- %mul = fmul half %b, -1.000000e+00
+ %mul = fneg half %b
%0 = tail call half @llvm.fma.f16(half %a, half %mul, half %c)
ret half %0
}
@@ -32,7 +32,7 @@ define half @fms16_com(half %a, half %b, half %c) nounwind readnone ssp {
; CHECK: fmsub h0, h1, h0, h2
; CHECK-NEXT: ret
entry:
- %mul = fmul half %b, -1.000000e+00
+ %mul = fneg half %b
%0 = tail call half @llvm.fma.f16(half %mul, half %a, half %c)
ret half %0
}
@@ -42,7 +42,7 @@ define half @fnms16(half %a, half %b, half %c) nounwind readnone ssp {
; CHECK: fnmsub h0, h0, h1, h2
; CHECK-NEXT: ret
entry:
- %mul = fmul half %c, -1.000000e+00
+ %mul = fneg half %c
%0 = tail call half @llvm.fma.f16(half %a, half %b, half %mul)
ret half %0
}
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
index 900d07c2d7ce4..000ef3f25db9b 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
@@ -2597,6 +2597,51 @@ bb:
ret <2 x float> %i6
}
+; This expects denormal flushing, so can't turn this fmul into fneg
+; TODO: Keeping this as fmul saves encoding size
+; GCN-LABEL: {{^}}nnan_fmul_neg1_to_fneg:
+; GCN: v_sub_f32_e32 [[TMP:v[0-9]+]], 0x80000000, v0
+; GCN-NEXT: v_mul_f32_e32 v0, [[TMP]], v1
+define float @nnan_fmul_neg1_to_fneg(float %x, float %y) #0 {
+ %mul = fmul float %x, -1.0
+ %add = fmul nnan float %mul, %y
+ ret float %add
+}
+
+; It's legal to turn this fmul into an fneg since denormals are
+; preserved and we know an snan can't happen from the flag.
+; GCN-LABEL: {{^}}denormal_fmul_neg1_to_fneg:
+; GCN: v_mul_f32_e64 v0, -v0, v1
+; GCN-NEXT: s_setpc_b64
+define float @denormal_fmul_neg1_to_fneg(float %x, float %y) {
+ %mul = fmul nnan float %x, -1.0
+ %add = fmul float %mul, %y
+ ret float %add
+}
+
+; know the source can't be an snan
+; GCN-LABEL: {{^}}denorm_snan_fmul_neg1_to_fneg:
+; GCN: v_mul_f32_e64 [[TMP:v[0-9]+]], v0, -v0
+; GCN: v_mul_f32_e32 v0, [[TMP]], v1
+; GCN-NEXT: s_setpc_b64
+define float @denorm_snan_fmul_neg1_to_fneg(float %x, float %y) {
+ %canonical = fmul float %x, %x
+ %mul = fmul float %canonical, -1.0
+ %add = fmul float %mul, %y
+ ret float %add
+}
+
+; GCN-LABEL: {{^}}flush_snan_fmul_neg1_to_fneg:
+; GCN: v_mul_f32_e32 [[TMP0:v[0-9]+]], 1.0, v0
+; GCN: v_sub_f32_e32 [[TMP1:v[0-9]+]], 0x80000000, [[TMP0]]
+; GCN-NEXT: v_mul_f32_e32 v0, [[TMP1]], v1
+define float @flush_snan_fmul_neg1_to_fneg(float %x, float %y) #0 {
+ %quiet = call float @llvm.canonicalize.f32(float %x)
+ %mul = fmul float %quiet, -1.0
+ %add = fmul float %mul, %y
+ ret float %add
+}
+
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @llvm.fma.f32(float, float, float) #1
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
diff --git a/llvm/test/CodeGen/ARM/fnegs.ll b/llvm/test/CodeGen/ARM/fnegs.ll
index 3a4767e9173fd..875fc00f5dddc 100644
--- a/llvm/test/CodeGen/ARM/fnegs.ll
+++ b/llvm/test/CodeGen/ARM/fnegs.ll
@@ -49,7 +49,7 @@ entry:
define float @test2(float* %a) {
entry:
%0 = load float, float* %a, align 4 ; <float> [#uses=2]
- %1 = fmul float -1.000000e+00, %0 ; <float> [#uses=2]
+ %1 = fneg float %0 ; <float> [#uses=2]
%2 = fpext float %1 to double ; <double> [#uses=1]
%3 = fcmp olt double %2, 1.234000e+00 ; <i1> [#uses=1]
%retval = select i1 %3, float %1, float %0 ; <float> [#uses=1]
diff --git a/llvm/test/CodeGen/Hexagon/opt-fneg.ll b/llvm/test/CodeGen/Hexagon/opt-fneg.ll
index da496c588019f..5c122c90010a2 100644
--- a/llvm/test/CodeGen/Hexagon/opt-fneg.ll
+++ b/llvm/test/CodeGen/Hexagon/opt-fneg.ll
@@ -3,6 +3,7 @@
define float @foo(float %x) nounwind {
entry:
+; CHECK-LABEL: foo:
; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}},#31)
%x.addr = alloca float, align 4
store float %x, float* %x.addr, align 4
@@ -13,14 +14,25 @@ entry:
define float @bar(float %x) nounwind {
entry:
+; CHECK-LABEL: bar:
; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}},#31)
%sub = fsub float -0.000000e+00, %x
ret float %sub
}
-define float @baz(float %x) nounwind {
+define float @baz0(float %x) nounwind {
entry:
+; CHECK-LABEL: baz0:
; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}},#31)
- %conv1 = fmul float %x, -1.000000e+00
+ %conv1 = fmul nnan float %x, -1.000000e+00
+ ret float %conv1
+}
+
+define float @baz1(float %x) nounwind {
+entry:
+ %not.nan = fadd nnan float %x, %x
+; CHECK-LABEL: baz1:
+; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}},#31)
+ %conv1 = fmul float %not.nan, -1.000000e+00
ret float %conv1
}
diff --git a/llvm/test/CodeGen/PowerPC/combine-fneg.ll b/llvm/test/CodeGen/PowerPC/combine-fneg.ll
index 771c05f184a04..1124fbd22a0e5 100644
--- a/llvm/test/CodeGen/PowerPC/combine-fneg.ll
+++ b/llvm/test/CodeGen/PowerPC/combine-fneg.ll
@@ -13,10 +13,10 @@ define <4 x double> @fneg_fdiv_splat(double %a0, <4 x double> %a1) {
; CHECK-NEXT: xvredp 2, 0
; CHECK-NEXT: xxswapd 1, 1
; CHECK-NEXT: xxlor 3, 1, 1
-; CHECK-NEXT: xvnmsubadp 3, 0, 2
-; CHECK-NEXT: xvmaddadp 2, 2, 3
-; CHECK-NEXT: xvnmsubadp 1, 0, 2
-; CHECK-NEXT: xvnmaddadp 2, 2, 1
+; CHECK-NEXT: xvmaddadp 3, 0, 2
+; CHECK-NEXT: xvnmsubadp 2, 2, 3
+; CHECK-NEXT: xvmaddadp 1, 0, 2
+; CHECK-NEXT: xvmsubadp 2, 2, 1
; CHECK-NEXT: xvmuldp 34, 34, 2
; CHECK-NEXT: xvmuldp 35, 35, 2
; CHECK-NEXT: blr
More information about the llvm-commits
mailing list