[llvm] c125af8 - [DAGCombine] Check reassoc flags in aggressive fsub fusion
Jinsong Ji via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 23 07:00:21 PDT 2021
Author: Jinsong Ji
Date: 2021-06-23T13:59:40Z
New Revision: c125af82a5ff5dbbbcb8ebc5cde156d41e6ac281
URL: https://github.com/llvm/llvm-project/commit/c125af82a5ff5dbbbcb8ebc5cde156d41e6ac281
DIFF: https://github.com/llvm/llvm-project/commit/c125af82a5ff5dbbbcb8ebc5cde156d41e6ac281.diff
LOG: [DAGCombine] Check reassoc flags in aggressive fsub fusion
This is from the discussion in https://reviews.llvm.org/D104247#inline-993387
The contract and reassoc fast-math flags shouldn't imply each other.
All of the aggressive fsub fusions reassociate operations,
so we should guard them with a reassoc flag check.
Reviewed By: mcberg2017
Differential Revision: https://reviews.llvm.org/D104723
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AMDGPU/fpext-free.ll
llvm/test/CodeGen/PowerPC/fma-assoc.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 49db4d0792dda..02f3ea22fa42f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13252,14 +13252,23 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
}
+ auto isReassociable = [Options](SDNode *N) {
+ return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
+ };
+
+ auto isContractableAndReassociableFMUL = [isContractableFMUL,
+ isReassociable](SDValue N) {
+ return isContractableFMUL(N) && isReassociable(N.getNode());
+ };
+
// More folding opportunities when target permits.
- if (Aggressive) {
+ if (Aggressive && isReassociable(N)) {
bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y (fma u, v, (fneg z)))
if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
- isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
- N0.getOperand(2)->hasOneUse()) {
+ isContractableAndReassociableFMUL(N0.getOperand(2)) &&
+ N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -13271,7 +13280,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))
if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
- isContractableFMUL(N1.getOperand(2)) &&
+ isContractableAndReassociableFMUL(N1.getOperand(2)) &&
N1->hasOneUse() && NoSignedZero) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
@@ -13282,7 +13291,6 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
}
-
// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
if (N0.getOpcode() == PreferredFusedOpcode &&
@@ -13290,7 +13298,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
- if (isContractableFMUL(N020) &&
+ if (isContractableAndReassociableFMUL(N020) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N020.getValueType())) {
return DAG.getNode(
@@ -13314,7 +13322,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == PreferredFusedOpcode) {
SDValue N002 = N00.getOperand(2);
- if (isContractableFMUL(N002) &&
+ if (isContractableAndReassociableFMUL(N002) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
return DAG.getNode(
@@ -13336,7 +13344,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
N1->hasOneUse()) {
SDValue N120 = N1.getOperand(2).getOperand(0);
- if (isContractableFMUL(N120) &&
+ if (isContractableAndReassociableFMUL(N120) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N120.getValueType())) {
SDValue N1200 = N120.getOperand(0);
@@ -13363,7 +13371,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N100 = CvtSrc.getOperand(0);
SDValue N101 = CvtSrc.getOperand(1);
SDValue N102 = CvtSrc.getOperand(2);
- if (isContractableFMUL(N102) &&
+ if (isContractableAndReassociableFMUL(N102) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
CvtSrc.getValueType())) {
SDValue N1020 = N102.getOperand(0);
diff --git a/llvm/test/CodeGen/AMDGPU/fpext-free.ll b/llvm/test/CodeGen/AMDGPU/fpext-free.ll
index 03b0d86c15c53..f0f354c7a034f 100644
--- a/llvm/test/CodeGen/AMDGPU/fpext-free.ll
+++ b/llvm/test/CodeGen/AMDGPU/fpext-free.ll
@@ -309,10 +309,10 @@ entry:
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fsub_muladd_fpext_mul_f16_to_f32(float %x, float %y, float %z, half %u, half %v) #0 {
entry:
- %mul = fmul half %u, %v
+ %mul = fmul reassoc half %u, %v
%mul.ext = fpext half %mul to float
%fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext)
- %add = fsub float %fma, %z
+ %add = fsub reassoc float %fma, %z
ret float %add
}
@@ -350,10 +350,10 @@ entry:
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fsub_muladd_fpext_mul_f16_to_f32_commute(float %x, float %y, float %z, half %u, half %v) #0 {
entry:
- %mul = fmul half %u, %v
+ %mul = fmul reassoc half %u, %v
%mul.ext = fpext half %mul to float
%fma = call float @llvm.fmuladd.f32(float %y, float %z, float %mul.ext)
- %add = fsub float %x, %fma
+ %add = fsub reassoc float %x, %fma
ret float %add
}
diff --git a/llvm/test/CodeGen/PowerPC/fma-assoc.ll b/llvm/test/CodeGen/PowerPC/fma-assoc.ll
index 39b933804928a..1c21c877e1135 100644
--- a/llvm/test/CodeGen/PowerPC/fma-assoc.ll
+++ b/llvm/test/CodeGen/PowerPC/fma-assoc.ll
@@ -187,14 +187,16 @@ define double @test_FMADD_ASSOC_EXT4(float %A, float %B, float %C,
define double @test_FMSUB_ASSOC_EXT1(float %A, float %B, double %C,
; CHECK-LABEL: test_FMSUB_ASSOC_EXT1:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmsub 0, 1, 2, 5
-; CHECK-NEXT: fmadd 1, 3, 4, 0
+; CHECK-NEXT: fmuls 0, 1, 2
+; CHECK-NEXT: fmadd 0, 3, 4, 0
+; CHECK-NEXT: fsub 1, 0, 5
; CHECK-NEXT: blr
;
; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT1:
; CHECK-VSX: # %bb.0:
-; CHECK-VSX-NEXT: xsmsubmdp 1, 2, 5
-; CHECK-VSX-NEXT: xsmaddadp 1, 3, 4
+; CHECK-VSX-NEXT: fmuls 0, 1, 2
+; CHECK-VSX-NEXT: xsmaddadp 0, 3, 4
+; CHECK-VSX-NEXT: xssubdp 1, 0, 5
; CHECK-VSX-NEXT: blr
double %D, double %E) {
%F = fmul float %A, %B ; <float> [#uses=1]
@@ -208,15 +210,16 @@ define double @test_FMSUB_ASSOC_EXT1(float %A, float %B, double %C,
define double @test_FMSUB_ASSOC_EXT2(float %A, float %B, float %C,
; CHECK-LABEL: test_FMSUB_ASSOC_EXT2:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmsub 0, 3, 4, 5
-; CHECK-NEXT: fmadd 1, 1, 2, 0
+; CHECK-NEXT: fmuls 0, 3, 4
+; CHECK-NEXT: fmadds 0, 1, 2, 0
+; CHECK-NEXT: fsub 1, 0, 5
; CHECK-NEXT: blr
;
; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT2:
; CHECK-VSX: # %bb.0:
-; CHECK-VSX-NEXT: xsmsubmdp 3, 4, 5
-; CHECK-VSX-NEXT: xsmaddadp 3, 1, 2
-; CHECK-VSX-NEXT: fmr 1, 3
+; CHECK-VSX-NEXT: fmuls 0, 3, 4
+; CHECK-VSX-NEXT: fmadds 0, 1, 2, 0
+; CHECK-VSX-NEXT: xssubdp 1, 0, 5
; CHECK-VSX-NEXT: blr
float %D, double %E) {
%F = fmul float %A, %B ; <float> [#uses=1]
@@ -230,18 +233,16 @@ define double @test_FMSUB_ASSOC_EXT2(float %A, float %B, float %C,
define double @test_FMSUB_ASSOC_EXT3(float %A, float %B, double %C,
; CHECK-LABEL: test_FMSUB_ASSOC_EXT3:
; CHECK: # %bb.0:
-; CHECK-NEXT: fneg 0, 1
-; CHECK-NEXT: fmadd 0, 0, 2, 5
-; CHECK-NEXT: fneg 1, 3
-; CHECK-NEXT: fmadd 1, 1, 4, 0
+; CHECK-NEXT: fmuls 0, 1, 2
+; CHECK-NEXT: fmadd 0, 3, 4, 0
+; CHECK-NEXT: fsub 1, 5, 0
; CHECK-NEXT: blr
;
; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT3:
; CHECK-VSX: # %bb.0:
-; CHECK-VSX-NEXT: xsnegdp 1, 1
-; CHECK-VSX-NEXT: xsnegdp 0, 3
-; CHECK-VSX-NEXT: xsmaddmdp 1, 2, 5
-; CHECK-VSX-NEXT: xsmaddadp 1, 0, 4
+; CHECK-VSX-NEXT: fmuls 0, 1, 2
+; CHECK-VSX-NEXT: xsmaddadp 0, 3, 4
+; CHECK-VSX-NEXT: xssubdp 1, 5, 0
; CHECK-VSX-NEXT: blr
double %D, double %E) {
%F = fmul float %A, %B ; <float> [#uses=1]
@@ -255,19 +256,16 @@ define double @test_FMSUB_ASSOC_EXT3(float %A, float %B, double %C,
define double @test_FMSUB_ASSOC_EXT4(float %A, float %B, float %C,
; CHECK-LABEL: test_FMSUB_ASSOC_EXT4:
; CHECK: # %bb.0:
-; CHECK-NEXT: fneg 0, 3
-; CHECK-NEXT: fmadd 0, 0, 4, 5
-; CHECK-NEXT: fneg 1, 1
-; CHECK-NEXT: fmadd 1, 1, 2, 0
+; CHECK-NEXT: fmuls 0, 3, 4
+; CHECK-NEXT: fmadds 0, 1, 2, 0
+; CHECK-NEXT: fsub 1, 5, 0
; CHECK-NEXT: blr
;
; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT4:
; CHECK-VSX: # %bb.0:
-; CHECK-VSX-NEXT: xsnegdp 0, 3
-; CHECK-VSX-NEXT: xsnegdp 1, 1
-; CHECK-VSX-NEXT: xsmaddmdp 0, 4, 5
-; CHECK-VSX-NEXT: xsmaddadp 0, 1, 2
-; CHECK-VSX-NEXT: fmr 1, 0
+; CHECK-VSX-NEXT: fmuls 0, 3, 4
+; CHECK-VSX-NEXT: fmadds 0, 1, 2, 0
+; CHECK-VSX-NEXT: xssubdp 1, 5, 0
; CHECK-VSX-NEXT: blr
float %D, double %E) {
%F = fmul float %A, %B ; <float> [#uses=1]
More information about the llvm-commits
mailing list