[llvm] r290312 - AMDGPU: Check fast math flags in fadd/fsub combines
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 21 20:03:35 PST 2016
Author: arsenm
Date: Wed Dec 21 22:03:35 2016
New Revision: 290312
URL: http://llvm.org/viewvc/llvm-project?rev=290312&view=rev
Log:
AMDGPU: Check fast math flags in fadd/fsub combines

The fadd/fsub combines that fuse (fadd a, a) patterns into FMAD or FMA
previously fired only under the global -fp-contract=fast or
-enable-unsafe-fp-math options. Pass the relevant nodes to
getFusedOpcode so the per-instruction unsafe-algebra fast math flags
can enable the fusion as well.
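The effect can be seen on a small example (a minimal sketch, not part of
this commit; the kernel name is made up). Both fadds carry the
per-instruction 'fast' flag, so getFusedOpcode can now return ISD::FMA
even when neither -enable-unsafe-fp-math nor -fp-contract=fast is given:

  ; (fadd (fadd a, a), b) with 'fast' on both nodes can fold to
  ; fma(2.0, a, b) on subtargets where FMA beats separate mul/add.
  define void @fma_flags_example(double addrspace(1)* %out, double %a, double %b) {
    %add.0 = fadd fast double %a, %a
    %add.1 = fadd fast double %add.0, %b
    store double %add.1, double addrspace(1)* %out
    ret void
  }

With only one of the two nodes flagged 'fast' (and no global options),
the combine stays off, as the new fadd_a_a_b_f64_fast_add0/1 tests
below check.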
Modified:
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
llvm/trunk/test/CodeGen/AMDGPU/fmuladd.f64.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=290312&r1=290311&r2=290312&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Wed Dec 21 22:03:35 2016
@@ -3871,7 +3871,11 @@ SDValue SITargetLowering::performMinMaxC
   return SDValue();
 }
 
-unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG, EVT VT) const {
+unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
+                                          const SDNode *N0,
+                                          const SDNode *N1) const {
+  EVT VT = N0->getValueType(0);
+
   // Only do this if we are not trying to support denormals. v_mad_f32 does not
   // support denormals ever.
   if ((VT == MVT::f32 && !Subtarget->hasFP32Denormals()) ||
@@ -3879,7 +3883,10 @@ unsigned SITargetLowering::getFusedOpcod
     return ISD::FMAD;
 
   const TargetOptions &Options = DAG.getTarget().Options;
-  if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
+  if ((Options.AllowFPOpFusion == FPOpFusion::Fast ||
+       Options.UnsafeFPMath ||
+       (cast<BinaryWithFlagsSDNode>(N0)->Flags.hasUnsafeAlgebra() &&
+        cast<BinaryWithFlagsSDNode>(N1)->Flags.hasUnsafeAlgebra())) &&
       isFMAFasterThanFMulAndFAdd(VT)) {
     return ISD::FMA;
   }
@@ -3907,7 +3914,7 @@ SDValue SITargetLowering::performFAddCom
   if (LHS.getOpcode() == ISD::FADD) {
     SDValue A = LHS.getOperand(0);
     if (A == LHS.getOperand(1)) {
-      unsigned FusedOp = getFusedOpcode(DAG, VT);
+      unsigned FusedOp = getFusedOpcode(DAG, N, LHS.getNode());
       if (FusedOp != 0) {
         const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
         return DAG.getNode(FusedOp, SL, VT, Two, A, RHS);
@@ -3919,7 +3926,7 @@ SDValue SITargetLowering::performFAddCom
   if (RHS.getOpcode() == ISD::FADD) {
     SDValue A = RHS.getOperand(0);
     if (A == RHS.getOperand(1)) {
-      unsigned FusedOp = getFusedOpcode(DAG, VT);
+      unsigned FusedOp = getFusedOpcode(DAG, N, RHS.getNode());
       if (FusedOp != 0) {
         const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
         return DAG.getNode(FusedOp, SL, VT, Two, A, LHS);
@@ -3951,7 +3958,7 @@ SDValue SITargetLowering::performFSubCom
     // (fsub (fadd a, a), c) -> mad 2.0, a, (fneg c)
     SDValue A = LHS.getOperand(0);
     if (A == LHS.getOperand(1)) {
-      unsigned FusedOp = getFusedOpcode(DAG, VT);
+      unsigned FusedOp = getFusedOpcode(DAG, N, LHS.getNode());
       if (FusedOp != 0){
         const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
         SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
@@ -3966,7 +3973,7 @@ SDValue SITargetLowering::performFSubCom
 
     SDValue A = RHS.getOperand(0);
     if (A == RHS.getOperand(1)) {
-      unsigned FusedOp = getFusedOpcode(DAG, VT);
+      unsigned FusedOp = getFusedOpcode(DAG, N, RHS.getNode());
       if (FusedOp != 0){
         const SDValue NegTwo = DAG.getConstantFP(-2.0, SL, VT);
         return DAG.getNode(FusedOp, SL, VT, NegTwo, A, LHS);
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h?rev=290312&r1=290311&r2=290312&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h Wed Dec 21 22:03:35 2016
@@ -83,7 +83,8 @@ class SITargetLowering final : public AM
   SDValue performMinMaxCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
-  unsigned getFusedOpcode(const SelectionDAG &DAG, EVT VT) const;
+  unsigned getFusedOpcode(const SelectionDAG &DAG,
+                          const SDNode *N0, const SDNode *N1) const;
   SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
Modified: llvm/trunk/test/CodeGen/AMDGPU/fmuladd.f64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fmuladd.f64.ll?rev=290312&r1=290311&r2=290312&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fmuladd.f64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fmuladd.f64.ll Wed Dec 21 22:03:35 2016
@@ -112,6 +112,69 @@ define void @mad_sub_f64(double addrspac
   ret void
 }
 
+; GCN-LABEL: {{^}}fadd_a_a_b_f64_fast_add0:
+; GCN-STRICT: v_add_f64
+; GCN-STRICT: v_add_f64
+
+; GCN-CONTRACT: v_fma_f64
+define void @fadd_a_a_b_f64_fast_add0(double addrspace(1)* %out,
+                                      double addrspace(1)* %in1,
+                                      double addrspace(1)* %in2) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+  %gep.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
+  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+
+  %r0 = load volatile double, double addrspace(1)* %gep.0
+  %r1 = load volatile double, double addrspace(1)* %gep.1
+
+  %add.0 = fadd fast double %r0, %r0
+  %add.1 = fadd double %add.0, %r1
+  store double %add.1, double addrspace(1)* %gep.out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fadd_a_a_b_f64_fast_add1:
+; GCN-STRICT: v_add_f64
+; GCN-STRICT: v_add_f64
+
+; GCN-CONTRACT: v_fma_f64
+define void @fadd_a_a_b_f64_fast_add1(double addrspace(1)* %out,
+                                      double addrspace(1)* %in1,
+                                      double addrspace(1)* %in2) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+  %gep.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
+  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+
+  %r0 = load volatile double, double addrspace(1)* %gep.0
+  %r1 = load volatile double, double addrspace(1)* %gep.1
+
+  %add.0 = fadd double %r0, %r0
+  %add.1 = fadd fast double %add.0, %r1
+  store double %add.1, double addrspace(1)* %gep.out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fadd_a_a_b_f64_fast:
+; GCN: v_fma_f64
+define void @fadd_a_a_b_f64_fast(double addrspace(1)* %out,
+                                 double addrspace(1)* %in1,
+                                 double addrspace(1)* %in2) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+  %gep.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
+  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+
+  %r0 = load volatile double, double addrspace(1)* %gep.0
+  %r1 = load volatile double, double addrspace(1)* %gep.1
+
+  %add.0 = fadd fast double %r0, %r0
+  %add.1 = fadd fast double %add.0, %r1
+  store double %add.1, double addrspace(1)* %gep.out
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 declare double @llvm.fmuladd.f64(double, double, double) #1
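For context, the GCN-STRICT and GCN-CONTRACT prefixes come from the
file's RUN lines, which are outside this hunk. A plausible pair,
consistent with those prefixes (the actual flags and -mcpu choices in
the file may differ), would be:

  ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-STRICT %s
  ; RUN: llc -march=amdgcn -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-CONTRACT %s

Under the strict run, only fadd_a_a_b_f64_fast, where both fadds carry
the flag, expects v_fma_f64; the _add0/_add1 variants with a single
flagged fadd keep two v_add_f64 instructions.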