[PATCH] D107551: [DAGCombine] Add node level checks for fp-contract and fp-ninf in visitFMULForFMADistributiveCombine().
Abinav Puthan Purayil via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 10 20:58:06 PDT 2021
abinavpp updated this revision to Diff 365654.
abinavpp marked an inline comment as done.
abinavpp added a comment.
Rebased; addressed review comments.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D107551/new/
https://reviews.llvm.org/D107551
Files:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AMDGPU/fma.ll
llvm/test/CodeGen/X86/fma-scalar-combine.ll
Index: llvm/test/CodeGen/X86/fma-scalar-combine.ll
===================================================================
--- llvm/test/CodeGen/X86/fma-scalar-combine.ll
+++ llvm/test/CodeGen/X86/fma-scalar-combine.ll
@@ -558,3 +558,16 @@
%add1 = fadd contract float %mul1, %mul2
ret float %add1
}
+
+; Fold (fmul (fadd x, 1.0), y) -> (fma x, y, y) without FP-specific command-line
+; options.
+define float @combine_fmul_distributive(float %x, float %y) {
+; CHECK-LABEL: combine_fmul_distributive:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd231ss %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xb9,0xc0]
+; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) + xmm0
+; CHECK-NEXT: retq # encoding: [0xc3]
+ %fadd = fadd ninf float %y, 1.0
+ %fmul = fmul contract float %fadd, %x
+ ret float %fmul
+}
Index: llvm/test/CodeGen/AMDGPU/fma.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fma.ll
+++ llvm/test/CodeGen/AMDGPU/fma.ll
@@ -144,3 +144,13 @@
store float %tmp10, float addrspace(1)* %gep.out
ret void
}
+
+; Fold (fmul (fadd x, 1.0), y) -> (fma x, y, y) without FP-specific command-line
+; options.
+; FUNC-LABEL: {{^}}fold_fmul_distributive:
+; GFX906: v_fmac_f32_e32 v0, v1, v0
+define float @fold_fmul_distributive(float %x, float %y) {
+ %fadd = fadd ninf float %y, 1.0
+ %fmul = fmul contract float %fadd, %x
+ ret float %fmul
+}
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13018,6 +13018,20 @@
return DAG.getBuildVector(VT, DL, Ops);
}
+// Returns true if floating-point contraction is allowed on the FMUL-SDValue
+// `N`.
+static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
+ assert(N.getOpcode() == ISD::FMUL);
+
+ return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
+ N->getFlags().hasAllowContract();
+}
+
+// Return true if `N` can assume no infinities involved in its computation.
+static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
+ return Options.NoInfsFPMath || N.getNode()->getFlags().hasNoInfs();
+}
+
/// Try to perform FMA combining on a given FADD node.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N0 = N->getOperand(0);
@@ -13553,12 +13567,13 @@
// The transforms below are incorrect when x == 0 and y == inf, because the
// intermediate multiplication produces a nan.
- if (!Options.NoInfsFPMath)
+ SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
+ if (!hasNoInfs(Options, FAdd))
return SDValue();
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
+ isContractableFMUL(Options, SDValue(N, 0)) &&
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D107551.365654.patch
Type: text/x-patch
Size: 3090 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210811/d85f429e/attachment-0001.bin>
More information about the llvm-commits
mailing list