[llvm] 1b2fe71 - [DAGCombiner] Tighten reassociation of visitFMA

Mon Oct 19 19:17:17 PDT 2020

Author: Qiu Chaofan
Date: 2020-10-20T10:13:01+08:00
New Revision: 1b2fe71ecf6bd647a244fc64c231e31dfe1faaa2

URL: https://github.com/llvm/llvm-project/commit/1b2fe71ecf6bd647a244fc64c231e31dfe1faaa2
DIFF: https://github.com/llvm/llvm-project/commit/1b2fe71ecf6bd647a244fc64c231e31dfe1faaa2.diff

LOG: [DAGCombiner] Tighten reassociation of visitFMA

From LangRef, FMF contract should not enable reassociating to form
arbitrary contractions. So it should not help rearrange nodes like
(fma (fmul x, c1), c2, y) into (fma x, c1*c2, y).

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D89527

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/PowerPC/fma-combine.ll
    llvm/test/CodeGen/X86/fma-scalar-combine.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b9b82931e410..f4cf77ba8bc0 100644

--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13217,10 +13217,11 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
   const TargetOptions &Options = DAG.getTarget().Options;
+  // FMA nodes have flags that propagate to the created nodes.
   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
 
-  // FMA nodes have flags that propagate to the created nodes.
-  bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
+  bool UnsafeFPMath =
+      Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
 
   // Constant fold FMA.
   if (isa<ConstantFPSDNode>(N0) &&

diff  --git a/llvm/test/CodeGen/PowerPC/fma-combine.ll b/llvm/test/CodeGen/PowerPC/fma-combine.ll
index 217d520f8918..c07cb1cc12e5 100644
--- a/llvm/test/CodeGen/PowerPC/fma-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/fma-combine.ll
@@ -313,5 +313,37 @@ entry:
   ret <2 x double> %0
 }
 
+define double @fma_combine_const(double %a, double %b) {
+; CHECK-FAST-LABEL: fma_combine_const:
+; CHECK-FAST:       # %bb.0: # %entry
+; CHECK-FAST-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
+; CHECK-FAST-NEXT:    lfd 0, .LCPI9_0@toc@l(3)
+; CHECK-FAST-NEXT:    xsmaddadp 2, 1, 0
+; CHECK-FAST-NEXT:    fmr 1, 2
+; CHECK-FAST-NEXT:    blr
+;
+; CHECK-FAST-NOVSX-LABEL: fma_combine_const:
+; CHECK-FAST-NOVSX:       # %bb.0: # %entry
+; CHECK-FAST-NOVSX-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
+; CHECK-FAST-NOVSX-NEXT:    lfd 0, .LCPI9_0@toc@l(3)
+; CHECK-FAST-NOVSX-NEXT:    fmadd 1, 1, 0, 2
+; CHECK-FAST-NOVSX-NEXT:    blr
+;
+; CHECK-LABEL: fma_combine_const:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
+; CHECK-NEXT:    lfd 0, .LCPI9_0@toc@l(3)
+; CHECK-NEXT:    addis 3, 2, .LCPI9_1@toc@ha
+; CHECK-NEXT:    lfd 3, .LCPI9_1@toc@l(3)
+; CHECK-NEXT:    xsmuldp 0, 1, 0
+; CHECK-NEXT:    fmr 1, 2
+; CHECK-NEXT:    xsmaddadp 1, 0, 3
+; CHECK-NEXT:    blr
+entry:
+  %0 = fmul double %a, 1.1
+  %1 = call contract double @llvm.fma.f64(double %0, double 2.1, double %b)
+  ret double %1
+}
+
 declare double @llvm.fma.f64(double, double, double) nounwind readnone
 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

diff  --git a/llvm/test/CodeGen/X86/fma-scalar-combine.ll b/llvm/test/CodeGen/X86/fma-scalar-combine.ll
index aa7374747cf8..1804828df293 100644
--- a/llvm/test/CodeGen/X86/fma-scalar-combine.ll
+++ b/llvm/test/CodeGen/X86/fma-scalar-combine.ll
@@ -542,3 +542,19 @@ entry:
   %8 = insertelement <2 x double> %c, double %7, i64 0
   ret <2 x double> %8
 }
+
+; Don't fold into (fmul x, c1+c2) if reassoc not set
+define float @fma_const_fmul(float %x) {
+; CHECK-LABEL: fma_const_fmul:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x59,0x0d,A,A,A,A]
+; CHECK-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; CHECK-NEXT:    vfmadd132ss {{.*}}(%rip), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x99,0x05,A,A,A,A]
+; CHECK-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; CHECK-NEXT:    # xmm0 = (xmm0 * mem) + xmm1
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %mul1 = fmul contract float %x, 10.0
+  %mul2 = fmul contract float %x, 11.0
+  %add1 = fadd contract float %mul1, %mul2
+  ret float %add1
+}