[PATCH] D28087: X86 instr selection: combine ADDSUB + MUL to FMADDSUB

Fri Dec 23 21:27:05 PST 2016

v_klochkov created this revision.
v_klochkov added subscribers: llvm-commits, craig.topper, delena.

Hello,

Please review this patch, which fuses MUL+ADDSUB operations into FMADDSUB
when FMA is available and floating-point operation fusion is allowed.

LLVM often generates MUL+ADDSUB sequences (with the -ffast-math flag) for
complex multiplication.

C code:
#include <complex.h>
_Complex double a, b, dst;
void cmul() {

  dst = a * b;

}

asm without patch:

  vmovupd b(%rip), %xmm0
  vmovddup        a(%rip), %xmm1  # xmm1 = mem[0,0]
  vmulpd  %xmm1, %xmm0, %xmm1 <<<<<<<<<<<<<<<<<<<<<<<
  vpermilpd       $1, %xmm0, %xmm0 # xmm0 = xmm0[1,0]
  vmovddup        a+8(%rip), %xmm2 # xmm2 = mem[0,0]
  vmulpd  %xmm2, %xmm0, %xmm0
  vaddsubpd       %xmm0, %xmm1, %xmm0 <<<<<<<<<<<<<<<
  vmovupd %xmm0, dst(%rip)

asm with the patch:

  vmovupd b(%rip), %xmm0
  vmovddup        a(%rip), %xmm1  # xmm1 = mem[0,0]
  vpermilpd       $1, %xmm0, %xmm2 # xmm2 = xmm0[1,0]
  vmovddup        a+8(%rip), %xmm3 # xmm3 = mem[0,0]
  vmulpd  %xmm3, %xmm2, %xmm2
  vfmaddsub231pd  %xmm1, %xmm0, %xmm2 <<<<<<<<<<<<<<<<<<<
  vmovupd %xmm2, dst(%rip)

Thank you,
Vyacheslav Klochkov


https://reviews.llvm.org/D28087

Files:
  llvm/lib/Target/X86/X86ISelLowering.cpp
  llvm/test/CodeGen/X86/fmaddsub-combine.ll


Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================

--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32041,6 +32041,28 @@
   return SDValue();
 }
 
+/// Do target-specific DAG combines of MUL and ADDSUB nodes into FMADDSUB.
+static SDValue combineAddsub(SDNode *N, SelectionDAG &DAG,
+                             const X86Subtarget &Subtarget) {
+  SDValue Op1 = N->getOperand(0);
+  if (N->getOpcode() != X86ISD::ADDSUB || Op1->getOpcode() != ISD::FMUL ||
+      !Op1->hasOneUse() || !Subtarget.hasFMA())
+    return SDValue();
+
+  const TargetOptions &Options = DAG.getTarget().Options;
+  bool AllowFusion =
+      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
+  if (!AllowFusion)
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  SDValue Op3 = N->getOperand(1);
+  SDValue Op2 = Op1->getOperand(1);
+  Op1 = Op1->getOperand(0);
+
+  return DAG.getNode(X86ISD::FMADDSUB, SDLoc(N), VT, Op1, Op2, Op3);
+}
+
 /// Do target-specific dag combines on floating point negations.
 static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
                            const X86Subtarget &Subtarget) {
@@ -33383,6 +33405,7 @@
   case ISD::UINT_TO_FP:     return combineUIntToFP(N, DAG, Subtarget);
   case ISD::FADD:
   case ISD::FSUB:           return combineFaddFsub(N, DAG, Subtarget);
+  case X86ISD::ADDSUB:      return combineAddsub(N, DAG, Subtarget);
   case ISD::FNEG:           return combineFneg(N, DAG, Subtarget);
   case ISD::TRUNCATE:       return combineTruncate(N, DAG, Subtarget);
   case X86ISD::FAND:        return combineFAnd(N, DAG, Subtarget);
Index: llvm/test/CodeGen/X86/fmaddsub-combine.ll
===================================================================
--- llvm/test/CodeGen/X86/fmaddsub-combine.ll
+++ llvm/test/CodeGen/X86/fmaddsub-combine.ll
@@ -0,0 +1,34 @@
+
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+; This test checks the fusing of MUL + ADDSUB into FMADDSUB.
+
+define void @cmul() #0 {
+; CHECK-LABEL: cmul:
+; CHECK: vfmaddsub231pd
+; CHECK: retq
+entry:
+  %Ar = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @a, i64 0, i32 0), align 8
+  %Ai = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @a, i64 0, i32 1), align 8
+  %Bir = load <2 x double>, <2 x double>* bitcast ({ double, double }* @b to <2 x double>*), align 8
+  %Aur = insertelement <2 x double> undef, double %Ar, i32 0
+  %Arr = insertelement <2 x double> %Aur, double %Ar, i32 1
+  %ArBi.ArBr = fmul fast <2 x double> %Bir, %Arr
+  %Bri = shufflevector <2 x double> %Bir, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+  %Aui = insertelement <2 x double> undef, double %Ai, i32 0
+  %Aii = insertelement <2 x double> %Aui, double %Ai, i32 1
+  %AiBr.AiBi = fmul fast <2 x double> %Bri, %Aii
+  %Sub = fsub fast <2 x double> %ArBi.ArBr, %AiBr.AiBi
+  %Add = fadd fast <2 x double> %ArBi.ArBr, %AiBr.AiBi
+  %Addsub = shufflevector <2 x double> %Sub, <2 x double> %Add, <2 x i32> <i32 0, i32 3>
+  store <2 x double> %Addsub, <2 x double>* bitcast ({ double, double }* @dst to <2 x double>*), align 8
+  ret void
+}
+
+@a = common global { double, double } zeroinitializer, align 8
+@b = common global { double, double } zeroinitializer, align 8
+@dst = common global { double, double } zeroinitializer, align 8
+
+attributes #0 = { nounwind "target-features"="+fma" "unsafe-fp-math"="true" }
+
+


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D28087.82431.patch
Type: text/x-patch
Size: 3538 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161224/33fbed72/attachment.bin>