[PATCH] D28087: X86 instr selection: combine ADDSUB + MUL to FMADDSUB
Vyacheslav Klochkov via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 23 21:27:05 PST 2016
v_klochkov created this revision.
v_klochkov added subscribers: llvm-commits, craig.topper, delena.
Hello,
Please review the patch that fuses MUL+ADDSUB operations into FMADDSUB
when FMA is available.
MUL+ADDSUB are often generated by LLVM (with -ffast-math flag) for
complex MUL operations.
C code:
#include <complex.h>
_Complex double a, b, dst;
void cmul() {
dst = a * b;
}
asm without patch:
vmovupd b(%rip), %xmm0
vmovddup a(%rip), %xmm1 # xmm1 = mem[0,0]
vmulpd %xmm1, %xmm0, %xmm1 <<<<<<<<<<<<<<<<<<<<<<<
vpermilpd $1, %xmm0, %xmm0 # xmm0 = xmm0[1,0]
vmovddup a+8(%rip), %xmm2 # xmm2 = mem[0,0]
vmulpd %xmm2, %xmm0, %xmm0
vaddsubpd %xmm0, %xmm1, %xmm0 <<<<<<<<<<<<<<<
vmovupd %xmm0, dst(%rip)
asm with the patch:
vmovupd b(%rip), %xmm0
vmovddup a(%rip), %xmm1 # xmm1 = mem[0,0]
vpermilpd $1, %xmm0, %xmm2 # xmm2 = xmm0[1,0]
vmovddup a+8(%rip), %xmm3 # xmm3 = mem[0,0]
vmulpd %xmm3, %xmm2, %xmm2
vfmaddsub231pd %xmm1, %xmm0, %xmm2 <<<<<<<<<<<<<<<<<<<
vmovupd %xmm2, dst(%rip)
Thank you,
Vyacheslav Klochkov
https://reviews.llvm.org/D28087
Files:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/fmaddsub-combine.ll
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32041,6 +32041,28 @@
return SDValue();
}
+/// Combine X86ISD::ADDSUB(FMUL(A, B), C) into X86ISD::FMADDSUB(A, B, C).
+static SDValue combineAddsub(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ SDValue Op1 = N->getOperand(0); // First ADDSUB operand; must be the FMUL.
+ if (N->getOpcode() != X86ISD::ADDSUB || Op1->getOpcode() != ISD::FMUL ||
+ !Op1->hasOneUse() || !Subtarget.hasFMA())
+ return SDValue(); // Wrong pattern, multi-use FMUL, or no FMA: no combine.
+
+ const TargetOptions &Options = DAG.getTarget().Options;
+ bool AllowFusion =
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
+ if (!AllowFusion)
+ return SDValue(); // Needs FPOpFusion::Fast or UnsafeFPMath.
+
+ EVT VT = N->getValueType(0);
+ SDValue Op3 = N->getOperand(1); // Second ADDSUB operand; passed last below.
+ SDValue Op2 = Op1->getOperand(1); // Second FMUL operand.
+ Op1 = Op1->getOperand(0); // First FMUL operand (replaces the FMUL itself).
+
+ return DAG.getNode(X86ISD::FMADDSUB, SDLoc(N), VT, Op1, Op2, Op3);
+}
+
/// Do target-specific dag combines on floating point negations.
static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -33383,6 +33405,7 @@
case ISD::UINT_TO_FP: return combineUIntToFP(N, DAG, Subtarget);
case ISD::FADD:
case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget);
+ case X86ISD::ADDSUB: return combineAddsub(N, DAG, Subtarget);
case ISD::FNEG: return combineFneg(N, DAG, Subtarget);
case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget);
case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget);
Index: llvm/test/CodeGen/X86/fmaddsub-combine.ll
===================================================================
--- llvm/test/CodeGen/X86/fmaddsub-combine.ll
+++ llvm/test/CodeGen/X86/fmaddsub-combine.ll
@@ -0,0 +1,34 @@
+
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+; This test checks the fusing of MUL + ADDSUB to FMADDSUB.
+
+define void @cmul() #0 { ; Complex multiply of @a and @b, stored to @dst.
+; CHECK-LABEL: cmul:
+; CHECK: vfmaddsub231pd
+; CHECK: retq
+entry:
+ %Ar = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @a, i64 0, i32 0), align 8
+ %Ai = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @a, i64 0, i32 1), align 8
+ %Bir = load <2 x double>, <2 x double>* bitcast ({ double, double }* @b to <2 x double>*), align 8
+ %Aur = insertelement <2 x double> undef, double %Ar, i32 0
+ %Arr = insertelement <2 x double> %Aur, double %Ar, i32 1 ; <Ar, Ar>
+ %ArBi.ArBr = fmul fast <2 x double> %Bir, %Arr ; both @b elements times Ar
+ %Bri = shufflevector <2 x double> %Bir, <2 x double> undef, <2 x i32> <i32 1, i32 0> ; @b with its two elements swapped
+ %Aui = insertelement <2 x double> undef, double %Ai, i32 0
+ %Aii = insertelement <2 x double> %Aui, double %Ai, i32 1 ; <Ai, Ai>
+ %AiBr.AiBi = fmul fast <2 x double> %Bri, %Aii ; swapped @b elements times Ai
+ %Sub = fsub fast <2 x double> %ArBi.ArBr, %AiBr.AiBi
+ %Add = fadd fast <2 x double> %ArBi.ArBr, %AiBr.AiBi
+ %Addsub = shufflevector <2 x double> %Sub, <2 x double> %Add, <2 x i32> <i32 0, i32 3> ; lane 0 from %Sub, lane 1 from %Add
+ store <2 x double> %Addsub, <2 x double>* bitcast ({ double, double }* @dst to <2 x double>*), align 8
+ ret void
+}
+
+@a = common global { double, double } zeroinitializer, align 8
+@b = common global { double, double } zeroinitializer, align 8
+@dst = common global { double, double } zeroinitializer, align 8
+
+attributes #0 = { nounwind "target-features"="+fma" "unsafe-fp-math"="true" }
+
+
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D28087.82431.patch
Type: text/x-patch
Size: 3538 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161224/33fbed72/attachment.bin>
More information about the llvm-commits
mailing list