[llvm] r254016 - [X86][FMA] Optimize FNEG(FMA) Patterns
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 24 12:31:46 PST 2015
Author: rksimon
Date: Tue Nov 24 14:31:46 2015
New Revision: 254016
URL: http://llvm.org/viewvc/llvm-project?rev=254016&view=rev
Log:
[X86][FMA] Optimize FNEG(FMA) Patterns
X86 needs to use its own FMA opcodes, preventing the standard FNEG(FMA) pattern table recognition method used by other platforms. This patch adds support for lowering FNEG(FMA(X,Y,Z)) into a single suitably negated FMA instruction.
Fix for PR24364
Differential Revision: http://reviews.llvm.org/D14906
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/fma_patterns.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=254016&r1=254015&r2=254016&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Nov 24 14:31:46 2015
@@ -1773,6 +1773,7 @@ X86TargetLowering::X86TargetLowering(con
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
+ setTargetDAGCombine(ISD::FNEG);
setTargetDAGCombine(ISD::FMA);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::LOAD);
@@ -26148,6 +26149,33 @@ static SDValue PerformFSUBCombine(SDNode
return SDValue();
}
+/// Do target-specific dag combines on floating point negations.
+static SDValue PerformFNEGCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ SDValue Arg = N->getOperand(0);
+
+ // If we're negating a FMA node, then we can adjust the
+ // instruction to include the extra negation.
+ if (Arg.hasOneUse()) {
+ switch (Arg.getOpcode()) {
+ case X86ISD::FMADD:
+ return DAG.getNode(X86ISD::FNMSUB, SDLoc(N), VT, Arg.getOperand(0),
+ Arg.getOperand(1), Arg.getOperand(2));
+ case X86ISD::FMSUB:
+ return DAG.getNode(X86ISD::FNMADD, SDLoc(N), VT, Arg.getOperand(0),
+ Arg.getOperand(1), Arg.getOperand(2));
+ case X86ISD::FNMADD:
+ return DAG.getNode(X86ISD::FMSUB, SDLoc(N), VT, Arg.getOperand(0),
+ Arg.getOperand(1), Arg.getOperand(2));
+ case X86ISD::FNMSUB:
+ return DAG.getNode(X86ISD::FMADD, SDLoc(N), VT, Arg.getOperand(0),
+ Arg.getOperand(1), Arg.getOperand(2));
+ }
+ }
+ return SDValue();
+}
+
/// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
@@ -27042,6 +27070,7 @@ SDValue X86TargetLowering::PerformDAGCom
case ISD::UINT_TO_FP: return PerformUINT_TO_FPCombine(N, DAG, Subtarget);
case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
+ case ISD::FNEG: return PerformFNEGCombine(N, DAG, Subtarget);
case ISD::TRUNCATE: return PerformTRUNCATECombine(N, DAG, Subtarget);
case X86ISD::FXOR:
case X86ISD::FOR: return PerformFORCombine(N, DAG, Subtarget);
Modified: llvm/trunk/test/CodeGen/X86/fma_patterns.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma_patterns.ll?rev=254016&r1=254015&r2=254016&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma_patterns.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma_patterns.ll Tue Nov 24 14:31:46 2015
@@ -568,6 +568,74 @@ define <4 x double> @test_v4f64_interp(<
ret <4 x double> %r
}
+; (fneg (fma x, y, z)) -> (fma x, -y, -z)
+
+define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK_FMA-LABEL: test_v4f32_fneg_fmadd:
+; CHECK_FMA: # BB#0:
+; CHECK_FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
+; CHECK_FMA-NEXT: retq
+;
+; CHECK_FMA4-LABEL: test_v4f32_fneg_fmadd:
+; CHECK_FMA4: # BB#0:
+; CHECK_FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK_FMA4-NEXT: retq
+ %mul = fmul <4 x float> %a0, %a1
+ %add = fadd <4 x float> %mul, %a2
+ %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
+ ret <4 x float> %neg
+}
+
+define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK_FMA-LABEL: test_v4f64_fneg_fmsub:
+; CHECK_FMA: # BB#0:
+; CHECK_FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0
+; CHECK_FMA-NEXT: retq
+;
+; CHECK_FMA4-LABEL: test_v4f64_fneg_fmsub:
+; CHECK_FMA4: # BB#0:
+; CHECK_FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK_FMA4-NEXT: retq
+ %mul = fmul <4 x double> %a0, %a1
+ %sub = fsub <4 x double> %mul, %a2
+ %neg = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
+ ret <4 x double> %neg
+}
+
+define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK_FMA-LABEL: test_v4f32_fneg_fnmadd:
+; CHECK_FMA: # BB#0:
+; CHECK_FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
+; CHECK_FMA-NEXT: retq
+;
+; CHECK_FMA4-LABEL: test_v4f32_fneg_fnmadd:
+; CHECK_FMA4: # BB#0:
+; CHECK_FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK_FMA4-NEXT: retq
+ %mul = fmul <4 x float> %a0, %a1
+ %neg0 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul
+ %add = fadd <4 x float> %neg0, %a2
+ %neg1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
+ ret <4 x float> %neg1
+}
+
+define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK_FMA-LABEL: test_v4f64_fneg_fnmsub:
+; CHECK_FMA: # BB#0:
+; CHECK_FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
+; CHECK_FMA-NEXT: retq
+;
+; CHECK_FMA4-LABEL: test_v4f64_fneg_fnmsub:
+; CHECK_FMA4: # BB#0:
+; CHECK_FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK_FMA4-NEXT: retq
+ %mul = fmul <4 x double> %a0, %a1
+ %neg0 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul
+ %sub = fsub <4 x double> %neg0, %a2
+ %neg1 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
+ ret <4 x double> %neg1
+}
+
; (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 {
More information about the llvm-commits
mailing list