[llvm-commits] [llvm] r163035 - in /llvm/trunk: lib/Target/X86/X86InstrFMA.td lib/Target/X86/X86InstrInfo.cpp test/CodeGen/X86/fma_patterns.ll
Craig Topper
craig.topper at gmail.com
Fri Aug 31 16:10:34 PDT 2012
Author: ctopper
Date: Fri Aug 31 18:10:34 2012
New Revision: 163035
URL: http://llvm.org/viewvc/llvm-project?rev=163035&view=rev
Log:
Mark FMA4 instructions as commutable and add them to the folding tables.
Modified:
llvm/trunk/lib/Target/X86/X86InstrFMA.td
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/fma_patterns.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrFMA.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFMA.td?rev=163035&r1=163034&r2=163035&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFMA.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFMA.td Fri Aug 31 18:10:34 2012
@@ -200,6 +200,7 @@
multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop, ValueType OpVT, SDNode OpNode,
PatFrag mem_frag> {
+ let isCommutable = 1 in
def rr : FMA4<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
@@ -228,6 +229,7 @@
multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
ComplexPattern mem_cpat, Intrinsic Int> {
+ let isCommutable = 1 in
def rr_Int : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -251,6 +253,7 @@
multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT128, ValueType OpVT256,
PatFrag ld_frag128, PatFrag ld_frag256> {
+ let isCommutable = 1 in
def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -270,6 +273,7 @@
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(OpNode VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>;
+ let isCommutable = 1 in
def rrY : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=163035&r1=163034&r2=163035&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Fri Aug 31 18:10:34 2012
@@ -1110,6 +1110,36 @@
{ X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, TB_ALIGN_32 },
{ X86::VPXORYrr, X86::VPXORYrm, TB_ALIGN_32 },
// FIXME: add AVX 256-bit foldable instructions
+
+ // FMA4 foldable patterns
+ { X86::VFMADDSS4rr, X86::VFMADDSS4mr, TB_ALIGN_16 },
+ { X86::VFMADDSD4rr, X86::VFMADDSD4mr, TB_ALIGN_16 },
+ { X86::VFMADDPS4rr, X86::VFMADDPS4mr, TB_ALIGN_16 },
+ { X86::VFMADDPD4rr, X86::VFMADDPD4mr, TB_ALIGN_16 },
+ { X86::VFMADDPS4rrY, X86::VFMADDPS4mrY, TB_ALIGN_32 },
+ { X86::VFMADDPD4rrY, X86::VFMADDPD4mrY, TB_ALIGN_32 },
+ { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, TB_ALIGN_16 },
+ { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, TB_ALIGN_16 },
+ { X86::VFNMADDPS4rrY, X86::VFNMADDPS4mrY, TB_ALIGN_32 },
+ { X86::VFNMADDPD4rrY, X86::VFNMADDPD4mrY, TB_ALIGN_32 },
+ { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, TB_ALIGN_16 },
+ { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, TB_ALIGN_16 },
+ { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, TB_ALIGN_16 },
+ { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, TB_ALIGN_16 },
+ { X86::VFMSUBPS4rrY, X86::VFMSUBPS4mrY, TB_ALIGN_32 },
+ { X86::VFMSUBPD4rrY, X86::VFMSUBPD4mrY, TB_ALIGN_32 },
+ { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, TB_ALIGN_16 },
+ { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, TB_ALIGN_16 },
+ { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4mrY, TB_ALIGN_32 },
+ { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4mrY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, TB_ALIGN_16 },
+ { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, TB_ALIGN_16 },
+ { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4mrY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4mrY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, TB_ALIGN_16 },
+ { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_16 },
+ { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_32 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
@@ -1237,6 +1267,36 @@
{ X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_32 },
{ X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_32 },
{ X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_32 },
+
+ // FMA4 foldable patterns
+ { X86::VFMADDSS4rr, X86::VFMADDSS4rm, TB_ALIGN_16 },
+ { X86::VFMADDSD4rr, X86::VFMADDSD4rm, TB_ALIGN_16 },
+ { X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_16 },
+ { X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_16 },
+ { X86::VFMADDPS4rrY, X86::VFMADDPS4rmY, TB_ALIGN_32 },
+ { X86::VFMADDPD4rrY, X86::VFMADDPD4rmY, TB_ALIGN_32 },
+ { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_16 },
+ { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_16 },
+ { X86::VFNMADDPS4rrY, X86::VFNMADDPS4rmY, TB_ALIGN_32 },
+ { X86::VFNMADDPD4rrY, X86::VFNMADDPD4rmY, TB_ALIGN_32 },
+ { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, TB_ALIGN_16 },
+ { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, TB_ALIGN_16 },
+ { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_16 },
+ { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_16 },
+ { X86::VFMSUBPS4rrY, X86::VFMSUBPS4rmY, TB_ALIGN_32 },
+ { X86::VFMSUBPD4rrY, X86::VFMSUBPD4rmY, TB_ALIGN_32 },
+ { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_16 },
+ { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_16 },
+ { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4rmY, TB_ALIGN_32 },
+ { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4rmY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_16 },
+ { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_16 },
+ { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4rmY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4rmY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_16 },
+ { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_16 },
+ { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_32 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {
Modified: llvm/trunk/test/CodeGen/X86/fma_patterns.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma_patterns.ll?rev=163035&r1=163034&r2=163035&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma_patterns.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma_patterns.ll Fri Aug 31 18:10:34 2012
@@ -181,3 +181,32 @@
%res = fsub float %y, %a2
ret float %res
}
+
+; CHECK: test_x86_fmadd_ps
+; CHECK: vmovaps (%rdi), %xmm2
+; CHECK: vfmadd213ps %xmm1, %xmm0, %xmm2
+; CHECK: ret
+; CHECK_FMA4: test_x86_fmadd_ps
+; CHECK_FMA4: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
+; CHECK_FMA4: ret
+define <4 x float> @test_x86_fmadd_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
+ %x = load <4 x float>* %a0
+ %y = fmul <4 x float> %x, %a1
+ %res = fadd <4 x float> %y, %a2
+ ret <4 x float> %res
+}
+
+; CHECK: test_x86_fmsub_ps
+; CHECK: vmovaps (%rdi), %xmm2
+; CHECK: fmsub213ps %xmm1, %xmm0, %xmm2
+; CHECK: ret
+; CHECK_FMA4: test_x86_fmsub_ps
+; CHECK_FMA4: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0
+; CHECK_FMA4: ret
+define <4 x float> @test_x86_fmsub_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
+ %x = load <4 x float>* %a0
+ %y = fmul <4 x float> %x, %a1
+ %res = fsub <4 x float> %y, %a2
+ ret <4 x float> %res
+}
+
More information about the llvm-commits
mailing list