[llvm] r229439 - [X86][SSE] Add SSE MOVQ instructions to SSEPackedInt domain
Simon Pilgrim
llvm-dev at redking.me.uk
Mon Feb 16 13:50:57 PST 2015
Author: rksimon
Date: Mon Feb 16 15:50:56 2015
New Revision: 229439
URL: http://llvm.org/viewvc/llvm-project?rev=229439&view=rev
Log:
[X86][SSE] Add SSE MOVQ instructions to SSEPackedInt domain
Patch to explicitly add the SSE MOVQ (rr, mr, rm) instructions to the SSEPackedInt domain; this prevents a number of costly domain switches.
Differential Revision: http://reviews.llvm.org/D7600
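For background: on most x86 microarchitectures each SSE instruction executes in either the floating-point or the integer execution domain, and forwarding a value between domains incurs a bypass delay of a cycle or more. With MOVQ left domain-agnostic, the domain-fixing pass could materialize the idioms feeding it in the float domain even when the surrounding code was integer. A minimal sketch of the effect (hypothetical stack offset; the actual before/after sequences appear in the test updates below):

  ; before: the zero idiom was picked from the float domain
  xorps  %xmm0, %xmm0          ; float-domain zero
  movq   %xmm0, -8(%rsp)       ; integer store, cross-domain forward

  ; after: with MOVQ in SSEPackedInt the whole chain stays integer
  pxor   %xmm0, %xmm0          ; integer-domain zero
  movq   %xmm0, -8(%rsp)       ; same domain, no bypass delay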
Modified:
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/2011-10-19-widen_vselect.ll
llvm/trunk/test/CodeGen/X86/combine-or.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll
llvm/trunk/test/CodeGen/X86/widen_load-1.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=229439&r1=229438&r2=229439&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Mon Feb 16 15:50:56 2015
@@ -5011,7 +5011,7 @@ def : InstAlias<"vmovd\t{$src, $dst|$dst
// Move Quadword Int to Packed Quadword Int
//
-let SchedRW = [WriteLoad] in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteLoad] in {
def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -5023,12 +5023,12 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (ou
(v2i64 (scalar_to_vector (loadi64 addr:$src))))],
IIC_SSE_MOVDQ>, XS,
Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
-} // SchedRW
+} // ExeDomain, SchedRW
//===---------------------------------------------------------------------===//
// Move Packed Quadword Int to Quadword Int
//
-let SchedRW = [WriteStore] in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in {
def VMOVPQI2QImr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (vector_extract (v2i64 VR128:$src),
@@ -5039,7 +5039,7 @@ def MOVPQI2QImr : S2I<0xD6, MRMDestMem,
[(store (i64 (vector_extract (v2i64 VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOVDQ>;
-} // SchedRW
+} // ExeDomain, SchedRW
// For disassembler only
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
@@ -5060,7 +5060,7 @@ let Predicates = [UseSSE2] in
def : Pat<(int_x86_sse2_storel_dq addr:$dst, VR128:$src),
(MOVPQI2QImr addr:$dst, VR128:$src)>;
-let isCodeGenOnly = 1, AddedComplexity = 20 in {
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1, AddedComplexity = 20 in {
def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -5076,7 +5076,7 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (o
(loadi64 addr:$src))))))],
IIC_SSE_MOVDQ>,
XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>;
-}
+} // ExeDomain, isCodeGenOnly, AddedComplexity
let Predicates = [UseAVX], AddedComplexity = 20 in {
def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
@@ -5102,7 +5102,7 @@ def : Pat<(v4i64 (X86vzload addr:$src)),
// Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
// IA32 document. movq xmm1, xmm2 does clear the high bits.
//
-let SchedRW = [WriteVecLogic] in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in {
let AddedComplexity = 15 in
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}",
@@ -5115,9 +5115,9 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg,
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
IIC_SSE_MOVQ_RR>,
XS, Requires<[UseSSE2]>;
-} // SchedRW
+} // ExeDomain, SchedRW
-let isCodeGenOnly = 1, SchedRW = [WriteVecLogicLd] in {
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1, SchedRW = [WriteVecLogicLd] in {
let AddedComplexity = 20 in
def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
@@ -5133,7 +5133,7 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem,
IIC_SSE_MOVDQ>,
XS, Requires<[UseSSE2]>;
}
-} // isCodeGenOnly, SchedRW
+} // ExeDomain, isCodeGenOnly, SchedRW
let AddedComplexity = 20 in {
let Predicates = [UseAVX] in {
Modified: llvm/trunk/test/CodeGen/X86/2011-10-19-widen_vselect.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2011-10-19-widen_vselect.ll?rev=229439&r1=229438&r2=229439&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2011-10-19-widen_vselect.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2011-10-19-widen_vselect.ll Mon Feb 16 15:50:56 2015
@@ -26,7 +26,7 @@ entry:
}
; CHECK-LABEL: zero_test
-; CHECK: xorps %xmm0, %xmm0
+; CHECK: pxor %xmm0, %xmm0
; CHECK: ret
define void @zero_test() {
Modified: llvm/trunk/test/CodeGen/X86/combine-or.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-or.ll?rev=229439&r1=229438&r2=229439&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-or.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-or.ll Mon Feb 16 15:50:56 2015
@@ -255,7 +255,7 @@ define <4 x i32> @test19(<4 x i32> %a, <
define <2 x i64> @test20(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test20:
; CHECK: # BB#0:
-; CHECK-NEXT: orps %xmm1, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT: retq
%shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll?rev=229439&r1=229438&r2=229439&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll Mon Feb 16 15:50:56 2015
@@ -54,7 +54,7 @@ define void @test1() {
;
; X64-LABEL: test1:
; X64: ## BB#0: ## %entry
-; X64-NEXT: xorps %xmm0, %xmm0
+; X64-NEXT: pxor %xmm0, %xmm0
; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %mm0
; X64-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,2,2,3,4,5,6,7]
Modified: llvm/trunk/test/CodeGen/X86/widen_load-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_load-1.ll?rev=229439&r1=229438&r2=229439&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_load-1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_load-1.ll Mon Feb 16 15:50:56 2015
@@ -9,8 +9,8 @@
; SSE: movaps %xmm0, (%rsp)
; SSE: callq killcommon
-; AVX: vmovaps compl+128(%rip), %xmm0
-; AVX: vmovaps %xmm0, (%rsp)
+; AVX: vmovdqa compl+128(%rip), %xmm0
+; AVX: vmovdqa %xmm0, (%rsp)
; AVX: callq killcommon
@compl = linkonce global [20 x i64] zeroinitializer, align 64 ; <[20 x i64]*> [#uses=1]