[llvm] r348086 - [X86] Add custom type legalization for v2i32/v4i16/v8i8->mmx bitcasts to avoid a store/load to/from the stack.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sat Dec 1 21:46:50 PST 2018


Author: ctopper
Date: Sat Dec  1 21:46:50 2018
New Revision: 348086

URL: http://llvm.org/viewvc/llvm-project?rev=348086&view=rev
Log:
[X86] Add custom type legalization for v2i32/v4i16/v8i8->mmx bitcasts to avoid a store/load to/from the stack.

Widen the input to a 128-bit vector by padding with undef elements, then use a movdq2q to move the value from an xmm register to an mmx register.
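
For illustration, a minimal IR example (not one of the patch's test cases) that should hit the new custom legalization: the illegal <2 x i32> source is widened to a 128-bit vector, bitcast to v2i64, and the low 64 bits are moved to the mmx register file with movdq2q instead of being spilled to a stack slot and reloaded.

; Hypothetical function, assuming an x86-64 target with SSE2; the <2 x i32>
; argument arrives in an xmm register and the x86_mmx result is returned in mm0.
define x86_mmx @cast_v2i32_to_mmx(<2 x i32> %v) {
  %m = bitcast <2 x i32> %v to x86_mmx
  ret x86_mmx %m
}

The test updates below show the same effect on existing cases: a movq to the stack followed by an mmx load from the stack is replaced by a single movdq2q.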

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/3dnow-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/mmx-arith.ll
    llvm/trunk/test/CodeGen/X86/vec_insert-7.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=348086&r1=348085&r2=348086&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Dec  1 21:46:50 2018
@@ -25221,7 +25221,8 @@ static SDValue LowerBITCAST(SDValue Op,
   if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
       SrcVT == MVT::i64) {
     assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
-    if (DstVT != MVT::f64 && DstVT != MVT::i64)
+    if (DstVT != MVT::f64 && DstVT != MVT::i64 &&
+        !(DstVT == MVT::x86mmx && SrcVT.isVector()))
       // This conversion needs to be expanded.
       return SDValue();
 
@@ -25253,8 +25254,13 @@ static SDValue LowerBITCAST(SDValue Op,
 
     EVT NewVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
     SDValue BV = DAG.getBuildVector(NewVT, dl, Elts);
-    MVT V2X64VT = MVT::getVectorVT(DstVT, 2);
+
+    MVT V2X64VT = DstVT == MVT::f64 ? MVT::v2f64 : MVT::v2i64;
     SDValue ToV2X64 = DAG.getBitcast(V2X64VT, BV);
+
+    if (DstVT == MVT::x86mmx)
+      return DAG.getNode(X86ISD::MOVDQ2Q, dl, DstVT, ToV2X64);
+
     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, DstVT, ToV2X64,
                        DAG.getIntPtrConstant(0, dl));
   }

Modified: llvm/trunk/test/CodeGen/X86/3dnow-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/3dnow-intrinsics.ll?rev=348086&r1=348085&r2=348086&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/3dnow-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/3dnow-intrinsics.ll Sat Dec  1 21:46:50 2018
@@ -897,8 +897,8 @@ define <2 x i32> @test_pswapdsi(<2 x i32
 ; X64-LABEL: test_pswapdsi:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X64-NEXT:    movq %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    pswapd -{{[0-9]+}}(%rsp), %mm0 # mm0 = mem[1,0]
+; X64-NEXT:    movdq2q %xmm0, %mm0
+; X64-NEXT:    pswapd %mm0, %mm0 # mm0 = mm0[1,0]
 ; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]

Modified: llvm/trunk/test/CodeGen/X86/mmx-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-arith.ll?rev=348086&r1=348085&r2=348086&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mmx-arith.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mmx-arith.ll Sat Dec  1 21:46:50 2018
@@ -14,69 +14,59 @@ define void @test0(x86_mmx* %A, x86_mmx*
 ; X32-NEXT:    movl %esp, %ebp
 ; X32-NEXT:    .cfi_def_cfa_register %ebp
 ; X32-NEXT:    andl $-8, %esp
-; X32-NEXT:    subl $32, %esp
+; X32-NEXT:    subl $16, %esp
 ; X32-NEXT:    movl 12(%ebp), %ecx
 ; X32-NEXT:    movl 8(%ebp), %eax
 ; X32-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; X32-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-NEXT:    paddw %xmm0, %xmm1
-; X32-NEXT:    movdqa {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
-; X32-NEXT:    pand %xmm0, %xmm1
-; X32-NEXT:    packuswb %xmm1, %xmm1
-; X32-NEXT:    movq %xmm1, {{[0-9]+}}(%esp)
-; X32-NEXT:    movq {{[0-9]+}}(%esp), %mm0
+; X32-NEXT:    paddb %xmm0, %xmm1
+; X32-NEXT:    movdq2q %xmm1, %mm0
 ; X32-NEXT:    movq %xmm1, (%eax)
 ; X32-NEXT:    paddsb (%ecx), %mm0
 ; X32-NEXT:    movq %mm0, (%eax)
 ; X32-NEXT:    paddusb (%ecx), %mm0
 ; X32-NEXT:    movq %mm0, {{[0-9]+}}(%esp)
-; X32-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
 ; X32-NEXT:    movq %mm0, (%eax)
-; X32-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
-; X32-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X32-NEXT:    psubw %xmm2, %xmm1
-; X32-NEXT:    pand %xmm0, %xmm1
-; X32-NEXT:    packuswb %xmm1, %xmm1
-; X32-NEXT:    movq %xmm1, {{[0-9]+}}(%esp)
-; X32-NEXT:    movq {{[0-9]+}}(%esp), %mm0
-; X32-NEXT:    movq %xmm1, (%eax)
+; X32-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
+; X32-NEXT:    psubb %xmm1, %xmm0
+; X32-NEXT:    movdq2q %xmm0, %mm0
+; X32-NEXT:    movq %xmm0, (%eax)
 ; X32-NEXT:    psubsb (%ecx), %mm0
 ; X32-NEXT:    movq %mm0, (%eax)
 ; X32-NEXT:    psubusb (%ecx), %mm0
 ; X32-NEXT:    movq %mm0, (%esp)
+; X32-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-NEXT:    movq %mm0, (%eax)
 ; X32-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
 ; X32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-NEXT:    movq %mm0, (%eax)
+; X32-NEXT:    pmullw %xmm0, %xmm1
+; X32-NEXT:    movdqa {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
+; X32-NEXT:    movdqa %xmm1, %xmm2
+; X32-NEXT:    pand %xmm0, %xmm2
+; X32-NEXT:    packuswb %xmm2, %xmm2
+; X32-NEXT:    movq %xmm2, (%eax)
 ; X32-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
 ; X32-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X32-NEXT:    pmullw %xmm1, %xmm2
+; X32-NEXT:    pand %xmm1, %xmm2
 ; X32-NEXT:    movdqa %xmm2, %xmm1
 ; X32-NEXT:    pand %xmm0, %xmm1
 ; X32-NEXT:    packuswb %xmm1, %xmm1
 ; X32-NEXT:    movq %xmm1, (%eax)
 ; X32-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
 ; X32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-NEXT:    pand %xmm2, %xmm1
+; X32-NEXT:    por %xmm2, %xmm1
 ; X32-NEXT:    movdqa %xmm1, %xmm2
 ; X32-NEXT:    pand %xmm0, %xmm2
 ; X32-NEXT:    packuswb %xmm2, %xmm2
 ; X32-NEXT:    movq %xmm2, (%eax)
 ; X32-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
 ; X32-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X32-NEXT:    por %xmm1, %xmm2
-; X32-NEXT:    movdqa %xmm2, %xmm1
-; X32-NEXT:    pand %xmm0, %xmm1
-; X32-NEXT:    packuswb %xmm1, %xmm1
-; X32-NEXT:    movq %xmm1, (%eax)
-; X32-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-NEXT:    pxor %xmm2, %xmm1
-; X32-NEXT:    pand %xmm0, %xmm1
-; X32-NEXT:    packuswb %xmm1, %xmm1
-; X32-NEXT:    movq %xmm1, (%eax)
+; X32-NEXT:    pxor %xmm1, %xmm2
+; X32-NEXT:    pand %xmm0, %xmm2
+; X32-NEXT:    packuswb %xmm2, %xmm2
+; X32-NEXT:    movq %xmm2, (%eax)
 ; X32-NEXT:    emms
 ; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
@@ -86,65 +76,55 @@ define void @test0(x86_mmx* %A, x86_mmx*
 ; X64-LABEL: test0:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; X64-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X64-NEXT:    paddw %xmm0, %xmm1
-; X64-NEXT:    movdqa {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
-; X64-NEXT:    pand %xmm0, %xmm1
-; X64-NEXT:    packuswb %xmm1, %xmm1
-; X64-NEXT:    movq %xmm1, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %mm0
+; X64-NEXT:    paddb %xmm0, %xmm1
+; X64-NEXT:    movdq2q %xmm1, %mm0
 ; X64-NEXT:    movq %xmm1, (%rdi)
 ; X64-NEXT:    paddsb (%rsi), %mm0
 ; X64-NEXT:    movq %mm0, (%rdi)
 ; X64-NEXT:    paddusb (%rsi), %mm0
 ; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
 ; X64-NEXT:    movq %mm0, (%rdi)
-; X64-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
-; X64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X64-NEXT:    psubw %xmm2, %xmm1
-; X64-NEXT:    pand %xmm0, %xmm1
-; X64-NEXT:    packuswb %xmm1, %xmm1
-; X64-NEXT:    movq %xmm1, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %mm0
-; X64-NEXT:    movq %xmm1, (%rdi)
+; X64-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
+; X64-NEXT:    psubb %xmm1, %xmm0
+; X64-NEXT:    movdq2q %xmm0, %mm0
+; X64-NEXT:    movq %xmm0, (%rdi)
 ; X64-NEXT:    psubsb (%rsi), %mm0
 ; X64-NEXT:    movq %mm0, (%rdi)
 ; X64-NEXT:    psubusb (%rsi), %mm0
 ; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X64-NEXT:    movq %mm0, (%rdi)
 ; X64-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
 ; X64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X64-NEXT:    movq %mm0, (%rdi)
+; X64-NEXT:    pmullw %xmm0, %xmm1
+; X64-NEXT:    movdqa {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
+; X64-NEXT:    movdqa %xmm1, %xmm2
+; X64-NEXT:    pand %xmm0, %xmm2
+; X64-NEXT:    packuswb %xmm2, %xmm2
+; X64-NEXT:    movq %xmm2, (%rdi)
 ; X64-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
 ; X64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X64-NEXT:    pmullw %xmm1, %xmm2
+; X64-NEXT:    pand %xmm1, %xmm2
 ; X64-NEXT:    movdqa %xmm2, %xmm1
 ; X64-NEXT:    pand %xmm0, %xmm1
 ; X64-NEXT:    packuswb %xmm1, %xmm1
 ; X64-NEXT:    movq %xmm1, (%rdi)
 ; X64-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
 ; X64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X64-NEXT:    pand %xmm2, %xmm1
+; X64-NEXT:    por %xmm2, %xmm1
 ; X64-NEXT:    movdqa %xmm1, %xmm2
 ; X64-NEXT:    pand %xmm0, %xmm2
 ; X64-NEXT:    packuswb %xmm2, %xmm2
 ; X64-NEXT:    movq %xmm2, (%rdi)
 ; X64-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
 ; X64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X64-NEXT:    por %xmm1, %xmm2
-; X64-NEXT:    movdqa %xmm2, %xmm1
-; X64-NEXT:    pand %xmm0, %xmm1
-; X64-NEXT:    packuswb %xmm1, %xmm1
-; X64-NEXT:    movq %xmm1, (%rdi)
-; X64-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X64-NEXT:    pxor %xmm2, %xmm1
-; X64-NEXT:    pand %xmm0, %xmm1
-; X64-NEXT:    packuswb %xmm1, %xmm1
-; X64-NEXT:    movq %xmm1, (%rdi)
+; X64-NEXT:    pxor %xmm1, %xmm2
+; X64-NEXT:    pand %xmm0, %xmm2
+; X64-NEXT:    packuswb %xmm2, %xmm2
+; X64-NEXT:    movq %xmm2, (%rdi)
 ; X64-NEXT:    emms
 ; X64-NEXT:    retq
 entry:
@@ -315,58 +295,34 @@ define void @test2(x86_mmx* %A, x86_mmx*
 ; X32-NEXT:    movl %esp, %ebp
 ; X32-NEXT:    .cfi_def_cfa_register %ebp
 ; X32-NEXT:    andl $-8, %esp
-; X32-NEXT:    subl $48, %esp
+; X32-NEXT:    subl $24, %esp
 ; X32-NEXT:    movl 12(%ebp), %ecx
 ; X32-NEXT:    movl 8(%ebp), %eax
 ; X32-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
 ; X32-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X32-NEXT:    paddd %xmm0, %xmm1
-; X32-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
-; X32-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
-; X32-NEXT:    movq {{[0-9]+}}(%esp), %mm0
-; X32-NEXT:    movq %xmm0, (%eax)
+; X32-NEXT:    paddw %xmm0, %xmm1
+; X32-NEXT:    movdq2q %xmm1, %mm0
+; X32-NEXT:    movq %xmm1, (%eax)
 ; X32-NEXT:    paddsw (%ecx), %mm0
 ; X32-NEXT:    movq %mm0, (%eax)
 ; X32-NEXT:    paddusw (%ecx), %mm0
 ; X32-NEXT:    movq %mm0, {{[0-9]+}}(%esp)
 ; X32-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
 ; X32-NEXT:    movq %mm0, (%eax)
 ; X32-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; X32-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
-; X32-NEXT:    movq {{[0-9]+}}(%esp), %mm0
+; X32-NEXT:    psubw %xmm1, %xmm0
+; X32-NEXT:    movdq2q %xmm0, %mm0
 ; X32-NEXT:    movq %xmm0, (%eax)
 ; X32-NEXT:    psubsw (%ecx), %mm0
 ; X32-NEXT:    movq %mm0, (%eax)
 ; X32-NEXT:    psubusw (%ecx), %mm0
 ; X32-NEXT:    movq %mm0, {{[0-9]+}}(%esp)
 ; X32-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
 ; X32-NEXT:    movq %mm0, (%eax)
 ; X32-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X32-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; X32-NEXT:    pmuludq %xmm1, %xmm0
-; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X32-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; X32-NEXT:    pmuludq %xmm2, %xmm1
-; X32-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; X32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; X32-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X32-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
-; X32-NEXT:    movq {{[0-9]+}}(%esp), %mm0
-; X32-NEXT:    movq %xmm0, (%eax)
+; X32-NEXT:    pmullw %xmm0, %xmm1
+; X32-NEXT:    movdq2q %xmm1, %mm0
+; X32-NEXT:    movq %xmm1, (%eax)
 ; X32-NEXT:    pmulhw (%ecx), %mm0
 ; X32-NEXT:    movq %mm0, (%eax)
 ; X32-NEXT:    pmaddwd (%ecx), %mm0
@@ -404,54 +360,30 @@ define void @test2(x86_mmx* %A, x86_mmx*
 ; X64-LABEL: test2:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
 ; X64-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X64-NEXT:    paddd %xmm0, %xmm1
-; X64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
-; X64-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X64-NEXT:    movq %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %mm0
-; X64-NEXT:    movq %xmm0, (%rdi)
+; X64-NEXT:    paddw %xmm0, %xmm1
+; X64-NEXT:    movdq2q %xmm1, %mm0
+; X64-NEXT:    movq %xmm1, (%rdi)
 ; X64-NEXT:    paddsw (%rsi), %mm0
 ; X64-NEXT:    movq %mm0, (%rdi)
 ; X64-NEXT:    paddusw (%rsi), %mm0
 ; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
 ; X64-NEXT:    movq %mm0, (%rdi)
 ; X64-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X64-NEXT:    psubd %xmm1, %xmm0
-; X64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; X64-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X64-NEXT:    movq %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %mm0
+; X64-NEXT:    psubw %xmm1, %xmm0
+; X64-NEXT:    movdq2q %xmm0, %mm0
 ; X64-NEXT:    movq %xmm0, (%rdi)
 ; X64-NEXT:    psubsw (%rsi), %mm0
 ; X64-NEXT:    movq %mm0, (%rdi)
 ; X64-NEXT:    psubusw (%rsi), %mm0
 ; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
 ; X64-NEXT:    movq %mm0, (%rdi)
 ; X64-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; X64-NEXT:    pmuludq %xmm1, %xmm0
-; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; X64-NEXT:    pmuludq %xmm2, %xmm1
-; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; X64-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X64-NEXT:    movq %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %mm0
-; X64-NEXT:    movq %xmm0, (%rdi)
+; X64-NEXT:    pmullw %xmm0, %xmm1
+; X64-NEXT:    movdq2q %xmm1, %mm0
+; X64-NEXT:    movq %xmm1, (%rdi)
 ; X64-NEXT:    pmulhw (%rsi), %mm0
 ; X64-NEXT:    movq %mm0, (%rdi)
 ; X64-NEXT:    pmaddwd (%rsi), %mm0

Modified: llvm/trunk/test/CodeGen/X86/vec_insert-7.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_insert-7.ll?rev=348086&r1=348085&r2=348086&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_insert-7.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_insert-7.ll Sat Dec  1 21:46:50 2018
@@ -8,12 +8,11 @@
 define x86_mmx @mmx_movzl(x86_mmx %x) nounwind {
 ; X32-LABEL: mmx_movzl:
 ; X32:       ## %bb.0:
-; X32-NEXT:    subl $20, %esp
+; X32-NEXT:    subl $12, %esp
 ; X32-NEXT:    movl $32, %eax
 ; X32-NEXT:    movd %eax, %xmm0
-; X32-NEXT:    movq %xmm0, (%esp)
-; X32-NEXT:    movq (%esp), %mm0
-; X32-NEXT:    addl $20, %esp
+; X32-NEXT:    movdq2q %xmm0, %mm0
+; X32-NEXT:    addl $12, %esp
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: mmx_movzl:



