[llvm] r321898 - [X86] Stop printing moves between VR64 and GR64 with 'movd' mnemonic. Use 'movq' instead.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 5 12:55:13 PST 2018


Author: ctopper
Date: Fri Jan  5 12:55:12 2018
New Revision: 321898

URL: http://llvm.org/viewvc/llvm-project?rev=321898&view=rev
Log:
[X86] Stop printing moves between VR64 and GR64 with 'movd' mnemonic. Use 'movq' instead.

This behavior existed to work with an old version of the GNU assembler on macOS that only accepted the 'movd' form. Newer versions of the GNU assembler and the current LLVM-derived assembler on macOS support 'movq' as well.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrInfo.td
    llvm/trunk/lib/Target/X86/X86InstrMMX.td
    llvm/trunk/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
    llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll
    llvm/trunk/test/CodeGen/X86/bitcast-mmx.ll
    llvm/trunk/test/CodeGen/X86/mmx-bitcast.ll
    llvm/trunk/test/CodeGen/X86/mmx-cvt.ll
    llvm/trunk/test/CodeGen/X86/mmx-fold-load.ll
    llvm/trunk/test/CodeGen/X86/mmx-schedule.ll
    llvm/trunk/test/CodeGen/X86/select-mmx.ll
    llvm/trunk/test/CodeGen/X86/x86-64-psub.ll
    llvm/trunk/test/MC/Disassembler/X86/x86-64.txt
    llvm/trunk/test/MC/X86/x86-64.s
    llvm/trunk/test/MC/X86/x86_64-encoding.s

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=321898&r1=321897&r2=321898&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Fri Jan  5 12:55:12 2018
@@ -3208,10 +3208,12 @@ def : InstAlias<"mov\t{$seg, $mem|$mem,
 // Match 'movq <largeimm>, <reg>' as an alias for movabsq.
 def : InstAlias<"mov{q}\t{$imm, $reg|$reg, $imm}", (MOV64ri GR64:$reg, i64imm:$imm), 0>;
 
-// Match 'movq GR64, MMX' as an alias for movd.
-def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+// Match 'movd GR64, MMX' as an alias for movq to be compatible with gas,
+// which supports this due to an old AMD documentation bug when 64-bit mode was
+// created.
+def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
                 (MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>;
-def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
                 (MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>;
 
 // movsx aliases

Modified: llvm/trunk/lib/Target/X86/X86InstrMMX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrMMX.td?rev=321898&r1=321897&r2=321898&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrMMX.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrMMX.td Fri Jan  5 12:55:12 2018
@@ -254,13 +254,13 @@ def MMX_MOVD64grr : MMXI<0x7E, MRMDestRe
 
 let isBitcast = 1 in
 def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
-                             "movd\t{$src, $dst|$dst, $src}",
+                             "movq\t{$src, $dst|$dst, $src}",
                              [(set VR64:$dst, (bitconvert GR64:$src))],
                              IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
 
 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
 def MMX_MOVD64to64rm : MMXRI<0x6E, MRMSrcMem, (outs VR64:$dst),
-                             (ins i64mem:$src), "movd\t{$src, $dst|$dst, $src}",
+                             (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}",
                              [], IIC_MMX_MOVQ_RM>, Sched<[WriteLoad]>;
 
 // These are 64 bit moves, but since the OS X assembler doesn't
@@ -269,7 +269,7 @@ def MMX_MOVD64to64rm : MMXRI<0x6E, MRMSr
 let SchedRW = [WriteMove], isBitcast = 1 in {
 def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
                                (outs GR64:$dst), (ins VR64:$src),
-                               "movd\t{$src, $dst|$dst, $src}",
+                               "movq\t{$src, $dst|$dst, $src}",
                              [(set GR64:$dst,
                               (bitconvert VR64:$src))], IIC_MMX_MOV_REG_MM>;
 let hasSideEffects = 0 in
@@ -286,7 +286,7 @@ def MMX_MOVQ64rr_REV : MMXI<0x7F, MRMDes
 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
 def MMX_MOVD64from64rm : MMXRI<0x7E, MRMDestMem,
                                (outs), (ins i64mem:$dst, VR64:$src),
-                               "movd\t{$src, $dst|$dst, $src}",
+                               "movq\t{$src, $dst|$dst, $src}",
                                [], IIC_MMX_MOV_REG_MM>, Sched<[WriteStore]>;
 
 let SchedRW = [WriteLoad] in {

Modified: llvm/trunk/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll?rev=321898&r1=321897&r2=321898&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll Fri Jan  5 12:55:12 2018
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | FileCheck %s
 
-; CHECK: movd %rsi, [[MM0:%mm[0-9]+]]
-; CHECK: movd %rdi, [[MM1:%mm[0-9]+]]
+; CHECK: movq %rsi, [[MM0:%mm[0-9]+]]
+; CHECK: movq %rdi, [[MM1:%mm[0-9]+]]
 ; CHECK: paddusw [[MM0]], [[MM1]]
 
 @R = external global x86_mmx		; <x86_mmx*> [#uses=1]

Modified: llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll?rev=321898&r1=321897&r2=321898&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/3dnow-schedule.ll Fri Jan  5 12:55:12 2018
@@ -16,7 +16,7 @@ define i64 @test_pavgusb(x86_mmx %a0, x8
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pavgusb %mm1, %mm0 # sched: [5:1.00]
 ; CHECK-NEXT:    pavgusb (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -31,7 +31,7 @@ define i64 @test_pf2id(x86_mmx* %a0) opt
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pf2id (%rdi), %mm0 # sched: [7:1.00]
 ; CHECK-NEXT:    pf2id %mm0, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = load x86_mmx, x86_mmx *%a0, align 8
   %2 = call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %1)
@@ -46,7 +46,7 @@ define i64 @test_pf2iw(x86_mmx* %a0) opt
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pf2iw (%rdi), %mm0 # sched: [7:1.00]
 ; CHECK-NEXT:    pf2iw %mm0, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = load x86_mmx, x86_mmx *%a0, align 8
   %2 = call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %1)
@@ -61,7 +61,7 @@ define i64 @test_pfacc(x86_mmx %a0, x86_
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfacc %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfacc (%rdi), %mm0 # sched: [7:1.00]
-; CHECK-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -76,7 +76,7 @@ define i64 @test_pfadd(x86_mmx %a0, x86_
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfadd %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfadd (%rdi), %mm0 # sched: [7:1.00]
-; CHECK-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -91,7 +91,7 @@ define i64 @test_pfcmpeq(x86_mmx %a0, x8
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfcmpeq %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfcmpeq (%rdi), %mm0 # sched: [7:1.00]
-; CHECK-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -106,7 +106,7 @@ define i64 @test_pfcmpge(x86_mmx %a0, x8
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfcmpge %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfcmpge (%rdi), %mm0 # sched: [7:1.00]
-; CHECK-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -121,7 +121,7 @@ define i64 @test_pfcmpgt(x86_mmx %a0, x8
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfcmpgt %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfcmpgt (%rdi), %mm0 # sched: [7:1.00]
-; CHECK-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -136,7 +136,7 @@ define i64 @test_pfmax(x86_mmx %a0, x86_
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfmax %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfmax (%rdi), %mm0 # sched: [7:1.00]
-; CHECK-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -151,7 +151,7 @@ define i64 @test_pfmin(x86_mmx %a0, x86_
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfmin %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfmin (%rdi), %mm0 # sched: [7:1.00]
-; CHECK-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -166,7 +166,7 @@ define i64 @test_pfmul(x86_mmx %a0, x86_
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfmul %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfmul (%rdi), %mm0 # sched: [7:1.00]
-; CHECK-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -181,7 +181,7 @@ define i64 @test_pfnacc(x86_mmx %a0, x86
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfnacc %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfnacc (%rdi), %mm0 # sched: [7:1.00]
-; CHECK-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -196,7 +196,7 @@ define i64 @test_pfpnacc(x86_mmx %a0, x8
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfpnacc %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfpnacc (%rdi), %mm0 # sched: [7:1.00]
-; CHECK-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -211,7 +211,7 @@ define i64 @test_pfrcp(x86_mmx* %a0) opt
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfrcp (%rdi), %mm0 # sched: [7:1.00]
 ; CHECK-NEXT:    pfrcp %mm0, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = load x86_mmx, x86_mmx *%a0, align 8
   %2 = call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %1)
@@ -226,7 +226,7 @@ define i64 @test_pfrcpit1(x86_mmx %a0, x
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfrcpit1 %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfrcpit1 (%rdi), %mm0 # sched: [7:1.00]
-; CHECK-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -241,7 +241,7 @@ define i64 @test_pfrcpit2(x86_mmx %a0, x
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfrcpit2 %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfrcpit2 (%rdi), %mm0 # sched: [7:1.00]
-; CHECK-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -256,7 +256,7 @@ define i64 @test_pfrsqit1(x86_mmx %a0, x
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfrsqit1 %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfrsqit1 (%rdi), %mm0 # sched: [7:1.00]
-; CHECK-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -271,7 +271,7 @@ define i64 @test_pfrsqrt(x86_mmx* %a0) o
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfrsqrt (%rdi), %mm0 # sched: [7:1.00]
 ; CHECK-NEXT:    pfrsqrt %mm0, %mm0 # sched: [3:1.00]
-; CHECK-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = load x86_mmx, x86_mmx *%a0, align 8
   %2 = call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %1)
@@ -286,7 +286,7 @@ define i64 @test_pfsub(x86_mmx %a0, x86_
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfsub %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfsub (%rdi), %mm0 # sched: [7:1.00]
-; CHECK-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -301,7 +301,7 @@ define i64 @test_pfsubr(x86_mmx %a0, x86
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pfsubr %mm1, %mm0 # sched: [3:1.00]
 ; CHECK-NEXT:    pfsubr (%rdi), %mm0 # sched: [7:1.00]
-; CHECK-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -316,7 +316,7 @@ define i64 @test_pi2fd(x86_mmx* %a0) opt
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pi2fd (%rdi), %mm0 # sched: [8:1.00]
 ; CHECK-NEXT:    pi2fd %mm0, %mm0 # sched: [4:1.00]
-; CHECK-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = load x86_mmx, x86_mmx *%a0, align 8
   %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %1)
@@ -331,7 +331,7 @@ define i64 @test_pi2fw(x86_mmx* %a0) opt
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pi2fw (%rdi), %mm0 # sched: [8:1.00]
 ; CHECK-NEXT:    pi2fw %mm0, %mm0 # sched: [4:1.00]
-; CHECK-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = load x86_mmx, x86_mmx *%a0, align 8
   %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %1)
@@ -346,7 +346,7 @@ define i64 @test_pmulhrw(x86_mmx %a0, x8
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pmulhrw %mm1, %mm0 # sched: [5:1.00]
 ; CHECK-NEXT:    pmulhrw (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -383,7 +383,7 @@ define i64 @test_pswapd(x86_mmx* %a0) op
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pswapd (%rdi), %mm0 # mm0 = mem[1,0] sched: [5:1.00]
 ; CHECK-NEXT:    pswapd %mm0, %mm0 # mm0 = mm0[1,0] sched: [1:1.00]
-; CHECK-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; CHECK-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; CHECK-NEXT:    retq # sched: [1:1.00]
   %1 = load x86_mmx, x86_mmx *%a0, align 8
   %2 = call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %1)

Modified: llvm/trunk/test/CodeGen/X86/bitcast-mmx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-mmx.ll?rev=321898&r1=321897&r2=321898&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-mmx.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-mmx.ll Fri Jan  5 12:55:12 2018
@@ -11,7 +11,7 @@ define i32 @t0(i64 %x) nounwind {
 ;
 ; X64-LABEL: t0:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movd %rdi, %mm0
+; X64-NEXT:    movq %rdi, %mm0
 ; X64-NEXT:    pshufw $238, %mm0, %mm0 # mm0 = mm0[2,3,2,3]
 ; X64-NEXT:    movd %mm0, %eax
 ; X64-NEXT:    retq
@@ -47,9 +47,9 @@ define i64 @t1(i64 %x, i32 %n) nounwind
 ; X64-LABEL: t1:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movd %esi, %mm0
-; X64-NEXT:    movd %rdi, %mm1
+; X64-NEXT:    movq %rdi, %mm1
 ; X64-NEXT:    psllq %mm0, %mm1
-; X64-NEXT:    movd %mm1, %rax
+; X64-NEXT:    movq %mm1, %rax
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast i64 %x to x86_mmx
@@ -81,9 +81,9 @@ define i64 @t2(i64 %x, i32 %n, i32 %w) n
 ; X64-NEXT:    movd %esi, %mm0
 ; X64-NEXT:    movd %edx, %mm1
 ; X64-NEXT:    psllq %mm0, %mm1
-; X64-NEXT:    movd %rdi, %mm0
+; X64-NEXT:    movq %rdi, %mm0
 ; X64-NEXT:    por %mm1, %mm0
-; X64-NEXT:    movd %mm0, %rax
+; X64-NEXT:    movq %mm0, %rax
 ; X64-NEXT:    retq
 entry:
   %0 = insertelement <2 x i32> undef, i32 %w, i32 0
@@ -120,7 +120,7 @@ define i64 @t3(<1 x i64>* %y, i32* %n) n
 ; X64-NEXT:    movq (%rdi), %mm0
 ; X64-NEXT:    movd (%rsi), %mm1
 ; X64-NEXT:    psllq %mm1, %mm0
-; X64-NEXT:    movd %mm0, %rax
+; X64-NEXT:    movq %mm0, %rax
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <1 x i64>* %y to x86_mmx*

Modified: llvm/trunk/test/CodeGen/X86/mmx-bitcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-bitcast.ll?rev=321898&r1=321897&r2=321898&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mmx-bitcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mmx-bitcast.ll Fri Jan  5 12:55:12 2018
@@ -6,7 +6,7 @@ define i64 @t0(x86_mmx* %p) {
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movq (%rdi), %mm0
 ; CHECK-NEXT:    paddq %mm0, %mm0
-; CHECK-NEXT:    movd %mm0, %rax
+; CHECK-NEXT:    movq %mm0, %rax
 ; CHECK-NEXT:    retq
   %t = load x86_mmx, x86_mmx* %p
   %u = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %t, x86_mmx %t)
@@ -19,7 +19,7 @@ define i64 @t1(x86_mmx* %p) {
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movq (%rdi), %mm0
 ; CHECK-NEXT:    paddd %mm0, %mm0
-; CHECK-NEXT:    movd %mm0, %rax
+; CHECK-NEXT:    movq %mm0, %rax
 ; CHECK-NEXT:    retq
   %t = load x86_mmx, x86_mmx* %p
   %u = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %t, x86_mmx %t)
@@ -32,7 +32,7 @@ define i64 @t2(x86_mmx* %p) {
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movq (%rdi), %mm0
 ; CHECK-NEXT:    paddw %mm0, %mm0
-; CHECK-NEXT:    movd %mm0, %rax
+; CHECK-NEXT:    movq %mm0, %rax
 ; CHECK-NEXT:    retq
   %t = load x86_mmx, x86_mmx* %p
   %u = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %t, x86_mmx %t)
@@ -45,7 +45,7 @@ define i64 @t3(x86_mmx* %p) {
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movq (%rdi), %mm0
 ; CHECK-NEXT:    paddb %mm0, %mm0
-; CHECK-NEXT:    movd %mm0, %rax
+; CHECK-NEXT:    movq %mm0, %rax
 ; CHECK-NEXT:    retq
   %t = load x86_mmx, x86_mmx* %p
   %u = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %t, x86_mmx %t)
@@ -58,8 +58,8 @@ define i64 @t3(x86_mmx* %p) {
 define void @t4(<1 x i64> %A, <1 x i64> %B) {
 ; CHECK-LABEL: t4:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    movd %rdi, %mm0
-; CHECK-NEXT:    movd %rsi, %mm1
+; CHECK-NEXT:    movq %rdi, %mm0
+; CHECK-NEXT:    movq %rsi, %mm1
 ; CHECK-NEXT:    paddusw %mm0, %mm1
 ; CHECK-NEXT:    movq _R@{{.*}}(%rip), %rax
 ; CHECK-NEXT:    movq %mm1, (%rax)
@@ -93,9 +93,9 @@ declare x86_mmx @llvm.x86.mmx.pslli.q(x8
 define <1 x i64> @t6(i64 %t) {
 ; CHECK-LABEL: t6:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    movd %rdi, %mm0
+; CHECK-NEXT:    movq %rdi, %mm0
 ; CHECK-NEXT:    psllq $48, %mm0
-; CHECK-NEXT:    movd %mm0, %rax
+; CHECK-NEXT:    movq %mm0, %rax
 ; CHECK-NEXT:    retq
   %t1 = insertelement <1 x i64> undef, i64 %t, i32 0
   %t0 = bitcast <1 x i64> %t1 to x86_mmx

Modified: llvm/trunk/test/CodeGen/X86/mmx-cvt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-cvt.ll?rev=321898&r1=321897&r2=321898&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mmx-cvt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mmx-cvt.ll Fri Jan  5 12:55:12 2018
@@ -346,7 +346,7 @@ define <4 x float> @cvt_v2i32_v2f32(<1 x
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq (%rdi), %mm0
 ; X64-NEXT:    paddd %mm0, %mm0
-; X64-NEXT:    movd %mm0, %rax
+; X64-NEXT:    movq %mm0, %rax
 ; X64-NEXT:    movq %rax, %xmm0
 ; X64-NEXT:    cvtdq2ps %xmm0, %xmm0
 ; X64-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/mmx-fold-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-fold-load.ll?rev=321898&r1=321897&r2=321898&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mmx-fold-load.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mmx-fold-load.ll Fri Jan  5 12:55:12 2018
@@ -26,7 +26,7 @@ define i64 @t0(<1 x i64>* %a, i32* %b) n
 ; X64-NEXT:    movq (%rdi), %mm0
 ; X64-NEXT:    movd (%rsi), %mm1
 ; X64-NEXT:    psllq %mm1, %mm0
-; X64-NEXT:    movd %mm0, %rax
+; X64-NEXT:    movq %mm0, %rax
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <1 x i64>* %a to x86_mmx*
@@ -62,7 +62,7 @@ define i64 @t1(<1 x i64>* %a, i32* %b) n
 ; X64-NEXT:    movq (%rdi), %mm0
 ; X64-NEXT:    movd (%rsi), %mm1
 ; X64-NEXT:    psrlq %mm1, %mm0
-; X64-NEXT:    movd %mm0, %rax
+; X64-NEXT:    movq %mm0, %rax
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <1 x i64>* %a to x86_mmx*
@@ -98,7 +98,7 @@ define i64 @t2(<1 x i64>* %a, i32* %b) n
 ; X64-NEXT:    movq (%rdi), %mm0
 ; X64-NEXT:    movd (%rsi), %mm1
 ; X64-NEXT:    psllw %mm1, %mm0
-; X64-NEXT:    movd %mm0, %rax
+; X64-NEXT:    movq %mm0, %rax
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <1 x i64>* %a to x86_mmx*
@@ -134,7 +134,7 @@ define i64 @t3(<1 x i64>* %a, i32* %b) n
 ; X64-NEXT:    movq (%rdi), %mm0
 ; X64-NEXT:    movd (%rsi), %mm1
 ; X64-NEXT:    psrlw %mm1, %mm0
-; X64-NEXT:    movd %mm0, %rax
+; X64-NEXT:    movq %mm0, %rax
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <1 x i64>* %a to x86_mmx*
@@ -170,7 +170,7 @@ define i64 @t4(<1 x i64>* %a, i32* %b) n
 ; X64-NEXT:    movq (%rdi), %mm0
 ; X64-NEXT:    movd (%rsi), %mm1
 ; X64-NEXT:    pslld %mm1, %mm0
-; X64-NEXT:    movd %mm0, %rax
+; X64-NEXT:    movq %mm0, %rax
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <1 x i64>* %a to x86_mmx*
@@ -206,7 +206,7 @@ define i64 @t5(<1 x i64>* %a, i32* %b) n
 ; X64-NEXT:    movq (%rdi), %mm0
 ; X64-NEXT:    movd (%rsi), %mm1
 ; X64-NEXT:    psrld %mm1, %mm0
-; X64-NEXT:    movd %mm0, %rax
+; X64-NEXT:    movq %mm0, %rax
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <1 x i64>* %a to x86_mmx*
@@ -242,7 +242,7 @@ define i64 @t6(<1 x i64>* %a, i32* %b) n
 ; X64-NEXT:    movq (%rdi), %mm0
 ; X64-NEXT:    movd (%rsi), %mm1
 ; X64-NEXT:    psraw %mm1, %mm0
-; X64-NEXT:    movd %mm0, %rax
+; X64-NEXT:    movq %mm0, %rax
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <1 x i64>* %a to x86_mmx*
@@ -278,7 +278,7 @@ define i64 @t7(<1 x i64>* %a, i32* %b) n
 ; X64-NEXT:    movq (%rdi), %mm0
 ; X64-NEXT:    movd (%rsi), %mm1
 ; X64-NEXT:    psrad %mm1, %mm0
-; X64-NEXT:    movd %mm0, %rax
+; X64-NEXT:    movq %mm0, %rax
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <1 x i64>* %a to x86_mmx*
@@ -310,7 +310,7 @@ define i64 @tt0(x86_mmx %t, x86_mmx* %q)
 ; X64-LABEL: tt0:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    paddb (%rdi), %mm0
-; X64-NEXT:    movd %mm0, %rax
+; X64-NEXT:    movq %mm0, %rax
 ; X64-NEXT:    emms
 ; X64-NEXT:    retq
 entry:
@@ -343,7 +343,7 @@ define i64 @tt1(x86_mmx %t, x86_mmx* %q)
 ; X64-LABEL: tt1:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    paddw (%rdi), %mm0
-; X64-NEXT:    movd %mm0, %rax
+; X64-NEXT:    movq %mm0, %rax
 ; X64-NEXT:    emms
 ; X64-NEXT:    retq
 entry:
@@ -375,7 +375,7 @@ define i64 @tt2(x86_mmx %t, x86_mmx* %q)
 ; X64-LABEL: tt2:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    paddd (%rdi), %mm0
-; X64-NEXT:    movd %mm0, %rax
+; X64-NEXT:    movq %mm0, %rax
 ; X64-NEXT:    emms
 ; X64-NEXT:    retq
 entry:
@@ -407,7 +407,7 @@ define i64 @tt3(x86_mmx %t, x86_mmx* %q)
 ; X64-LABEL: tt3:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    paddq (%rdi), %mm0
-; X64-NEXT:    movd %mm0, %rax
+; X64-NEXT:    movq %mm0, %rax
 ; X64-NEXT:    emms
 ; X64-NEXT:    retq
 entry:
@@ -439,7 +439,7 @@ define i64 @tt4(x86_mmx %t, x86_mmx* %q)
 ; X64-LABEL: tt4:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    paddusb (%rdi), %mm0
-; X64-NEXT:    movd %mm0, %rax
+; X64-NEXT:    movq %mm0, %rax
 ; X64-NEXT:    emms
 ; X64-NEXT:    retq
 entry:
@@ -471,7 +471,7 @@ define i64 @tt5(x86_mmx %t, x86_mmx* %q)
 ; X64-LABEL: tt5:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    paddusw (%rdi), %mm0
-; X64-NEXT:    movd %mm0, %rax
+; X64-NEXT:    movq %mm0, %rax
 ; X64-NEXT:    emms
 ; X64-NEXT:    retq
 entry:
@@ -503,7 +503,7 @@ define i64 @tt6(x86_mmx %t, x86_mmx* %q)
 ; X64-LABEL: tt6:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    psrlw (%rdi), %mm0
-; X64-NEXT:    movd %mm0, %rax
+; X64-NEXT:    movq %mm0, %rax
 ; X64-NEXT:    emms
 ; X64-NEXT:    retq
 entry:
@@ -535,7 +535,7 @@ define i64 @tt7(x86_mmx %t, x86_mmx* %q)
 ; X64-LABEL: tt7:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    psrld (%rdi), %mm0
-; X64-NEXT:    movd %mm0, %rax
+; X64-NEXT:    movq %mm0, %rax
 ; X64-NEXT:    emms
 ; X64-NEXT:    retq
 entry:
@@ -567,7 +567,7 @@ define i64 @tt8(x86_mmx %t, x86_mmx* %q)
 ; X64-LABEL: tt8:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    psrlq (%rdi), %mm0
-; X64-NEXT:    movd %mm0, %rax
+; X64-NEXT:    movq %mm0, %rax
 ; X64-NEXT:    emms
 ; X64-NEXT:    retq
 entry:
@@ -603,7 +603,7 @@ define void @test_psrlq_by_volatile_shif
 ; X64-NEXT:    movl $1, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    movd -{{[0-9]+}}(%rsp), %mm0
 ; X64-NEXT:    movl $255, %eax
-; X64-NEXT:    movd %rax, %mm1
+; X64-NEXT:    movq %rax, %mm1
 ; X64-NEXT:    psrlq %mm0, %mm1
 ; X64-NEXT:    movq %mm1, (%rdi)
 ; X64-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/mmx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-schedule.ll?rev=321898&r1=321897&r2=321898&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mmx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mmx-schedule.ll Fri Jan  5 12:55:12 2018
@@ -17,7 +17,7 @@ define i64 @test_cvtpd2pi(<2 x double> %
 ; GENERIC-NEXT:    cvtpd2pi (%rdi), %mm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    cvtpd2pi %xmm0, %mm1 # sched: [4:1.00]
 ; GENERIC-NEXT:    por %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvtpd2pi:
@@ -25,7 +25,7 @@ define i64 @test_cvtpd2pi(<2 x double> %
 ; ATOM-NEXT:    cvtpd2pi (%rdi), %mm0 # sched: [8:4.00]
 ; ATOM-NEXT:    cvtpd2pi %xmm0, %mm1 # sched: [7:3.50]
 ; ATOM-NEXT:    por %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_cvtpd2pi:
@@ -33,7 +33,7 @@ define i64 @test_cvtpd2pi(<2 x double> %
 ; SLM-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [7:1.00]
 ; SLM-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [4:0.50]
 ; SLM-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
-; SLM-NEXT:    movd %mm1, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm1, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_cvtpd2pi:
@@ -41,7 +41,7 @@ define i64 @test_cvtpd2pi(<2 x double> %
 ; SANDY-NEXT:    cvtpd2pi (%rdi), %mm0 # sched: [10:1.00]
 ; SANDY-NEXT:    cvtpd2pi %xmm0, %mm1 # sched: [4:1.00]
 ; SANDY-NEXT:    por %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtpd2pi:
@@ -49,7 +49,7 @@ define i64 @test_cvtpd2pi(<2 x double> %
 ; HASWELL-NEXT:    cvtpd2pi (%rdi), %mm0 # sched: [10:1.00]
 ; HASWELL-NEXT:    cvtpd2pi %xmm0, %mm1 # sched: [4:1.00]
 ; HASWELL-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_cvtpd2pi:
@@ -57,7 +57,7 @@ define i64 @test_cvtpd2pi(<2 x double> %
 ; BROADWELL-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [4:1.00]
 ; BROADWELL-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [9:1.00]
 ; BROADWELL-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
-; BROADWELL-NEXT:    movd %mm1, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm1, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_cvtpd2pi:
@@ -65,7 +65,7 @@ define i64 @test_cvtpd2pi(<2 x double> %
 ; SKYLAKE-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [5:1.00]
 ; SKYLAKE-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [11:1.00]
 ; SKYLAKE-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
-; SKYLAKE-NEXT:    movd %mm1, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm1, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_cvtpd2pi:
@@ -73,7 +73,7 @@ define i64 @test_cvtpd2pi(<2 x double> %
 ; SKX-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [5:1.00]
 ; SKX-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [11:1.00]
 ; SKX-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
-; SKX-NEXT:    movd %mm1, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm1, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_cvtpd2pi:
@@ -81,7 +81,7 @@ define i64 @test_cvtpd2pi(<2 x double> %
 ; BTVER2-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [8:1.00]
 ; BTVER2-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
-; BTVER2-NEXT:    movd %mm1, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm1, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_cvtpd2pi:
@@ -89,7 +89,7 @@ define i64 @test_cvtpd2pi(<2 x double> %
 ; ZNVER1-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [12:1.00]
 ; ZNVER1-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [4:1.00]
 ; ZNVER1-NEXT:    por %mm0, %mm1 # sched: [1:0.25]
-; ZNVER1-NEXT:    movd %mm1, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm1, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a0)
   %2 = load <2 x double>, <2 x double> *%a1, align 16
@@ -262,7 +262,7 @@ define i64 @test_cvtps2pi(<4 x float> %a
 ; GENERIC-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [9:1.00]
 ; GENERIC-NEXT:    por %mm0, %mm1 # sched: [1:1.00]
-; GENERIC-NEXT:    movd %mm1, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm1, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvtps2pi:
@@ -270,7 +270,7 @@ define i64 @test_cvtps2pi(<4 x float> %a
 ; ATOM-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [5:5.00]
 ; ATOM-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [5:5.00]
 ; ATOM-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm1, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm1, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_cvtps2pi:
@@ -278,7 +278,7 @@ define i64 @test_cvtps2pi(<4 x float> %a
 ; SLM-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [7:1.00]
 ; SLM-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [4:0.50]
 ; SLM-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
-; SLM-NEXT:    movd %mm1, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm1, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_cvtps2pi:
@@ -286,7 +286,7 @@ define i64 @test_cvtps2pi(<4 x float> %a
 ; SANDY-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [9:1.00]
 ; SANDY-NEXT:    por %mm0, %mm1 # sched: [1:1.00]
-; SANDY-NEXT:    movd %mm1, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm1, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvtps2pi:
@@ -294,7 +294,7 @@ define i64 @test_cvtps2pi(<4 x float> %a
 ; HASWELL-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [4:1.00]
 ; HASWELL-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [8:1.00]
 ; HASWELL-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
-; HASWELL-NEXT:    movd %mm1, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm1, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_cvtps2pi:
@@ -302,7 +302,7 @@ define i64 @test_cvtps2pi(<4 x float> %a
 ; BROADWELL-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [4:1.00]
 ; BROADWELL-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [8:1.00]
 ; BROADWELL-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
-; BROADWELL-NEXT:    movd %mm1, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm1, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_cvtps2pi:
@@ -310,7 +310,7 @@ define i64 @test_cvtps2pi(<4 x float> %a
 ; SKYLAKE-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [5:1.00]
 ; SKYLAKE-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [9:0.50]
 ; SKYLAKE-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
-; SKYLAKE-NEXT:    movd %mm1, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm1, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_cvtps2pi:
@@ -318,7 +318,7 @@ define i64 @test_cvtps2pi(<4 x float> %a
 ; SKX-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [5:1.00]
 ; SKX-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [9:0.50]
 ; SKX-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
-; SKX-NEXT:    movd %mm1, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm1, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_cvtps2pi:
@@ -326,7 +326,7 @@ define i64 @test_cvtps2pi(<4 x float> %a
 ; BTVER2-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [8:1.00]
 ; BTVER2-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
-; BTVER2-NEXT:    movd %mm1, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm1, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_cvtps2pi:
@@ -334,7 +334,7 @@ define i64 @test_cvtps2pi(<4 x float> %a
 ; ZNVER1-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [12:1.00]
 ; ZNVER1-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [4:1.00]
 ; ZNVER1-NEXT:    por %mm0, %mm1 # sched: [1:0.25]
-; ZNVER1-NEXT:    movd %mm1, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm1, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %a0)
   %2 = load <4 x float>, <4 x float> *%a1, align 16
@@ -351,7 +351,7 @@ define i64 @test_cvttpd2pi(<2 x double>
 ; GENERIC-NEXT:    cvttpd2pi (%rdi), %mm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    cvttpd2pi %xmm0, %mm1 # sched: [4:1.00]
 ; GENERIC-NEXT:    por %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvttpd2pi:
@@ -359,7 +359,7 @@ define i64 @test_cvttpd2pi(<2 x double>
 ; ATOM-NEXT:    cvttpd2pi (%rdi), %mm0 # sched: [8:4.00]
 ; ATOM-NEXT:    cvttpd2pi %xmm0, %mm1 # sched: [7:3.50]
 ; ATOM-NEXT:    por %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_cvttpd2pi:
@@ -367,7 +367,7 @@ define i64 @test_cvttpd2pi(<2 x double>
 ; SLM-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [7:1.00]
 ; SLM-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [4:0.50]
 ; SLM-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
-; SLM-NEXT:    movd %mm1, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm1, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_cvttpd2pi:
@@ -375,7 +375,7 @@ define i64 @test_cvttpd2pi(<2 x double>
 ; SANDY-NEXT:    cvttpd2pi (%rdi), %mm0 # sched: [10:1.00]
 ; SANDY-NEXT:    cvttpd2pi %xmm0, %mm1 # sched: [4:1.00]
 ; SANDY-NEXT:    por %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvttpd2pi:
@@ -383,7 +383,7 @@ define i64 @test_cvttpd2pi(<2 x double>
 ; HASWELL-NEXT:    cvttpd2pi (%rdi), %mm0 # sched: [10:1.00]
 ; HASWELL-NEXT:    cvttpd2pi %xmm0, %mm1 # sched: [4:1.00]
 ; HASWELL-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_cvttpd2pi:
@@ -391,7 +391,7 @@ define i64 @test_cvttpd2pi(<2 x double>
 ; BROADWELL-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [4:1.00]
 ; BROADWELL-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [9:1.00]
 ; BROADWELL-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
-; BROADWELL-NEXT:    movd %mm1, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm1, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_cvttpd2pi:
@@ -399,7 +399,7 @@ define i64 @test_cvttpd2pi(<2 x double>
 ; SKYLAKE-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [5:1.00]
 ; SKYLAKE-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [11:1.00]
 ; SKYLAKE-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
-; SKYLAKE-NEXT:    movd %mm1, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm1, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_cvttpd2pi:
@@ -407,7 +407,7 @@ define i64 @test_cvttpd2pi(<2 x double>
 ; SKX-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [5:1.00]
 ; SKX-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [11:1.00]
 ; SKX-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
-; SKX-NEXT:    movd %mm1, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm1, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_cvttpd2pi:
@@ -415,7 +415,7 @@ define i64 @test_cvttpd2pi(<2 x double>
 ; BTVER2-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [8:1.00]
 ; BTVER2-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
-; BTVER2-NEXT:    movd %mm1, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm1, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_cvttpd2pi:
@@ -423,7 +423,7 @@ define i64 @test_cvttpd2pi(<2 x double>
 ; ZNVER1-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [12:1.00]
 ; ZNVER1-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [4:1.00]
 ; ZNVER1-NEXT:    por %mm0, %mm1 # sched: [1:0.25]
-; ZNVER1-NEXT:    movd %mm1, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm1, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a0)
   %2 = load <2 x double>, <2 x double> *%a1, align 16
@@ -440,7 +440,7 @@ define i64 @test_cvttps2pi(<4 x float> %
 ; GENERIC-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [9:1.00]
 ; GENERIC-NEXT:    por %mm0, %mm1 # sched: [1:1.00]
-; GENERIC-NEXT:    movd %mm1, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm1, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_cvttps2pi:
@@ -448,7 +448,7 @@ define i64 @test_cvttps2pi(<4 x float> %
 ; ATOM-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [5:5.00]
 ; ATOM-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [5:5.00]
 ; ATOM-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm1, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm1, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_cvttps2pi:
@@ -456,7 +456,7 @@ define i64 @test_cvttps2pi(<4 x float> %
 ; SLM-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [7:1.00]
 ; SLM-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [4:0.50]
 ; SLM-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
-; SLM-NEXT:    movd %mm1, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm1, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_cvttps2pi:
@@ -464,7 +464,7 @@ define i64 @test_cvttps2pi(<4 x float> %
 ; SANDY-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [9:1.00]
 ; SANDY-NEXT:    por %mm0, %mm1 # sched: [1:1.00]
-; SANDY-NEXT:    movd %mm1, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm1, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_cvttps2pi:
@@ -472,7 +472,7 @@ define i64 @test_cvttps2pi(<4 x float> %
 ; HASWELL-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [4:1.00]
 ; HASWELL-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [8:1.00]
 ; HASWELL-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
-; HASWELL-NEXT:    movd %mm1, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm1, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_cvttps2pi:
@@ -480,7 +480,7 @@ define i64 @test_cvttps2pi(<4 x float> %
 ; BROADWELL-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [4:1.00]
 ; BROADWELL-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [8:1.00]
 ; BROADWELL-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
-; BROADWELL-NEXT:    movd %mm1, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm1, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_cvttps2pi:
@@ -488,7 +488,7 @@ define i64 @test_cvttps2pi(<4 x float> %
 ; SKYLAKE-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [5:1.00]
 ; SKYLAKE-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [9:0.50]
 ; SKYLAKE-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
-; SKYLAKE-NEXT:    movd %mm1, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm1, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_cvttps2pi:
@@ -496,7 +496,7 @@ define i64 @test_cvttps2pi(<4 x float> %
 ; SKX-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [5:1.00]
 ; SKX-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [9:0.50]
 ; SKX-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
-; SKX-NEXT:    movd %mm1, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm1, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_cvttps2pi:
@@ -504,7 +504,7 @@ define i64 @test_cvttps2pi(<4 x float> %
 ; BTVER2-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [8:1.00]
 ; BTVER2-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [3:1.00]
 ; BTVER2-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
-; BTVER2-NEXT:    movd %mm1, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm1, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_cvttps2pi:
@@ -512,7 +512,7 @@ define i64 @test_cvttps2pi(<4 x float> %
 ; ZNVER1-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [12:1.00]
 ; ZNVER1-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [4:1.00]
 ; ZNVER1-NEXT:    por %mm0, %mm1 # sched: [1:0.25]
-; ZNVER1-NEXT:    movd %mm1, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm1, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %a0)
   %2 = load <4 x float>, <4 x float> *%a1, align 16
@@ -793,70 +793,70 @@ define i64 @test_movdq2q(<2 x i64> %a0)
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:1.00]
 ; GENERIC-NEXT:    paddd %mm0, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movdq2q:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    movdq2q %xmm0, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_movdq2q:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    movdq2q %xmm0, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_movdq2q:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:1.00]
 ; SANDY-NEXT:    paddd %mm0, %mm0 # sched: [3:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_movdq2q:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:0.67]
 ; HASWELL-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_movdq2q:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:0.67]
 ; BROADWELL-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_movdq2q:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:1.00]
 ; SKYLAKE-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_movdq2q:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:1.00]
 ; SKX-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_movdq2q:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    movdq2q %xmm0, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_movdq2q:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    movdq2q %xmm0, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    paddd %mm0, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = extractelement <2 x i64> %a0, i32 0
   %2 = bitcast i64 %1 to x86_mmx
@@ -1060,70 +1060,70 @@ define i64 @test_pabsb(x86_mmx *%a0) opt
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pabsb (%rdi), %mm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pabsb:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pabsb (%rdi), %mm0 # sched: [1:1.00]
 ; ATOM-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pabsb:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pabsb (%rdi), %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pabsb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pabsb (%rdi), %mm0 # sched: [6:0.50]
 ; SANDY-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pabsb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pabsb (%rdi), %mm0 # sched: [6:0.50]
 ; HASWELL-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pabsb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pabsb (%rdi), %mm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pabsb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pabsb (%rdi), %mm0 # sched: [6:0.50]
 ; SKYLAKE-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pabsb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pabsb (%rdi), %mm0 # sched: [6:0.50]
 ; SKX-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pabsb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pabsb (%rdi), %mm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pabsb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pabsb (%rdi), %mm0 # sched: [8:0.50]
 ; ZNVER1-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = load x86_mmx, x86_mmx *%a0, align 8
   %2 = call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1)
@@ -1138,70 +1138,70 @@ define i64 @test_pabsd(x86_mmx *%a0) opt
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pabsd (%rdi), %mm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pabsd:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pabsd (%rdi), %mm0 # sched: [1:1.00]
 ; ATOM-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pabsd:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pabsd (%rdi), %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pabsd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pabsd (%rdi), %mm0 # sched: [6:0.50]
 ; SANDY-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pabsd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pabsd (%rdi), %mm0 # sched: [6:0.50]
 ; HASWELL-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pabsd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pabsd (%rdi), %mm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pabsd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pabsd (%rdi), %mm0 # sched: [6:0.50]
 ; SKYLAKE-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pabsd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pabsd (%rdi), %mm0 # sched: [6:0.50]
 ; SKX-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pabsd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pabsd (%rdi), %mm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pabsd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pabsd (%rdi), %mm0 # sched: [8:0.50]
 ; ZNVER1-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = load x86_mmx, x86_mmx *%a0, align 8
   %2 = call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1)
@@ -1216,70 +1216,70 @@ define i64 @test_pabsw(x86_mmx *%a0) opt
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pabsw (%rdi), %mm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pabsw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pabsw (%rdi), %mm0 # sched: [1:1.00]
 ; ATOM-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pabsw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pabsw (%rdi), %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pabsw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pabsw (%rdi), %mm0 # sched: [6:0.50]
 ; SANDY-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pabsw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pabsw (%rdi), %mm0 # sched: [6:0.50]
 ; HASWELL-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pabsw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pabsw (%rdi), %mm0 # sched: [6:0.50]
 ; BROADWELL-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pabsw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pabsw (%rdi), %mm0 # sched: [6:0.50]
 ; SKYLAKE-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pabsw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pabsw (%rdi), %mm0 # sched: [6:0.50]
 ; SKX-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pabsw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pabsw (%rdi), %mm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pabsw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pabsw (%rdi), %mm0 # sched: [8:0.50]
 ; ZNVER1-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = load x86_mmx, x86_mmx *%a0, align 8
   %2 = call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1)
@@ -1294,70 +1294,70 @@ define i64 @test_packssdw(x86_mmx %a0, x
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    packssdw %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    packssdw (%rdi), %mm0 # sched: [5:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_packssdw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    packssdw %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    packssdw (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_packssdw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    packssdw %mm1, %mm0 # sched: [1:1.00]
 ; SLM-NEXT:    packssdw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_packssdw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    packssdw %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    packssdw (%rdi), %mm0 # sched: [5:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_packssdw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    packssdw %mm1, %mm0 # sched: [3:2.00]
 ; HASWELL-NEXT:    packssdw (%rdi), %mm0 # sched: [7:2.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_packssdw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    packssdw %mm1, %mm0 # sched: [3:2.00]
 ; BROADWELL-NEXT:    packssdw (%rdi), %mm0 # sched: [7:2.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_packssdw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    packssdw %mm1, %mm0 # sched: [3:2.00]
 ; SKYLAKE-NEXT:    packssdw (%rdi), %mm0 # sched: [7:2.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_packssdw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    packssdw %mm1, %mm0 # sched: [3:2.00]
 ; SKX-NEXT:    packssdw (%rdi), %mm0 # sched: [7:2.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_packssdw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    packssdw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    packssdw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_packssdw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    packssdw %mm1, %mm0 # sched: [1:0.50]
 ; ZNVER1-NEXT:    packssdw (%rdi), %mm0 # sched: [1:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -1372,70 +1372,70 @@ define i64 @test_packsswb(x86_mmx %a0, x
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    packsswb %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    packsswb (%rdi), %mm0 # sched: [5:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_packsswb:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    packsswb %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    packsswb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_packsswb:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    packsswb %mm1, %mm0 # sched: [1:1.00]
 ; SLM-NEXT:    packsswb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_packsswb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    packsswb %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    packsswb (%rdi), %mm0 # sched: [5:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_packsswb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    packsswb %mm1, %mm0 # sched: [3:2.00]
 ; HASWELL-NEXT:    packsswb (%rdi), %mm0 # sched: [7:2.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_packsswb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    packsswb %mm1, %mm0 # sched: [3:2.00]
 ; BROADWELL-NEXT:    packsswb (%rdi), %mm0 # sched: [7:2.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_packsswb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    packsswb %mm1, %mm0 # sched: [3:2.00]
 ; SKYLAKE-NEXT:    packsswb (%rdi), %mm0 # sched: [7:2.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_packsswb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    packsswb %mm1, %mm0 # sched: [3:2.00]
 ; SKX-NEXT:    packsswb (%rdi), %mm0 # sched: [7:2.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_packsswb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    packsswb %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    packsswb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_packsswb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    packsswb %mm1, %mm0 # sched: [1:0.50]
 ; ZNVER1-NEXT:    packsswb (%rdi), %mm0 # sched: [1:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -1450,70 +1450,70 @@ define i64 @test_packuswb(x86_mmx %a0, x
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    packuswb %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    packuswb (%rdi), %mm0 # sched: [5:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_packuswb:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    packuswb %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    packuswb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_packuswb:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    packuswb %mm1, %mm0 # sched: [1:1.00]
 ; SLM-NEXT:    packuswb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_packuswb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    packuswb %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    packuswb (%rdi), %mm0 # sched: [5:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_packuswb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    packuswb %mm1, %mm0 # sched: [3:2.00]
 ; HASWELL-NEXT:    packuswb (%rdi), %mm0 # sched: [7:2.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_packuswb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    packuswb %mm1, %mm0 # sched: [3:2.00]
 ; BROADWELL-NEXT:    packuswb (%rdi), %mm0 # sched: [7:2.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_packuswb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    packuswb %mm1, %mm0 # sched: [3:2.00]
 ; SKYLAKE-NEXT:    packuswb (%rdi), %mm0 # sched: [7:2.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_packuswb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    packuswb %mm1, %mm0 # sched: [3:2.00]
 ; SKX-NEXT:    packuswb (%rdi), %mm0 # sched: [7:2.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_packuswb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    packuswb %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    packuswb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_packuswb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    packuswb %mm1, %mm0 # sched: [1:0.50]
 ; ZNVER1-NEXT:    packuswb (%rdi), %mm0 # sched: [1:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -1528,70 +1528,70 @@ define i64 @test_paddb(x86_mmx %a0, x86_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    paddb %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    paddb (%rdi), %mm0 # sched: [7:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_paddb:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    paddb %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    paddb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_paddb:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    paddb %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    paddb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_paddb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    paddb %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    paddb (%rdi), %mm0 # sched: [7:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    paddb %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    paddb (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_paddb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    paddb %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    paddb (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_paddb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    paddb %mm1, %mm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    paddb (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_paddb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    paddb %mm1, %mm0 # sched: [1:0.50]
 ; SKX-NEXT:    paddb (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_paddb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    paddb %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    paddb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_paddb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    paddb %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    paddb (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -1606,70 +1606,70 @@ define i64 @test_paddd(x86_mmx %a0, x86_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    paddd %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    paddd (%rdi), %mm0 # sched: [7:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_paddd:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    paddd %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    paddd (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_paddd:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    paddd %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    paddd (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_paddd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    paddd %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    paddd (%rdi), %mm0 # sched: [7:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    paddd %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    paddd (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_paddd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    paddd %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    paddd (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_paddd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    paddd %mm1, %mm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    paddd (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_paddd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    paddd %mm1, %mm0 # sched: [1:0.50]
 ; SKX-NEXT:    paddd (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_paddd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    paddd %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    paddd (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_paddd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    paddd %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    paddd (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -1684,70 +1684,70 @@ define i64 @test_paddq(x86_mmx %a0, x86_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    paddq (%rdi), %mm0 # sched: [7:0.50]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_paddq:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    paddq %mm1, %mm0 # sched: [2:1.00]
 ; ATOM-NEXT:    paddq (%rdi), %mm0 # sched: [3:1.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_paddq:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    paddq (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_paddq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
 ; SANDY-NEXT:    paddq (%rdi), %mm0 # sched: [7:0.50]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    paddq (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_paddq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    paddq (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_paddq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    paddq (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_paddq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
 ; SKX-NEXT:    paddq (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_paddq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    paddq (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_paddq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    paddq %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    paddq (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -1762,70 +1762,70 @@ define i64 @test_paddsb(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    paddsb %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    paddsb (%rdi), %mm0 # sched: [7:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_paddsb:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    paddsb %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    paddsb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_paddsb:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    paddsb %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    paddsb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_paddsb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    paddsb %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    paddsb (%rdi), %mm0 # sched: [7:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddsb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    paddsb %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    paddsb (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_paddsb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    paddsb %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    paddsb (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_paddsb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    paddsb %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    paddsb (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_paddsb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    paddsb %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    paddsb (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_paddsb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    paddsb %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    paddsb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_paddsb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    paddsb %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    paddsb (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -1840,70 +1840,70 @@ define i64 @test_paddsw(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    paddsw %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    paddsw (%rdi), %mm0 # sched: [7:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_paddsw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    paddsw %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    paddsw (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_paddsw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    paddsw %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    paddsw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_paddsw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    paddsw %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    paddsw (%rdi), %mm0 # sched: [7:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddsw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    paddsw %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    paddsw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_paddsw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    paddsw %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    paddsw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_paddsw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    paddsw %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    paddsw (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_paddsw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    paddsw %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    paddsw (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_paddsw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    paddsw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    paddsw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_paddsw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    paddsw %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    paddsw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -1918,70 +1918,70 @@ define i64 @test_paddusb(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    paddusb %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    paddusb (%rdi), %mm0 # sched: [7:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_paddusb:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    paddusb %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    paddusb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_paddusb:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    paddusb %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    paddusb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_paddusb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    paddusb %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    paddusb (%rdi), %mm0 # sched: [7:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddusb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    paddusb %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    paddusb (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_paddusb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    paddusb %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    paddusb (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_paddusb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    paddusb %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    paddusb (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_paddusb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    paddusb %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    paddusb (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_paddusb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    paddusb %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    paddusb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_paddusb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    paddusb %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    paddusb (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -1996,70 +1996,70 @@ define i64 @test_paddusw(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    paddusw %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    paddusw (%rdi), %mm0 # sched: [7:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_paddusw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    paddusw %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    paddusw (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_paddusw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    paddusw %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    paddusw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_paddusw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    paddusw %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    paddusw (%rdi), %mm0 # sched: [7:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddusw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    paddusw %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    paddusw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_paddusw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    paddusw %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    paddusw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_paddusw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    paddusw %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    paddusw (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_paddusw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    paddusw %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    paddusw (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_paddusw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    paddusw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    paddusw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_paddusw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    paddusw %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    paddusw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -2074,70 +2074,70 @@ define i64 @test_paddw(x86_mmx %a0, x86_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    paddw %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    paddw (%rdi), %mm0 # sched: [7:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_paddw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    paddw %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    paddw (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_paddw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    paddw %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    paddw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_paddw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    paddw %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    paddw (%rdi), %mm0 # sched: [7:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_paddw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    paddw %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    paddw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_paddw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    paddw %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    paddw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_paddw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    paddw %mm1, %mm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    paddw (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_paddw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    paddw %mm1, %mm0 # sched: [1:0.50]
 ; SKX-NEXT:    paddw (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_paddw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    paddw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    paddw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_paddw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    paddw %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    paddw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -2152,70 +2152,70 @@ define i64 @test_palignr(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    palignr $1, (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_palignr:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    palignr $1, %mm1, %mm0
 ; ATOM-NEXT:    palignr $1, (%rdi), %mm0
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_palignr:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:1.00]
 ; SLM-NEXT:    palignr $1, (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_palignr:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:0.50]
 ; SANDY-NEXT:    palignr $1, (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_palignr:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    palignr $1, (%rdi), %mm0 # sched: [6:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_palignr:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    palignr $1, (%rdi), %mm0 # sched: [6:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_palignr:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    palignr $1, (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_palignr:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    palignr $1, (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_palignr:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    palignr $1, (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_palignr:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    palignr $1, (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %a0, x86_mmx %a1, i8 1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -2230,70 +2230,70 @@ define i64 @test_pand(x86_mmx %a0, x86_m
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pand %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    pand (%rdi), %mm0 # sched: [5:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pand:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pand %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    pand (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pand:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pand %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    pand (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pand:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pand %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    pand (%rdi), %mm0 # sched: [5:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pand:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pand %mm1, %mm0 # sched: [1:0.33]
 ; HASWELL-NEXT:    pand (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pand:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pand %mm1, %mm0 # sched: [1:0.33]
 ; BROADWELL-NEXT:    pand (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pand:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pand %mm1, %mm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    pand (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pand:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pand %mm1, %mm0 # sched: [1:0.50]
 ; SKX-NEXT:    pand (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pand:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pand %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pand (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pand:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pand %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    pand (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.pand(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -2308,70 +2308,70 @@ define i64 @test_pandn(x86_mmx %a0, x86_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pandn %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    pandn (%rdi), %mm0 # sched: [5:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pandn:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pandn %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    pandn (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pandn:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pandn %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    pandn (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pandn:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pandn %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    pandn (%rdi), %mm0 # sched: [5:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pandn:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pandn %mm1, %mm0 # sched: [1:0.33]
 ; HASWELL-NEXT:    pandn (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pandn:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pandn %mm1, %mm0 # sched: [1:0.33]
 ; BROADWELL-NEXT:    pandn (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pandn:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pandn %mm1, %mm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    pandn (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pandn:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pandn %mm1, %mm0 # sched: [1:0.50]
 ; SKX-NEXT:    pandn (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pandn:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pandn %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pandn (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pandn:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pandn %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    pandn (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -2386,70 +2386,70 @@ define i64 @test_pavgb(x86_mmx %a0, x86_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pavgb %mm1, %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    pavgb (%rdi), %mm0 # sched: [9:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pavgb:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pavgb %mm1, %mm0 # sched: [1:1.00]
 ; ATOM-NEXT:    pavgb (%rdi), %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pavgb:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pavgb %mm1, %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    pavgb (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pavgb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pavgb %mm1, %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    pavgb (%rdi), %mm0 # sched: [9:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pavgb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pavgb %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    pavgb (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pavgb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pavgb %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    pavgb (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pavgb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pavgb %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    pavgb (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pavgb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pavgb %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    pavgb (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pavgb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pavgb %mm1, %mm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    pavgb (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pavgb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pavgb %mm1, %mm0 # sched: [4:1.00]
 ; ZNVER1-NEXT:    pavgb (%rdi), %mm0 # sched: [11:1.00]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -2464,70 +2464,70 @@ define i64 @test_pavgw(x86_mmx %a0, x86_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pavgw %mm1, %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    pavgw (%rdi), %mm0 # sched: [9:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pavgw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pavgw %mm1, %mm0 # sched: [1:1.00]
 ; ATOM-NEXT:    pavgw (%rdi), %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pavgw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pavgw %mm1, %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    pavgw (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pavgw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pavgw %mm1, %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    pavgw (%rdi), %mm0 # sched: [9:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pavgw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pavgw %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    pavgw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pavgw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pavgw %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    pavgw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pavgw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pavgw %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    pavgw (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pavgw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pavgw %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    pavgw (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pavgw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pavgw %mm1, %mm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    pavgw (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pavgw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pavgw %mm1, %mm0 # sched: [4:1.00]
 ; ZNVER1-NEXT:    pavgw (%rdi), %mm0 # sched: [11:1.00]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -2542,70 +2542,70 @@ define i64 @test_pcmpeqb(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pcmpeqb %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [7:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pcmpeqb:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pcmpeqb:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pcmpeqb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pcmpeqb %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [7:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpeqb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pcmpeqb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pcmpeqb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pcmpeqb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pcmpeqb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pcmpeqb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -2620,70 +2620,70 @@ define i64 @test_pcmpeqd(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pcmpeqd %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [7:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pcmpeqd:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pcmpeqd:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pcmpeqd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pcmpeqd %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [7:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpeqd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pcmpeqd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pcmpeqd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pcmpeqd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pcmpeqd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pcmpeqd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -2698,70 +2698,70 @@ define i64 @test_pcmpeqw(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pcmpeqw %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [7:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pcmpeqw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pcmpeqw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pcmpeqw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pcmpeqw %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [7:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpeqw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pcmpeqw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pcmpeqw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pcmpeqw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pcmpeqw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pcmpeqw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -2776,70 +2776,70 @@ define i64 @test_pcmpgtb(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pcmpgtb %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [7:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pcmpgtb:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pcmpgtb:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pcmpgtb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pcmpgtb %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [7:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpgtb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pcmpgtb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pcmpgtb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pcmpgtb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pcmpgtb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pcmpgtb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -2854,70 +2854,70 @@ define i64 @test_pcmpgtd(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pcmpgtd %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [7:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pcmpgtd:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pcmpgtd:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pcmpgtd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pcmpgtd %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [7:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpgtd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pcmpgtd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pcmpgtd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pcmpgtd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pcmpgtd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pcmpgtd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -2932,70 +2932,70 @@ define i64 @test_pcmpgtw(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pcmpgtw %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [7:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pcmpgtw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pcmpgtw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pcmpgtw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pcmpgtw %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [7:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pcmpgtw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pcmpgtw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pcmpgtw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pcmpgtw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pcmpgtw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pcmpgtw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -3065,70 +3065,70 @@ define i64 @test_phaddd(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    phaddd %mm1, %mm0 # sched: [3:1.50]
 ; GENERIC-NEXT:    phaddd (%rdi), %mm0 # sched: [8:1.50]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_phaddd:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    phaddd %mm1, %mm0 # sched: [3:1.50]
 ; ATOM-NEXT:    phaddd (%rdi), %mm0 # sched: [4:2.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_phaddd:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    phaddd %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    phaddd (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_phaddd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    phaddd %mm1, %mm0 # sched: [3:1.50]
 ; SANDY-NEXT:    phaddd (%rdi), %mm0 # sched: [8:1.50]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phaddd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    phaddd %mm1, %mm0 # sched: [3:2.00]
 ; HASWELL-NEXT:    phaddd (%rdi), %mm0 # sched: [8:2.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_phaddd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    phaddd %mm1, %mm0 # sched: [3:2.00]
 ; BROADWELL-NEXT:    phaddd (%rdi), %mm0 # sched: [8:2.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_phaddd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    phaddd %mm1, %mm0 # sched: [3:2.00]
 ; SKYLAKE-NEXT:    phaddd (%rdi), %mm0 # sched: [8:2.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_phaddd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    phaddd %mm1, %mm0 # sched: [3:2.00]
 ; SKX-NEXT:    phaddd (%rdi), %mm0 # sched: [8:2.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_phaddd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    phaddd %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    phaddd (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_phaddd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    phaddd %mm1, %mm0 # sched: [100:?]
 ; ZNVER1-NEXT:    phaddd (%rdi), %mm0 # sched: [100:?]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -3143,70 +3143,70 @@ define i64 @test_phaddsw(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    phaddsw %mm1, %mm0 # sched: [3:1.50]
 ; GENERIC-NEXT:    phaddsw (%rdi), %mm0 # sched: [8:1.50]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_phaddsw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    phaddsw %mm1, %mm0 # sched: [5:2.50]
 ; ATOM-NEXT:    phaddsw (%rdi), %mm0 # sched: [6:3.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_phaddsw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    phaddsw %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    phaddsw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_phaddsw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    phaddsw %mm1, %mm0 # sched: [3:1.50]
 ; SANDY-NEXT:    phaddsw (%rdi), %mm0 # sched: [8:1.50]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phaddsw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    phaddsw %mm1, %mm0 # sched: [3:2.00]
 ; HASWELL-NEXT:    phaddsw (%rdi), %mm0 # sched: [8:2.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_phaddsw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    phaddsw %mm1, %mm0 # sched: [3:2.00]
 ; BROADWELL-NEXT:    phaddsw (%rdi), %mm0 # sched: [8:2.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_phaddsw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    phaddsw %mm1, %mm0 # sched: [3:2.00]
 ; SKYLAKE-NEXT:    phaddsw (%rdi), %mm0 # sched: [8:2.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_phaddsw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    phaddsw %mm1, %mm0 # sched: [3:2.00]
 ; SKX-NEXT:    phaddsw (%rdi), %mm0 # sched: [8:2.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_phaddsw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    phaddsw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    phaddsw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_phaddsw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    phaddsw %mm1, %mm0 # sched: [100:?]
 ; ZNVER1-NEXT:    phaddsw (%rdi), %mm0 # sched: [100:?]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -3221,70 +3221,70 @@ define i64 @test_phaddw(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    phaddw %mm1, %mm0 # sched: [3:1.50]
 ; GENERIC-NEXT:    phaddw (%rdi), %mm0 # sched: [8:1.50]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_phaddw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    phaddw %mm1, %mm0 # sched: [5:2.50]
 ; ATOM-NEXT:    phaddw (%rdi), %mm0 # sched: [6:3.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_phaddw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    phaddw %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    phaddw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_phaddw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    phaddw %mm1, %mm0 # sched: [3:1.50]
 ; SANDY-NEXT:    phaddw (%rdi), %mm0 # sched: [8:1.50]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phaddw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    phaddw %mm1, %mm0 # sched: [3:2.00]
 ; HASWELL-NEXT:    phaddw (%rdi), %mm0 # sched: [8:2.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_phaddw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    phaddw %mm1, %mm0 # sched: [3:2.00]
 ; BROADWELL-NEXT:    phaddw (%rdi), %mm0 # sched: [8:2.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_phaddw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    phaddw %mm1, %mm0 # sched: [3:2.00]
 ; SKYLAKE-NEXT:    phaddw (%rdi), %mm0 # sched: [8:2.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_phaddw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    phaddw %mm1, %mm0 # sched: [3:2.00]
 ; SKX-NEXT:    phaddw (%rdi), %mm0 # sched: [8:2.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_phaddw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    phaddw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    phaddw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_phaddw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    phaddw %mm1, %mm0 # sched: [100:?]
 ; ZNVER1-NEXT:    phaddw (%rdi), %mm0 # sched: [100:?]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -3299,70 +3299,70 @@ define i64 @test_phsubd(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    phsubd %mm1, %mm0 # sched: [3:1.50]
 ; GENERIC-NEXT:    phsubd (%rdi), %mm0 # sched: [8:1.50]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_phsubd:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    phsubd %mm1, %mm0 # sched: [3:1.50]
 ; ATOM-NEXT:    phsubd (%rdi), %mm0 # sched: [4:2.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_phsubd:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    phsubd %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    phsubd (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_phsubd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    phsubd %mm1, %mm0 # sched: [3:1.50]
 ; SANDY-NEXT:    phsubd (%rdi), %mm0 # sched: [8:1.50]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phsubd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    phsubd %mm1, %mm0 # sched: [3:2.00]
 ; HASWELL-NEXT:    phsubd (%rdi), %mm0 # sched: [8:2.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_phsubd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    phsubd %mm1, %mm0 # sched: [3:2.00]
 ; BROADWELL-NEXT:    phsubd (%rdi), %mm0 # sched: [8:2.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_phsubd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    phsubd %mm1, %mm0 # sched: [3:2.00]
 ; SKYLAKE-NEXT:    phsubd (%rdi), %mm0 # sched: [8:2.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_phsubd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    phsubd %mm1, %mm0 # sched: [3:2.00]
 ; SKX-NEXT:    phsubd (%rdi), %mm0 # sched: [8:2.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_phsubd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    phsubd %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    phsubd (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_phsubd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    phsubd %mm1, %mm0 # sched: [100:?]
 ; ZNVER1-NEXT:    phsubd (%rdi), %mm0 # sched: [100:?]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -3377,70 +3377,70 @@ define i64 @test_phsubsw(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    phsubsw %mm1, %mm0 # sched: [3:1.50]
 ; GENERIC-NEXT:    phsubsw (%rdi), %mm0 # sched: [8:1.50]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_phsubsw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    phsubsw %mm1, %mm0 # sched: [5:2.50]
 ; ATOM-NEXT:    phsubsw (%rdi), %mm0 # sched: [6:3.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_phsubsw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    phsubsw %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    phsubsw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_phsubsw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    phsubsw %mm1, %mm0 # sched: [3:1.50]
 ; SANDY-NEXT:    phsubsw (%rdi), %mm0 # sched: [8:1.50]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phsubsw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    phsubsw %mm1, %mm0 # sched: [3:2.00]
 ; HASWELL-NEXT:    phsubsw (%rdi), %mm0 # sched: [8:2.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_phsubsw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    phsubsw %mm1, %mm0 # sched: [3:2.00]
 ; BROADWELL-NEXT:    phsubsw (%rdi), %mm0 # sched: [8:2.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_phsubsw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    phsubsw %mm1, %mm0 # sched: [3:2.00]
 ; SKYLAKE-NEXT:    phsubsw (%rdi), %mm0 # sched: [8:2.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_phsubsw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    phsubsw %mm1, %mm0 # sched: [3:2.00]
 ; SKX-NEXT:    phsubsw (%rdi), %mm0 # sched: [8:2.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_phsubsw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    phsubsw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    phsubsw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_phsubsw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    phsubsw %mm1, %mm0 # sched: [100:?]
 ; ZNVER1-NEXT:    phsubsw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -3455,70 +3455,70 @@ define i64 @test_phsubw(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    phsubw %mm1, %mm0 # sched: [3:1.50]
 ; GENERIC-NEXT:    phsubw (%rdi), %mm0 # sched: [8:1.50]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_phsubw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    phsubw %mm1, %mm0 # sched: [5:2.50]
 ; ATOM-NEXT:    phsubw (%rdi), %mm0 # sched: [6:3.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_phsubw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    phsubw %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    phsubw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_phsubw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    phsubw %mm1, %mm0 # sched: [3:1.50]
 ; SANDY-NEXT:    phsubw (%rdi), %mm0 # sched: [8:1.50]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_phsubw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    phsubw %mm1, %mm0 # sched: [3:2.00]
 ; HASWELL-NEXT:    phsubw (%rdi), %mm0 # sched: [8:2.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_phsubw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    phsubw %mm1, %mm0 # sched: [3:2.00]
 ; BROADWELL-NEXT:    phsubw (%rdi), %mm0 # sched: [8:2.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_phsubw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    phsubw %mm1, %mm0 # sched: [3:2.00]
 ; SKYLAKE-NEXT:    phsubw (%rdi), %mm0 # sched: [8:2.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_phsubw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    phsubw %mm1, %mm0 # sched: [3:2.00]
 ; SKX-NEXT:    phsubw (%rdi), %mm0 # sched: [8:2.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_phsubw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    phsubw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    phsubw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_phsubw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    phsubw %mm1, %mm0 # sched: [100:?]
 ; ZNVER1-NEXT:    phsubw (%rdi), %mm0 # sched: [100:?]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -3534,7 +3534,7 @@ define i64 @test_pinsrw(x86_mmx %a0, i32
 ; GENERIC-NEXT:    pinsrw $0, %edi, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    movswl (%rsi), %eax # sched: [5:0.50]
 ; GENERIC-NEXT:    pinsrw $1, %eax, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pinsrw:
@@ -3542,7 +3542,7 @@ define i64 @test_pinsrw(x86_mmx %a0, i32
 ; ATOM-NEXT:    movswl (%rsi), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    pinsrw $0, %edi, %mm0 # sched: [1:1.00]
 ; ATOM-NEXT:    pinsrw $1, %eax, %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pinsrw:
@@ -3550,7 +3550,7 @@ define i64 @test_pinsrw(x86_mmx %a0, i32
 ; SLM-NEXT:    movswl (%rsi), %eax # sched: [4:1.00]
 ; SLM-NEXT:    pinsrw $0, %edi, %mm0 # sched: [1:1.00]
 ; SLM-NEXT:    pinsrw $1, %eax, %mm0 # sched: [1:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pinsrw:
@@ -3558,7 +3558,7 @@ define i64 @test_pinsrw(x86_mmx %a0, i32
 ; SANDY-NEXT:    pinsrw $0, %edi, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    movswl (%rsi), %eax # sched: [5:0.50]
 ; SANDY-NEXT:    pinsrw $1, %eax, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pinsrw:
@@ -3566,7 +3566,7 @@ define i64 @test_pinsrw(x86_mmx %a0, i32
 ; HASWELL-NEXT:    pinsrw $0, %edi, %mm0 # sched: [2:2.00]
 ; HASWELL-NEXT:    movswl (%rsi), %eax # sched: [5:0.50]
 ; HASWELL-NEXT:    pinsrw $1, %eax, %mm0 # sched: [2:2.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pinsrw:
@@ -3574,7 +3574,7 @@ define i64 @test_pinsrw(x86_mmx %a0, i32
 ; BROADWELL-NEXT:    pinsrw $0, %edi, %mm0 # sched: [2:2.00]
 ; BROADWELL-NEXT:    movswl (%rsi), %eax # sched: [5:0.50]
 ; BROADWELL-NEXT:    pinsrw $1, %eax, %mm0 # sched: [2:2.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pinsrw:
@@ -3582,7 +3582,7 @@ define i64 @test_pinsrw(x86_mmx %a0, i32
 ; SKYLAKE-NEXT:    pinsrw $0, %edi, %mm0 # sched: [2:2.00]
 ; SKYLAKE-NEXT:    movswl (%rsi), %eax # sched: [5:0.50]
 ; SKYLAKE-NEXT:    pinsrw $1, %eax, %mm0 # sched: [2:2.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pinsrw:
@@ -3590,7 +3590,7 @@ define i64 @test_pinsrw(x86_mmx %a0, i32
 ; SKX-NEXT:    pinsrw $0, %edi, %mm0 # sched: [2:2.00]
 ; SKX-NEXT:    movswl (%rsi), %eax # sched: [5:0.50]
 ; SKX-NEXT:    pinsrw $1, %eax, %mm0 # sched: [2:2.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pinsrw:
@@ -3598,7 +3598,7 @@ define i64 @test_pinsrw(x86_mmx %a0, i32
 ; BTVER2-NEXT:    movswl (%rsi), %eax # sched: [4:1.00]
 ; BTVER2-NEXT:    pinsrw $0, %edi, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pinsrw $1, %eax, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pinsrw:
@@ -3606,7 +3606,7 @@ define i64 @test_pinsrw(x86_mmx %a0, i32
 ; ZNVER1-NEXT:    movswl (%rsi), %eax # sched: [8:0.50]
 ; ZNVER1-NEXT:    pinsrw $0, %edi, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    pinsrw $1, %eax, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %a0, i32 %a1, i32 0)
   %2 = load i16, i16 *%a2, align 2
@@ -3622,70 +3622,70 @@ define i64 @test_pmaddwd(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pmaddwd %mm1, %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    pmaddwd (%rdi), %mm0 # sched: [9:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmaddwd:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pmaddwd %mm1, %mm0 # sched: [4:4.00]
 ; ATOM-NEXT:    pmaddwd (%rdi), %mm0 # sched: [4:4.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pmaddwd:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pmaddwd %mm1, %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    pmaddwd (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pmaddwd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pmaddwd %mm1, %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    pmaddwd (%rdi), %mm0 # sched: [9:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaddwd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pmaddwd %mm1, %mm0 # sched: [5:1.00]
 ; HASWELL-NEXT:    pmaddwd (%rdi), %mm0 # sched: [10:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pmaddwd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pmaddwd %mm1, %mm0 # sched: [5:1.00]
 ; BROADWELL-NEXT:    pmaddwd (%rdi), %mm0 # sched: [10:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pmaddwd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pmaddwd %mm1, %mm0 # sched: [4:1.00]
 ; SKYLAKE-NEXT:    pmaddwd (%rdi), %mm0 # sched: [9:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pmaddwd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pmaddwd %mm1, %mm0 # sched: [4:1.00]
 ; SKX-NEXT:    pmaddwd (%rdi), %mm0 # sched: [9:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pmaddwd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pmaddwd %mm1, %mm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    pmaddwd (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pmaddwd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pmaddwd %mm1, %mm0 # sched: [4:1.00]
 ; ZNVER1-NEXT:    pmaddwd (%rdi), %mm0 # sched: [11:1.00]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -3700,70 +3700,70 @@ define i64 @test_pmaddubsw(x86_mmx %a0,
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pmaddubsw %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmaddubsw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pmaddubsw %mm1, %mm0 # sched: [4:4.00]
 ; ATOM-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [4:4.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pmaddubsw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pmaddubsw %mm1, %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pmaddubsw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pmaddubsw %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaddubsw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pmaddubsw %mm1, %mm0 # sched: [5:1.00]
 ; HASWELL-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [10:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pmaddubsw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pmaddubsw %mm1, %mm0 # sched: [5:1.00]
 ; BROADWELL-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [10:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pmaddubsw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pmaddubsw %mm1, %mm0 # sched: [4:1.00]
 ; SKYLAKE-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [9:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pmaddubsw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pmaddubsw %mm1, %mm0 # sched: [4:1.00]
 ; SKX-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [9:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pmaddubsw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pmaddubsw %mm1, %mm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pmaddubsw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pmaddubsw %mm1, %mm0 # sched: [4:1.00]
 ; ZNVER1-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [11:1.00]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -3778,70 +3778,70 @@ define i64 @test_pmaxsw(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pmaxsw %mm1, %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    pmaxsw (%rdi), %mm0 # sched: [9:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmaxsw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pmaxsw %mm1, %mm0 # sched: [1:1.00]
 ; ATOM-NEXT:    pmaxsw (%rdi), %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pmaxsw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pmaxsw %mm1, %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    pmaxsw (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pmaxsw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pmaxsw %mm1, %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    pmaxsw (%rdi), %mm0 # sched: [9:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaxsw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pmaxsw %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    pmaxsw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pmaxsw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pmaxsw %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    pmaxsw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pmaxsw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pmaxsw %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    pmaxsw (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pmaxsw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pmaxsw %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    pmaxsw (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pmaxsw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pmaxsw %mm1, %mm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    pmaxsw (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pmaxsw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pmaxsw %mm1, %mm0 # sched: [4:1.00]
 ; ZNVER1-NEXT:    pmaxsw (%rdi), %mm0 # sched: [11:1.00]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -3856,70 +3856,70 @@ define i64 @test_pmaxub(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pmaxub %mm1, %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    pmaxub (%rdi), %mm0 # sched: [9:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmaxub:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pmaxub %mm1, %mm0 # sched: [1:1.00]
 ; ATOM-NEXT:    pmaxub (%rdi), %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pmaxub:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pmaxub %mm1, %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    pmaxub (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pmaxub:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pmaxub %mm1, %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    pmaxub (%rdi), %mm0 # sched: [9:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmaxub:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pmaxub %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    pmaxub (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pmaxub:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pmaxub %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    pmaxub (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pmaxub:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pmaxub %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    pmaxub (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pmaxub:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pmaxub %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    pmaxub (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pmaxub:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pmaxub %mm1, %mm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    pmaxub (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pmaxub:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pmaxub %mm1, %mm0 # sched: [4:1.00]
 ; ZNVER1-NEXT:    pmaxub (%rdi), %mm0 # sched: [11:1.00]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -3934,70 +3934,70 @@ define i64 @test_pminsw(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pminsw %mm1, %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    pminsw (%rdi), %mm0 # sched: [9:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pminsw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pminsw %mm1, %mm0 # sched: [1:1.00]
 ; ATOM-NEXT:    pminsw (%rdi), %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pminsw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pminsw %mm1, %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    pminsw (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pminsw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pminsw %mm1, %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    pminsw (%rdi), %mm0 # sched: [9:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pminsw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pminsw %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    pminsw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pminsw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pminsw %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    pminsw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pminsw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pminsw %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    pminsw (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pminsw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pminsw %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    pminsw (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pminsw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pminsw %mm1, %mm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    pminsw (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pminsw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pminsw %mm1, %mm0 # sched: [4:1.00]
 ; ZNVER1-NEXT:    pminsw (%rdi), %mm0 # sched: [11:1.00]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -4012,70 +4012,70 @@ define i64 @test_pminub(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pminub %mm1, %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    pminub (%rdi), %mm0 # sched: [9:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pminub:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pminub %mm1, %mm0 # sched: [1:1.00]
 ; ATOM-NEXT:    pminub (%rdi), %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pminub:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pminub %mm1, %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    pminub (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pminub:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pminub %mm1, %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    pminub (%rdi), %mm0 # sched: [9:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pminub:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pminub %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    pminub (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pminub:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pminub %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    pminub (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pminub:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pminub %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    pminub (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pminub:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pminub %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    pminub (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pminub:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pminub %mm1, %mm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    pminub (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pminub:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pminub %mm1, %mm0 # sched: [4:1.00]
 ; ZNVER1-NEXT:    pminub (%rdi), %mm0 # sched: [11:1.00]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -4145,70 +4145,70 @@ define i64 @test_pmulhrsw(x86_mmx %a0, x
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pmulhrsw %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmulhrsw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pmulhrsw %mm1, %mm0 # sched: [4:4.00]
 ; ATOM-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [4:4.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pmulhrsw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pmulhrsw %mm1, %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pmulhrsw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pmulhrsw %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmulhrsw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pmulhrsw %mm1, %mm0 # sched: [5:1.00]
 ; HASWELL-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [10:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pmulhrsw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pmulhrsw %mm1, %mm0 # sched: [5:1.00]
 ; BROADWELL-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [10:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pmulhrsw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pmulhrsw %mm1, %mm0 # sched: [4:1.00]
 ; SKYLAKE-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [9:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pmulhrsw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pmulhrsw %mm1, %mm0 # sched: [4:1.00]
 ; SKX-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [9:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pmulhrsw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pmulhrsw %mm1, %mm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pmulhrsw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pmulhrsw %mm1, %mm0 # sched: [4:1.00]
 ; ZNVER1-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [11:1.00]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -4223,70 +4223,70 @@ define i64 @test_pmulhw(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pmulhw %mm1, %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    pmulhw (%rdi), %mm0 # sched: [9:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmulhw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pmulhw %mm1, %mm0 # sched: [4:4.00]
 ; ATOM-NEXT:    pmulhw (%rdi), %mm0 # sched: [4:4.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pmulhw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pmulhw %mm1, %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    pmulhw (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pmulhw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pmulhw %mm1, %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    pmulhw (%rdi), %mm0 # sched: [9:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmulhw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pmulhw %mm1, %mm0 # sched: [5:1.00]
 ; HASWELL-NEXT:    pmulhw (%rdi), %mm0 # sched: [10:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pmulhw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pmulhw %mm1, %mm0 # sched: [5:1.00]
 ; BROADWELL-NEXT:    pmulhw (%rdi), %mm0 # sched: [10:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pmulhw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pmulhw %mm1, %mm0 # sched: [4:1.00]
 ; SKYLAKE-NEXT:    pmulhw (%rdi), %mm0 # sched: [9:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pmulhw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pmulhw %mm1, %mm0 # sched: [4:1.00]
 ; SKX-NEXT:    pmulhw (%rdi), %mm0 # sched: [9:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pmulhw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pmulhw %mm1, %mm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    pmulhw (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pmulhw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pmulhw %mm1, %mm0 # sched: [4:1.00]
 ; ZNVER1-NEXT:    pmulhw (%rdi), %mm0 # sched: [11:1.00]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -4301,70 +4301,70 @@ define i64 @test_pmulhuw(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pmulhuw %mm1, %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    pmulhuw (%rdi), %mm0 # sched: [9:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmulhuw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pmulhuw %mm1, %mm0 # sched: [4:4.00]
 ; ATOM-NEXT:    pmulhuw (%rdi), %mm0 # sched: [4:4.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pmulhuw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pmulhuw %mm1, %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    pmulhuw (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pmulhuw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pmulhuw %mm1, %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    pmulhuw (%rdi), %mm0 # sched: [9:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmulhuw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pmulhuw %mm1, %mm0 # sched: [5:1.00]
 ; HASWELL-NEXT:    pmulhuw (%rdi), %mm0 # sched: [10:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pmulhuw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pmulhuw %mm1, %mm0 # sched: [5:1.00]
 ; BROADWELL-NEXT:    pmulhuw (%rdi), %mm0 # sched: [10:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pmulhuw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pmulhuw %mm1, %mm0 # sched: [4:1.00]
 ; SKYLAKE-NEXT:    pmulhuw (%rdi), %mm0 # sched: [9:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pmulhuw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pmulhuw %mm1, %mm0 # sched: [4:1.00]
 ; SKX-NEXT:    pmulhuw (%rdi), %mm0 # sched: [9:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pmulhuw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pmulhuw %mm1, %mm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    pmulhuw (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pmulhuw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pmulhuw %mm1, %mm0 # sched: [4:1.00]
 ; ZNVER1-NEXT:    pmulhuw (%rdi), %mm0 # sched: [11:1.00]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -4379,70 +4379,70 @@ define i64 @test_pmullw(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pmullw %mm1, %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    pmullw (%rdi), %mm0 # sched: [9:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmullw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pmullw %mm1, %mm0 # sched: [4:4.00]
 ; ATOM-NEXT:    pmullw (%rdi), %mm0 # sched: [4:4.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pmullw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pmullw %mm1, %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    pmullw (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pmullw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pmullw %mm1, %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    pmullw (%rdi), %mm0 # sched: [9:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmullw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pmullw %mm1, %mm0 # sched: [5:1.00]
 ; HASWELL-NEXT:    pmullw (%rdi), %mm0 # sched: [10:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pmullw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pmullw %mm1, %mm0 # sched: [5:1.00]
 ; BROADWELL-NEXT:    pmullw (%rdi), %mm0 # sched: [10:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pmullw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pmullw %mm1, %mm0 # sched: [4:1.00]
 ; SKYLAKE-NEXT:    pmullw (%rdi), %mm0 # sched: [9:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pmullw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pmullw %mm1, %mm0 # sched: [4:1.00]
 ; SKX-NEXT:    pmullw (%rdi), %mm0 # sched: [9:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pmullw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pmullw %mm1, %mm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    pmullw (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pmullw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pmullw %mm1, %mm0 # sched: [4:1.00]
 ; ZNVER1-NEXT:    pmullw (%rdi), %mm0 # sched: [11:1.00]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -4457,70 +4457,70 @@ define i64 @test_pmuludq(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pmuludq %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    pmuludq (%rdi), %mm0 # sched: [9:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pmuludq:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pmuludq %mm1, %mm0 # sched: [4:4.00]
 ; ATOM-NEXT:    pmuludq (%rdi), %mm0 # sched: [4:4.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pmuludq:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pmuludq %mm1, %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    pmuludq (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pmuludq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pmuludq %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    pmuludq (%rdi), %mm0 # sched: [9:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pmuludq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pmuludq %mm1, %mm0 # sched: [5:1.00]
 ; HASWELL-NEXT:    pmuludq (%rdi), %mm0 # sched: [10:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pmuludq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pmuludq %mm1, %mm0 # sched: [5:1.00]
 ; BROADWELL-NEXT:    pmuludq (%rdi), %mm0 # sched: [10:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pmuludq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pmuludq %mm1, %mm0 # sched: [4:1.00]
 ; SKYLAKE-NEXT:    pmuludq (%rdi), %mm0 # sched: [9:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pmuludq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pmuludq %mm1, %mm0 # sched: [4:1.00]
 ; SKX-NEXT:    pmuludq (%rdi), %mm0 # sched: [9:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pmuludq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pmuludq %mm1, %mm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    pmuludq (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pmuludq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pmuludq %mm1, %mm0 # sched: [4:1.00]
 ; ZNVER1-NEXT:    pmuludq (%rdi), %mm0 # sched: [11:1.00]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -4535,70 +4535,70 @@ define i64 @test_por(x86_mmx %a0, x86_mm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    por %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    por (%rdi), %mm0 # sched: [5:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_por:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    por %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    por (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_por:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    por %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    por (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_por:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    por %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    por (%rdi), %mm0 # sched: [5:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_por:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
 ; HASWELL-NEXT:    por (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_por:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
 ; BROADWELL-NEXT:    por (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_por:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    por %mm1, %mm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    por (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_por:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    por %mm1, %mm0 # sched: [1:0.50]
 ; SKX-NEXT:    por (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_por:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    por %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    por (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_por:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    por %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    por (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -4613,70 +4613,70 @@ define i64 @test_psadbw(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psadbw %mm1, %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    psadbw (%rdi), %mm0 # sched: [9:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psadbw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    psadbw %mm1, %mm0 # sched: [4:2.00]
 ; ATOM-NEXT:    psadbw (%rdi), %mm0 # sched: [4:2.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_psadbw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    psadbw %mm1, %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    psadbw (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_psadbw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psadbw %mm1, %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    psadbw (%rdi), %mm0 # sched: [9:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psadbw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    psadbw %mm1, %mm0 # sched: [5:1.00]
 ; HASWELL-NEXT:    psadbw (%rdi), %mm0 # sched: [10:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_psadbw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    psadbw %mm1, %mm0 # sched: [5:1.00]
 ; BROADWELL-NEXT:    psadbw (%rdi), %mm0 # sched: [10:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_psadbw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    psadbw %mm1, %mm0 # sched: [3:1.00]
 ; SKYLAKE-NEXT:    psadbw (%rdi), %mm0 # sched: [8:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_psadbw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    psadbw %mm1, %mm0 # sched: [3:1.00]
 ; SKX-NEXT:    psadbw (%rdi), %mm0 # sched: [8:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_psadbw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    psadbw %mm1, %mm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    psadbw (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psadbw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    psadbw %mm1, %mm0 # sched: [4:1.00]
 ; ZNVER1-NEXT:    psadbw (%rdi), %mm0 # sched: [11:1.00]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -4691,70 +4691,70 @@ define i64 @test_pshufb(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pshufb %mm1, %mm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    pshufb (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pshufb:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pshufb %mm1, %mm0 # sched: [1:1.00]
 ; ATOM-NEXT:    pshufb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pshufb:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pshufb %mm1, %mm0 # sched: [1:1.00]
 ; SLM-NEXT:    pshufb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pshufb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pshufb %mm1, %mm0 # sched: [1:0.50]
 ; SANDY-NEXT:    pshufb (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pshufb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pshufb %mm1, %mm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    pshufb (%rdi), %mm0 # sched: [6:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pshufb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pshufb %mm1, %mm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    pshufb (%rdi), %mm0 # sched: [6:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pshufb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pshufb %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    pshufb (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pshufb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pshufb %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    pshufb (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pshufb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pshufb %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pshufb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pshufb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pshufb %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    pshufb (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -4769,70 +4769,70 @@ define i64 @test_pshufw(x86_mmx *%a0) op
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [5:1.00]
 ; GENERIC-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pshufw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [1:1.00]
 ; ATOM-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pshufw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [4:1.00]
 ; SLM-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pshufw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [5:1.00]
 ; SANDY-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pshufw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
 ; HASWELL-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pshufw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
 ; BROADWELL-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pshufw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
 ; SKYLAKE-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pshufw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
 ; SKX-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pshufw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
 ; BTVER2-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:0.50]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pshufw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [8:0.50]
 ; ZNVER1-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:0.25]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = load x86_mmx, x86_mmx *%a0, align 8
   %2 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 0)
@@ -4847,70 +4847,70 @@ define i64 @test_psignb(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    psignb (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psignb:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    psignb %mm1, %mm0 # sched: [1:1.00]
 ; ATOM-NEXT:    psignb (%rdi), %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_psignb:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    psignb %mm1, %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    psignb (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_psignb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
 ; SANDY-NEXT:    psignb (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psignb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    psignb (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_psignb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    psignb (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_psignb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    psignb (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_psignb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
 ; SKX-NEXT:    psignb (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_psignb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    psignb %mm1, %mm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    psignb (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psignb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    psignb %mm1, %mm0 # sched: [4:1.00]
 ; ZNVER1-NEXT:    psignb (%rdi), %mm0 # sched: [11:1.00]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -4925,70 +4925,70 @@ define i64 @test_psignd(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    psignd (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psignd:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    psignd %mm1, %mm0 # sched: [1:1.00]
 ; ATOM-NEXT:    psignd (%rdi), %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_psignd:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    psignd %mm1, %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    psignd (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_psignd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
 ; SANDY-NEXT:    psignd (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psignd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    psignd (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_psignd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    psignd (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_psignd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    psignd (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_psignd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
 ; SKX-NEXT:    psignd (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_psignd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    psignd %mm1, %mm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    psignd (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psignd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    psignd %mm1, %mm0 # sched: [4:1.00]
 ; ZNVER1-NEXT:    psignd (%rdi), %mm0 # sched: [11:1.00]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -5003,70 +5003,70 @@ define i64 @test_psignw(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    psignw (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psignw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    psignw %mm1, %mm0 # sched: [1:1.00]
 ; ATOM-NEXT:    psignw (%rdi), %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_psignw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    psignw %mm1, %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    psignw (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_psignw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
 ; SANDY-NEXT:    psignw (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psignw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    psignw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_psignw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    psignw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_psignw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    psignw (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_psignw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
 ; SKX-NEXT:    psignw (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_psignw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    psignw %mm1, %mm0 # sched: [2:1.00]
 ; BTVER2-NEXT:    psignw (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psignw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    psignw %mm1, %mm0 # sched: [4:1.00]
 ; ZNVER1-NEXT:    psignw (%rdi), %mm0 # sched: [11:1.00]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -5082,7 +5082,7 @@ define i64 @test_pslld(x86_mmx %a0, x86_
 ; GENERIC-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    pslld (%rdi), %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pslld:
@@ -5090,7 +5090,7 @@ define i64 @test_pslld(x86_mmx %a0, x86_
 ; ATOM-NEXT:    pslld %mm1, %mm0 # sched: [2:1.00]
 ; ATOM-NEXT:    pslld (%rdi), %mm0 # sched: [3:1.50]
 ; ATOM-NEXT:    pslld $7, %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pslld:
@@ -5098,7 +5098,7 @@ define i64 @test_pslld(x86_mmx %a0, x86_
 ; SLM-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
 ; SLM-NEXT:    pslld (%rdi), %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pslld:
@@ -5106,7 +5106,7 @@ define i64 @test_pslld(x86_mmx %a0, x86_
 ; SANDY-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    pslld (%rdi), %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pslld:
@@ -5114,7 +5114,7 @@ define i64 @test_pslld(x86_mmx %a0, x86_
 ; HASWELL-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    pslld (%rdi), %mm0 # sched: [6:1.00]
 ; HASWELL-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pslld:
@@ -5122,7 +5122,7 @@ define i64 @test_pslld(x86_mmx %a0, x86_
 ; BROADWELL-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    pslld (%rdi), %mm0 # sched: [6:1.00]
 ; BROADWELL-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pslld:
@@ -5130,7 +5130,7 @@ define i64 @test_pslld(x86_mmx %a0, x86_
 ; SKYLAKE-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    pslld (%rdi), %mm0 # sched: [6:1.00]
 ; SKYLAKE-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pslld:
@@ -5138,7 +5138,7 @@ define i64 @test_pslld(x86_mmx %a0, x86_
 ; SKX-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    pslld (%rdi), %mm0 # sched: [6:1.00]
 ; SKX-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pslld:
@@ -5146,7 +5146,7 @@ define i64 @test_pslld(x86_mmx %a0, x86_
 ; BTVER2-NEXT:    pslld %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pslld (%rdi), %mm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    pslld $7, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pslld:
@@ -5154,7 +5154,7 @@ define i64 @test_pslld(x86_mmx %a0, x86_
 ; ZNVER1-NEXT:    pslld %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    pslld (%rdi), %mm0 # sched: [8:0.50]
 ; ZNVER1-NEXT:    pslld $7, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -5172,7 +5172,7 @@ define i64 @test_psllq(x86_mmx %a0, x86_
 ; GENERIC-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    psllq (%rdi), %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psllq:
@@ -5180,7 +5180,7 @@ define i64 @test_psllq(x86_mmx %a0, x86_
 ; ATOM-NEXT:    psllq %mm1, %mm0 # sched: [2:1.00]
 ; ATOM-NEXT:    psllq (%rdi), %mm0 # sched: [3:1.50]
 ; ATOM-NEXT:    psllq $7, %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_psllq:
@@ -5188,7 +5188,7 @@ define i64 @test_psllq(x86_mmx %a0, x86_
 ; SLM-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
 ; SLM-NEXT:    psllq (%rdi), %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_psllq:
@@ -5196,7 +5196,7 @@ define i64 @test_psllq(x86_mmx %a0, x86_
 ; SANDY-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    psllq (%rdi), %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psllq:
@@ -5204,7 +5204,7 @@ define i64 @test_psllq(x86_mmx %a0, x86_
 ; HASWELL-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    psllq (%rdi), %mm0 # sched: [6:1.00]
 ; HASWELL-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_psllq:
@@ -5212,7 +5212,7 @@ define i64 @test_psllq(x86_mmx %a0, x86_
 ; BROADWELL-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    psllq (%rdi), %mm0 # sched: [6:1.00]
 ; BROADWELL-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_psllq:
@@ -5220,7 +5220,7 @@ define i64 @test_psllq(x86_mmx %a0, x86_
 ; SKYLAKE-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    psllq (%rdi), %mm0 # sched: [6:1.00]
 ; SKYLAKE-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_psllq:
@@ -5228,7 +5228,7 @@ define i64 @test_psllq(x86_mmx %a0, x86_
 ; SKX-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    psllq (%rdi), %mm0 # sched: [6:1.00]
 ; SKX-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_psllq:
@@ -5236,7 +5236,7 @@ define i64 @test_psllq(x86_mmx %a0, x86_
 ; BTVER2-NEXT:    psllq %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psllq (%rdi), %mm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    psllq $7, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psllq:
@@ -5244,7 +5244,7 @@ define i64 @test_psllq(x86_mmx %a0, x86_
 ; ZNVER1-NEXT:    psllq %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    psllq (%rdi), %mm0 # sched: [8:0.50]
 ; ZNVER1-NEXT:    psllq $7, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -5262,7 +5262,7 @@ define i64 @test_psllw(x86_mmx %a0, x86_
 ; GENERIC-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    psllw (%rdi), %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psllw:
@@ -5270,7 +5270,7 @@ define i64 @test_psllw(x86_mmx %a0, x86_
 ; ATOM-NEXT:    psllw %mm1, %mm0 # sched: [2:1.00]
 ; ATOM-NEXT:    psllw (%rdi), %mm0 # sched: [3:1.50]
 ; ATOM-NEXT:    psllw $7, %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_psllw:
@@ -5278,7 +5278,7 @@ define i64 @test_psllw(x86_mmx %a0, x86_
 ; SLM-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
 ; SLM-NEXT:    psllw (%rdi), %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_psllw:
@@ -5286,7 +5286,7 @@ define i64 @test_psllw(x86_mmx %a0, x86_
 ; SANDY-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    psllw (%rdi), %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psllw:
@@ -5294,7 +5294,7 @@ define i64 @test_psllw(x86_mmx %a0, x86_
 ; HASWELL-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    psllw (%rdi), %mm0 # sched: [6:1.00]
 ; HASWELL-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_psllw:
@@ -5302,7 +5302,7 @@ define i64 @test_psllw(x86_mmx %a0, x86_
 ; BROADWELL-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    psllw (%rdi), %mm0 # sched: [6:1.00]
 ; BROADWELL-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_psllw:
@@ -5310,7 +5310,7 @@ define i64 @test_psllw(x86_mmx %a0, x86_
 ; SKYLAKE-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    psllw (%rdi), %mm0 # sched: [6:1.00]
 ; SKYLAKE-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_psllw:
@@ -5318,7 +5318,7 @@ define i64 @test_psllw(x86_mmx %a0, x86_
 ; SKX-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    psllw (%rdi), %mm0 # sched: [6:1.00]
 ; SKX-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_psllw:
@@ -5326,7 +5326,7 @@ define i64 @test_psllw(x86_mmx %a0, x86_
 ; BTVER2-NEXT:    psllw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psllw (%rdi), %mm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    psllw $7, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psllw:
@@ -5334,7 +5334,7 @@ define i64 @test_psllw(x86_mmx %a0, x86_
 ; ZNVER1-NEXT:    psllw %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    psllw (%rdi), %mm0 # sched: [8:0.50]
 ; ZNVER1-NEXT:    psllw $7, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -5352,7 +5352,7 @@ define i64 @test_psrad(x86_mmx %a0, x86_
 ; GENERIC-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    psrad (%rdi), %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psrad:
@@ -5360,7 +5360,7 @@ define i64 @test_psrad(x86_mmx %a0, x86_
 ; ATOM-NEXT:    psrad %mm1, %mm0 # sched: [2:1.00]
 ; ATOM-NEXT:    psrad (%rdi), %mm0 # sched: [3:1.50]
 ; ATOM-NEXT:    psrad $7, %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_psrad:
@@ -5368,7 +5368,7 @@ define i64 @test_psrad(x86_mmx %a0, x86_
 ; SLM-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
 ; SLM-NEXT:    psrad (%rdi), %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_psrad:
@@ -5376,7 +5376,7 @@ define i64 @test_psrad(x86_mmx %a0, x86_
 ; SANDY-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    psrad (%rdi), %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psrad:
@@ -5384,7 +5384,7 @@ define i64 @test_psrad(x86_mmx %a0, x86_
 ; HASWELL-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    psrad (%rdi), %mm0 # sched: [6:1.00]
 ; HASWELL-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_psrad:
@@ -5392,7 +5392,7 @@ define i64 @test_psrad(x86_mmx %a0, x86_
 ; BROADWELL-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    psrad (%rdi), %mm0 # sched: [6:1.00]
 ; BROADWELL-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_psrad:
@@ -5400,7 +5400,7 @@ define i64 @test_psrad(x86_mmx %a0, x86_
 ; SKYLAKE-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    psrad (%rdi), %mm0 # sched: [6:1.00]
 ; SKYLAKE-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_psrad:
@@ -5408,7 +5408,7 @@ define i64 @test_psrad(x86_mmx %a0, x86_
 ; SKX-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    psrad (%rdi), %mm0 # sched: [6:1.00]
 ; SKX-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_psrad:
@@ -5416,7 +5416,7 @@ define i64 @test_psrad(x86_mmx %a0, x86_
 ; BTVER2-NEXT:    psrad %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psrad (%rdi), %mm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    psrad $7, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psrad:
@@ -5424,7 +5424,7 @@ define i64 @test_psrad(x86_mmx %a0, x86_
 ; ZNVER1-NEXT:    psrad %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    psrad (%rdi), %mm0 # sched: [8:0.50]
 ; ZNVER1-NEXT:    psrad $7, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -5442,7 +5442,7 @@ define i64 @test_psraw(x86_mmx %a0, x86_
 ; GENERIC-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    psraw (%rdi), %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psraw:
@@ -5450,7 +5450,7 @@ define i64 @test_psraw(x86_mmx %a0, x86_
 ; ATOM-NEXT:    psraw %mm1, %mm0 # sched: [2:1.00]
 ; ATOM-NEXT:    psraw (%rdi), %mm0 # sched: [3:1.50]
 ; ATOM-NEXT:    psraw $7, %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_psraw:
@@ -5458,7 +5458,7 @@ define i64 @test_psraw(x86_mmx %a0, x86_
 ; SLM-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
 ; SLM-NEXT:    psraw (%rdi), %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_psraw:
@@ -5466,7 +5466,7 @@ define i64 @test_psraw(x86_mmx %a0, x86_
 ; SANDY-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    psraw (%rdi), %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psraw:
@@ -5474,7 +5474,7 @@ define i64 @test_psraw(x86_mmx %a0, x86_
 ; HASWELL-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    psraw (%rdi), %mm0 # sched: [6:1.00]
 ; HASWELL-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_psraw:
@@ -5482,7 +5482,7 @@ define i64 @test_psraw(x86_mmx %a0, x86_
 ; BROADWELL-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    psraw (%rdi), %mm0 # sched: [6:1.00]
 ; BROADWELL-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_psraw:
@@ -5490,7 +5490,7 @@ define i64 @test_psraw(x86_mmx %a0, x86_
 ; SKYLAKE-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    psraw (%rdi), %mm0 # sched: [6:1.00]
 ; SKYLAKE-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_psraw:
@@ -5498,7 +5498,7 @@ define i64 @test_psraw(x86_mmx %a0, x86_
 ; SKX-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    psraw (%rdi), %mm0 # sched: [6:1.00]
 ; SKX-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_psraw:
@@ -5506,7 +5506,7 @@ define i64 @test_psraw(x86_mmx %a0, x86_
 ; BTVER2-NEXT:    psraw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psraw (%rdi), %mm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    psraw $7, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psraw:
@@ -5514,7 +5514,7 @@ define i64 @test_psraw(x86_mmx %a0, x86_
 ; ZNVER1-NEXT:    psraw %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    psraw (%rdi), %mm0 # sched: [8:0.50]
 ; ZNVER1-NEXT:    psraw $7, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -5532,7 +5532,7 @@ define i64 @test_psrld(x86_mmx %a0, x86_
 ; GENERIC-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    psrld (%rdi), %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psrld:
@@ -5540,7 +5540,7 @@ define i64 @test_psrld(x86_mmx %a0, x86_
 ; ATOM-NEXT:    psrld %mm1, %mm0 # sched: [2:1.00]
 ; ATOM-NEXT:    psrld (%rdi), %mm0 # sched: [3:1.50]
 ; ATOM-NEXT:    psrld $7, %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_psrld:
@@ -5548,7 +5548,7 @@ define i64 @test_psrld(x86_mmx %a0, x86_
 ; SLM-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
 ; SLM-NEXT:    psrld (%rdi), %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_psrld:
@@ -5556,7 +5556,7 @@ define i64 @test_psrld(x86_mmx %a0, x86_
 ; SANDY-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    psrld (%rdi), %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psrld:
@@ -5564,7 +5564,7 @@ define i64 @test_psrld(x86_mmx %a0, x86_
 ; HASWELL-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    psrld (%rdi), %mm0 # sched: [6:1.00]
 ; HASWELL-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_psrld:
@@ -5572,7 +5572,7 @@ define i64 @test_psrld(x86_mmx %a0, x86_
 ; BROADWELL-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    psrld (%rdi), %mm0 # sched: [6:1.00]
 ; BROADWELL-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_psrld:
@@ -5580,7 +5580,7 @@ define i64 @test_psrld(x86_mmx %a0, x86_
 ; SKYLAKE-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    psrld (%rdi), %mm0 # sched: [6:1.00]
 ; SKYLAKE-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_psrld:
@@ -5588,7 +5588,7 @@ define i64 @test_psrld(x86_mmx %a0, x86_
 ; SKX-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    psrld (%rdi), %mm0 # sched: [6:1.00]
 ; SKX-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_psrld:
@@ -5596,7 +5596,7 @@ define i64 @test_psrld(x86_mmx %a0, x86_
 ; BTVER2-NEXT:    psrld %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psrld (%rdi), %mm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    psrld $7, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psrld:
@@ -5604,7 +5604,7 @@ define i64 @test_psrld(x86_mmx %a0, x86_
 ; ZNVER1-NEXT:    psrld %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    psrld (%rdi), %mm0 # sched: [8:0.50]
 ; ZNVER1-NEXT:    psrld $7, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -5622,7 +5622,7 @@ define i64 @test_psrlq(x86_mmx %a0, x86_
 ; GENERIC-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    psrlq (%rdi), %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psrlq:
@@ -5630,7 +5630,7 @@ define i64 @test_psrlq(x86_mmx %a0, x86_
 ; ATOM-NEXT:    psrlq %mm1, %mm0 # sched: [2:1.00]
 ; ATOM-NEXT:    psrlq (%rdi), %mm0 # sched: [3:1.50]
 ; ATOM-NEXT:    psrlq $7, %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_psrlq:
@@ -5638,7 +5638,7 @@ define i64 @test_psrlq(x86_mmx %a0, x86_
 ; SLM-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
 ; SLM-NEXT:    psrlq (%rdi), %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_psrlq:
@@ -5646,7 +5646,7 @@ define i64 @test_psrlq(x86_mmx %a0, x86_
 ; SANDY-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    psrlq (%rdi), %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psrlq:
@@ -5654,7 +5654,7 @@ define i64 @test_psrlq(x86_mmx %a0, x86_
 ; HASWELL-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    psrlq (%rdi), %mm0 # sched: [6:1.00]
 ; HASWELL-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_psrlq:
@@ -5662,7 +5662,7 @@ define i64 @test_psrlq(x86_mmx %a0, x86_
 ; BROADWELL-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    psrlq (%rdi), %mm0 # sched: [6:1.00]
 ; BROADWELL-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_psrlq:
@@ -5670,7 +5670,7 @@ define i64 @test_psrlq(x86_mmx %a0, x86_
 ; SKYLAKE-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    psrlq (%rdi), %mm0 # sched: [6:1.00]
 ; SKYLAKE-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_psrlq:
@@ -5678,7 +5678,7 @@ define i64 @test_psrlq(x86_mmx %a0, x86_
 ; SKX-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    psrlq (%rdi), %mm0 # sched: [6:1.00]
 ; SKX-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_psrlq:
@@ -5686,7 +5686,7 @@ define i64 @test_psrlq(x86_mmx %a0, x86_
 ; BTVER2-NEXT:    psrlq %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psrlq (%rdi), %mm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    psrlq $7, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psrlq:
@@ -5694,7 +5694,7 @@ define i64 @test_psrlq(x86_mmx %a0, x86_
 ; ZNVER1-NEXT:    psrlq %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    psrlq (%rdi), %mm0 # sched: [8:0.50]
 ; ZNVER1-NEXT:    psrlq $7, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -5712,7 +5712,7 @@ define i64 @test_psrlw(x86_mmx %a0, x86_
 ; GENERIC-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    psrlw (%rdi), %mm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psrlw:
@@ -5720,7 +5720,7 @@ define i64 @test_psrlw(x86_mmx %a0, x86_
 ; ATOM-NEXT:    psrlw %mm1, %mm0 # sched: [2:1.00]
 ; ATOM-NEXT:    psrlw (%rdi), %mm0 # sched: [3:1.50]
 ; ATOM-NEXT:    psrlw $7, %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_psrlw:
@@ -5728,7 +5728,7 @@ define i64 @test_psrlw(x86_mmx %a0, x86_
 ; SLM-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
 ; SLM-NEXT:    psrlw (%rdi), %mm0 # sched: [4:1.00]
 ; SLM-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_psrlw:
@@ -5736,7 +5736,7 @@ define i64 @test_psrlw(x86_mmx %a0, x86_
 ; SANDY-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    psrlw (%rdi), %mm0 # sched: [5:1.00]
 ; SANDY-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psrlw:
@@ -5744,7 +5744,7 @@ define i64 @test_psrlw(x86_mmx %a0, x86_
 ; HASWELL-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
 ; HASWELL-NEXT:    psrlw (%rdi), %mm0 # sched: [6:1.00]
 ; HASWELL-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_psrlw:
@@ -5752,7 +5752,7 @@ define i64 @test_psrlw(x86_mmx %a0, x86_
 ; BROADWELL-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
 ; BROADWELL-NEXT:    psrlw (%rdi), %mm0 # sched: [6:1.00]
 ; BROADWELL-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_psrlw:
@@ -5760,7 +5760,7 @@ define i64 @test_psrlw(x86_mmx %a0, x86_
 ; SKYLAKE-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    psrlw (%rdi), %mm0 # sched: [6:1.00]
 ; SKYLAKE-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_psrlw:
@@ -5768,7 +5768,7 @@ define i64 @test_psrlw(x86_mmx %a0, x86_
 ; SKX-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    psrlw (%rdi), %mm0 # sched: [6:1.00]
 ; SKX-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_psrlw:
@@ -5776,7 +5776,7 @@ define i64 @test_psrlw(x86_mmx %a0, x86_
 ; BTVER2-NEXT:    psrlw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psrlw (%rdi), %mm0 # sched: [6:1.00]
 ; BTVER2-NEXT:    psrlw $7, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psrlw:
@@ -5784,7 +5784,7 @@ define i64 @test_psrlw(x86_mmx %a0, x86_
 ; ZNVER1-NEXT:    psrlw %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    psrlw (%rdi), %mm0 # sched: [8:0.50]
 ; ZNVER1-NEXT:    psrlw $7, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -5801,70 +5801,70 @@ define i64 @test_psubb(x86_mmx %a0, x86_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psubb %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    psubb (%rdi), %mm0 # sched: [7:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psubb:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    psubb %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    psubb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_psubb:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    psubb %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    psubb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_psubb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psubb %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    psubb (%rdi), %mm0 # sched: [7:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    psubb %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    psubb (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_psubb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    psubb %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    psubb (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_psubb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    psubb %mm1, %mm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    psubb (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_psubb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    psubb %mm1, %mm0 # sched: [1:0.50]
 ; SKX-NEXT:    psubb (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_psubb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    psubb %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psubb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psubb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    psubb %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    psubb (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -5879,70 +5879,70 @@ define i64 @test_psubd(x86_mmx %a0, x86_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psubd %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    psubd (%rdi), %mm0 # sched: [7:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psubd:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    psubd %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    psubd (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_psubd:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    psubd %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    psubd (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_psubd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psubd %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    psubd (%rdi), %mm0 # sched: [7:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    psubd %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    psubd (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_psubd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    psubd %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    psubd (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_psubd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    psubd %mm1, %mm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    psubd (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_psubd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    psubd %mm1, %mm0 # sched: [1:0.50]
 ; SKX-NEXT:    psubd (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_psubd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    psubd %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psubd (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psubd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    psubd %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    psubd (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -5957,70 +5957,70 @@ define i64 @test_psubq(x86_mmx %a0, x86_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psubq %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    psubq (%rdi), %mm0 # sched: [7:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psubq:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    psubq %mm1, %mm0 # sched: [2:1.00]
 ; ATOM-NEXT:    psubq (%rdi), %mm0 # sched: [3:1.50]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_psubq:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    psubq %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    psubq (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_psubq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psubq %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    psubq (%rdi), %mm0 # sched: [7:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    psubq %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    psubq (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_psubq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    psubq %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    psubq (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_psubq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    psubq %mm1, %mm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    psubq (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_psubq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    psubq %mm1, %mm0 # sched: [1:0.50]
 ; SKX-NEXT:    psubq (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_psubq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    psubq %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psubq (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psubq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    psubq %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    psubq (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -6035,70 +6035,70 @@ define i64 @test_psubsb(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psubsb %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    psubsb (%rdi), %mm0 # sched: [7:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psubsb:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    psubsb %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    psubsb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_psubsb:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    psubsb %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    psubsb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_psubsb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psubsb %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    psubsb (%rdi), %mm0 # sched: [7:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubsb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    psubsb %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    psubsb (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_psubsb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    psubsb %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    psubsb (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_psubsb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    psubsb %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    psubsb (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_psubsb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    psubsb %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    psubsb (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_psubsb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    psubsb %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psubsb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psubsb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    psubsb %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    psubsb (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -6113,70 +6113,70 @@ define i64 @test_psubsw(x86_mmx %a0, x86
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psubsw %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    psubsw (%rdi), %mm0 # sched: [7:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psubsw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    psubsw %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    psubsw (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_psubsw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    psubsw %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    psubsw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_psubsw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psubsw %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    psubsw (%rdi), %mm0 # sched: [7:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubsw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    psubsw %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    psubsw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_psubsw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    psubsw %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    psubsw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_psubsw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    psubsw %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    psubsw (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_psubsw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    psubsw %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    psubsw (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_psubsw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    psubsw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psubsw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psubsw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    psubsw %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    psubsw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -6191,70 +6191,70 @@ define i64 @test_psubusb(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psubusb %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    psubusb (%rdi), %mm0 # sched: [7:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psubusb:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    psubusb %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    psubusb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_psubusb:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    psubusb %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    psubusb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_psubusb:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psubusb %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    psubusb (%rdi), %mm0 # sched: [7:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubusb:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    psubusb %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    psubusb (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_psubusb:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    psubusb %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    psubusb (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_psubusb:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    psubusb %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    psubusb (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_psubusb:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    psubusb %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    psubusb (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_psubusb:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    psubusb %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psubusb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psubusb:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    psubusb %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    psubusb (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -6269,70 +6269,70 @@ define i64 @test_psubusw(x86_mmx %a0, x8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psubusw %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    psubusw (%rdi), %mm0 # sched: [7:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psubusw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    psubusw %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    psubusw (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_psubusw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    psubusw %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    psubusw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_psubusw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psubusw %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    psubusw (%rdi), %mm0 # sched: [7:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubusw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    psubusw %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    psubusw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_psubusw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    psubusw %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    psubusw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_psubusw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    psubusw %mm1, %mm0 # sched: [1:1.00]
 ; SKYLAKE-NEXT:    psubusw (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_psubusw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    psubusw %mm1, %mm0 # sched: [1:1.00]
 ; SKX-NEXT:    psubusw (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_psubusw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    psubusw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psubusw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psubusw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    psubusw %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    psubusw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -6347,70 +6347,70 @@ define i64 @test_psubw(x86_mmx %a0, x86_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    psubw %mm1, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    psubw (%rdi), %mm0 # sched: [7:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_psubw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    psubw %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    psubw (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_psubw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    psubw %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    psubw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_psubw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    psubw %mm1, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    psubw (%rdi), %mm0 # sched: [7:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_psubw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    psubw %mm1, %mm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    psubw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_psubw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    psubw %mm1, %mm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    psubw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_psubw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    psubw %mm1, %mm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    psubw (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_psubw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    psubw %mm1, %mm0 # sched: [1:0.50]
 ; SKX-NEXT:    psubw (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_psubw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    psubw %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    psubw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_psubw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    psubw %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    psubw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -6425,70 +6425,70 @@ define i64 @test_punpckhbw(x86_mmx %a0,
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
 ; GENERIC-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [5:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_punpckhbw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.50]
 ; ATOM-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_punpckhbw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
 ; SLM-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_punpckhbw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
 ; SANDY-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [5:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_punpckhbw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
 ; HASWELL-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_punpckhbw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
 ; BROADWELL-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_punpckhbw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
 ; SKYLAKE-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_punpckhbw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
 ; SKX-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_punpckhbw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.50]
 ; BTVER2-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_punpckhbw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.25]
 ; ZNVER1-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -6503,70 +6503,70 @@ define i64 @test_punpckhdq(x86_mmx %a0,
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
 ; GENERIC-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [5:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_punpckhdq:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.50]
 ; ATOM-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_punpckhdq:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
 ; SLM-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_punpckhdq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
 ; SANDY-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [5:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_punpckhdq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
 ; HASWELL-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_punpckhdq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
 ; BROADWELL-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_punpckhdq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
 ; SKYLAKE-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_punpckhdq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
 ; SKX-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_punpckhdq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.50]
 ; BTVER2-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_punpckhdq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.25]
 ; ZNVER1-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -6581,70 +6581,70 @@ define i64 @test_punpckhwd(x86_mmx %a0,
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
 ; GENERIC-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [5:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_punpckhwd:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50]
 ; ATOM-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_punpckhwd:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
 ; SLM-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_punpckhwd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
 ; SANDY-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [5:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_punpckhwd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
 ; HASWELL-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_punpckhwd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
 ; BROADWELL-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_punpckhwd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
 ; SKYLAKE-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_punpckhwd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
 ; SKX-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_punpckhwd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50]
 ; BTVER2-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_punpckhwd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.25]
 ; ZNVER1-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -6659,70 +6659,70 @@ define i64 @test_punpcklbw(x86_mmx %a0,
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
 ; GENERIC-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [5:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_punpcklbw:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
 ; ATOM-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_punpcklbw:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
 ; SLM-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_punpcklbw:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
 ; SANDY-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [5:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_punpcklbw:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
 ; HASWELL-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_punpcklbw:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
 ; BROADWELL-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_punpcklbw:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
 ; SKYLAKE-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_punpcklbw:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
 ; SKX-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_punpcklbw:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50]
 ; BTVER2-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_punpcklbw:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.25]
 ; ZNVER1-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -6737,70 +6737,70 @@ define i64 @test_punpckldq(x86_mmx %a0,
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
 ; GENERIC-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [5:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_punpckldq:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
 ; ATOM-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_punpckldq:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
 ; SLM-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_punpckldq:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
 ; SANDY-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [5:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_punpckldq:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
 ; HASWELL-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_punpckldq:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
 ; BROADWELL-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_punpckldq:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
 ; SKYLAKE-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_punpckldq:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
 ; SKX-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_punpckldq:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:0.50]
 ; BTVER2-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_punpckldq:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:0.25]
 ; ZNVER1-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -6815,70 +6815,70 @@ define i64 @test_punpcklwd(x86_mmx %a0,
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
 ; GENERIC-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [5:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_punpcklwd:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
 ; ATOM-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_punpcklwd:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
 ; SLM-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_punpcklwd:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
 ; SANDY-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [5:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_punpcklwd:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
 ; HASWELL-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_punpcklwd:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
 ; BROADWELL-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_punpcklwd:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
 ; SKYLAKE-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_punpcklwd:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
 ; SKX-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_punpcklwd:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:0.50]
 ; BTVER2-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_punpcklwd:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:0.25]
 ; ZNVER1-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8
@@ -6893,70 +6893,70 @@ define i64 @test_pxor(x86_mmx %a0, x86_m
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    pxor %mm1, %mm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    pxor (%rdi), %mm0 # sched: [5:1.00]
-; GENERIC-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_pxor:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    pxor %mm1, %mm0 # sched: [1:0.50]
 ; ATOM-NEXT:    pxor (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT:    movd %mm0, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pxor:
 ; SLM:       # %bb.0:
 ; SLM-NEXT:    pxor %mm1, %mm0 # sched: [1:0.50]
 ; SLM-NEXT:    pxor (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pxor:
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    pxor %mm1, %mm0 # sched: [1:1.00]
 ; SANDY-NEXT:    pxor (%rdi), %mm0 # sched: [5:1.00]
-; SANDY-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pxor:
 ; HASWELL:       # %bb.0:
 ; HASWELL-NEXT:    pxor %mm1, %mm0 # sched: [1:0.33]
 ; HASWELL-NEXT:    pxor (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; BROADWELL-LABEL: test_pxor:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    pxor %mm1, %mm0 # sched: [1:0.33]
 ; BROADWELL-NEXT:    pxor (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT:    movd %mm0, %rax # sched: [1:1.00]
+; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pxor:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    pxor %mm1, %mm0 # sched: [1:0.50]
 ; SKYLAKE-NEXT:    pxor (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pxor:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    pxor %mm1, %mm0 # sched: [1:0.50]
 ; SKX-NEXT:    pxor (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT:    movd %mm0, %rax # sched: [1:0.33]
+; SKX-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pxor:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    pxor %mm1, %mm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    pxor (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT:    movd %mm0, %rax # sched: [1:0.50]
+; BTVER2-NEXT:    movq %mm0, %rax # sched: [1:0.50]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pxor:
 ; ZNVER1:       # %bb.0:
 ; ZNVER1-NEXT:    pxor %mm1, %mm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    pxor (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT:    movd %mm0, %rax # sched: [2:1.00]
+; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   %1 = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %a0, x86_mmx %a1)
   %2 = load x86_mmx, x86_mmx *%a2, align 8

Modified: llvm/trunk/test/CodeGen/X86/select-mmx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/select-mmx.ll?rev=321898&r1=321897&r2=321898&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/select-mmx.ll (original)
+++ llvm/trunk/test/CodeGen/X86/select-mmx.ll Fri Jan  5 12:55:12 2018
@@ -18,9 +18,9 @@ define i64 @test47(i64 %arg)  {
 ; X64-NEXT:    testq %rdi, %rdi
 ; X64-NEXT:    movl $7, %ecx
 ; X64-NEXT:    cmoveq %rcx, %rax
-; X64-NEXT:    movd %rax, %mm0
+; X64-NEXT:    movq %rax, %mm0
 ; X64-NEXT:    psllw %mm0, %mm0
-; X64-NEXT:    movd %mm0, %rax
+; X64-NEXT:    movq %mm0, %rax
 ; X64-NEXT:    retq
 ;
 ; I32-LABEL: test47:
@@ -70,9 +70,9 @@ define i64 @test49(i64 %arg, i64 %x, i64
 ; X64:       # %bb.0:
 ; X64-NEXT:    testq %rdi, %rdi
 ; X64-NEXT:    cmovneq %rdx, %rsi
-; X64-NEXT:    movd %rsi, %mm0
+; X64-NEXT:    movq %rsi, %mm0
 ; X64-NEXT:    psllw %mm0, %mm0
-; X64-NEXT:    movd %mm0, %rax
+; X64-NEXT:    movq %mm0, %rax
 ; X64-NEXT:    retq
 ;
 ; I32-LABEL: test49:

Modified: llvm/trunk/test/CodeGen/X86/x86-64-psub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-64-psub.ll?rev=321898&r1=321897&r2=321898&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x86-64-psub.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x86-64-psub.ll Fri Jan  5 12:55:12 2018
@@ -30,8 +30,8 @@ entry:
 ; CHECK:   callq getFirstParam
 ; CHECK:   movq %rax, [[TEMP:%[a-z0-9]+]]
 ; CHECK:   callq getSecondParam
-; CHECK:   movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
-; CHECK:   movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK:   movq [[TEMP]], [[PARAM1:%[a-z0-9]+]]
+; CHECK:   movq %rax, [[PARAM2:%[a-z0-9]+]]
 ; CHECK:   psubb [[PARAM2]], [[PARAM1]]
 ; CHECK: ret
 
@@ -58,8 +58,8 @@ entry:
 ; CHECK:   callq getFirstParam
 ; CHECK:   movq %rax, [[TEMP:%[a-z0-9]+]]
 ; CHECK:   callq getSecondParam
-; CHECK:   movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
-; CHECK:   movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK:   movq [[TEMP]], [[PARAM1:%[a-z0-9]+]]
+; CHECK:   movq %rax, [[PARAM2:%[a-z0-9]+]]
 ; CHECK:   psubw [[PARAM2]], [[PARAM1]]
 ; CHECK: ret
 
@@ -87,8 +87,8 @@ entry:
 ; CHECK:   callq getFirstParam
 ; CHECK:   movq %rax, [[TEMP:%[a-z0-9]+]]
 ; CHECK:   callq getSecondParam
-; CHECK:   movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
-; CHECK:   movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK:   movq [[TEMP]], [[PARAM1:%[a-z0-9]+]]
+; CHECK:   movq %rax, [[PARAM2:%[a-z0-9]+]]
 ; CHECK:   psubd [[PARAM2]], [[PARAM1]]
 ; CHECK: ret
 
@@ -115,8 +115,8 @@ entry:
 ; CHECK:   callq getFirstParam
 ; CHECK:   movq %rax, [[TEMP:%[a-z0-9]+]]
 ; CHECK:   callq getSecondParam
-; CHECK:   movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
-; CHECK:   movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK:   movq [[TEMP]], [[PARAM1:%[a-z0-9]+]]
+; CHECK:   movq %rax, [[PARAM2:%[a-z0-9]+]]
 ; CHECK:   psubsb [[PARAM2]], [[PARAM1]]
 ; CHECK: ret
 
@@ -143,8 +143,8 @@ entry:
 ; CHECK:   callq getFirstParam
 ; CHECK:   movq %rax, [[TEMP:%[a-z0-9]+]]
 ; CHECK:   callq getSecondParam
-; CHECK:   movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
-; CHECK:   movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK:   movq [[TEMP]], [[PARAM1:%[a-z0-9]+]]
+; CHECK:   movq %rax, [[PARAM2:%[a-z0-9]+]]
 ; CHECK:   psubsw [[PARAM2]], [[PARAM1]]
 ; CHECK: ret
 
@@ -171,8 +171,8 @@ entry:
 ; CHECK:   callq getFirstParam
 ; CHECK:   movq %rax, [[TEMP:%[a-z0-9]+]]
 ; CHECK:   callq getSecondParam
-; CHECK:   movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
-; CHECK:   movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK:   movq [[TEMP]], [[PARAM1:%[a-z0-9]+]]
+; CHECK:   movq %rax, [[PARAM2:%[a-z0-9]+]]
 ; CHECK:   psubusb [[PARAM2]], [[PARAM1]]
 ; CHECK: ret
 
@@ -199,8 +199,8 @@ entry:
 ; CHECK:   callq getFirstParam
 ; CHECK:   movq %rax, [[TEMP:%[a-z0-9]+]]
 ; CHECK:   callq getSecondParam
-; CHECK:   movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
-; CHECK:   movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK:   movq [[TEMP]], [[PARAM1:%[a-z0-9]+]]
+; CHECK:   movq %rax, [[PARAM2:%[a-z0-9]+]]
 ; CHECK:   psubusw [[PARAM2]], [[PARAM1]]
 ; CHECK: ret
 

Modified: llvm/trunk/test/MC/Disassembler/X86/x86-64.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/X86/x86-64.txt?rev=321898&r1=321897&r2=321898&view=diff
==============================================================================
--- llvm/trunk/test/MC/Disassembler/X86/x86-64.txt (original)
+++ llvm/trunk/test/MC/Disassembler/X86/x86-64.txt Fri Jan  5 12:55:12 2018
@@ -236,16 +236,16 @@
 # CHECK: vmovq %xmm0, %rax
 0xc4 0xe1 0xf9 0x7e 0xc0
 
-# CHECK: movd (%rax), %mm0
+# CHECK: movq (%rax), %mm0
 0x48 0x0f 0x6e 0x00
 
-# CHECK: movd %rax, %mm0
+# CHECK: movq %rax, %mm0
 0x48 0x0f 0x6e 0xc0
 
-# CHECK: movd %mm0, (%rax)
+# CHECK: movq %mm0, (%rax)
 0x48 0x0f 0x7e 0x00
 
-# CHECK: movd %mm0, %rax
+# CHECK: movq %mm0, %rax
 0x48 0x0f 0x7e 0xc0
 
 # CHECK: movq (%rax), %xmm0

Modified: llvm/trunk/test/MC/X86/x86-64.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/x86-64.s?rev=321898&r1=321897&r2=321898&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/x86-64.s (original)
+++ llvm/trunk/test/MC/X86/x86-64.s Fri Jan  5 12:55:12 2018
@@ -1131,10 +1131,10 @@ mov %gs, (%rsi)  // CHECK: movw	%gs, (%r
 	idiv	0x12345678,%eax
 
 // PR8524
-movd	%rax, %mm5 // CHECK: movd %rax, %mm5 # encoding: [0x48,0x0f,0x6e,0xe8]
-movd	%mm5, %rbx // CHECK: movd %mm5, %rbx # encoding: [0x48,0x0f,0x7e,0xeb]
-movq	%rax, %mm5 // CHECK: movd %rax, %mm5 # encoding: [0x48,0x0f,0x6e,0xe8]
-movq	%mm5, %rbx // CHECK: movd %mm5, %rbx # encoding: [0x48,0x0f,0x7e,0xeb]
+movd	%rax, %mm5 // CHECK: movq %rax, %mm5 # encoding: [0x48,0x0f,0x6e,0xe8]
+movd	%mm5, %rbx // CHECK: movq %mm5, %rbx # encoding: [0x48,0x0f,0x7e,0xeb]
+movq	%rax, %mm5 // CHECK: movq %rax, %mm5 # encoding: [0x48,0x0f,0x6e,0xe8]
+movq	%mm5, %rbx // CHECK: movq %mm5, %rbx # encoding: [0x48,0x0f,0x7e,0xeb]
 
 rex64 // CHECK: rex64 # encoding: [0x48]
 data16 // CHECK: data16 # encoding: [0x66]

Modified: llvm/trunk/test/MC/X86/x86_64-encoding.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/x86_64-encoding.s?rev=321898&r1=321897&r2=321898&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/x86_64-encoding.s (original)
+++ llvm/trunk/test/MC/X86/x86_64-encoding.s Fri Jan  5 12:55:12 2018
@@ -82,7 +82,7 @@ movq	%gs:(%rdi), %rax
 // CHECK:  encoding: [0xf2,0x48,0x0f,0x38,0xf1,0x43,0x04]
         crc32q	4(%rbx), %rax
 
-// CHECK: movd %r8, %mm1
+// CHECK: movq %r8, %mm1
 // CHECK:  encoding: [0x49,0x0f,0x6e,0xc8]
 movd %r8, %mm1
 
@@ -90,7 +90,7 @@ movd %r8, %mm1
 // CHECK:  encoding: [0x41,0x0f,0x6e,0xc8]
 movd %r8d, %mm1
 
-// CHECK: movd %rdx, %mm1
+// CHECK: movq %rdx, %mm1
 // CHECK:  encoding: [0x48,0x0f,0x6e,0xca]
 movd %rdx, %mm1
 
@@ -98,7 +98,7 @@ movd %rdx, %mm1
 // CHECK:  encoding: [0x0f,0x6e,0xca]
 movd %edx, %mm1
 
-// CHECK: movd %mm1, %r8
+// CHECK: movq %mm1, %r8
 // CHECK:  encoding: [0x49,0x0f,0x7e,0xc8]
 movd %mm1, %r8
 
@@ -106,7 +106,7 @@ movd %mm1, %r8
 // CHECK:  encoding: [0x41,0x0f,0x7e,0xc8]
 movd %mm1, %r8d
 
-// CHECK: movd %mm1, %rdx
+// CHECK: movq %mm1, %rdx
 // CHECK:  encoding: [0x48,0x0f,0x7e,0xca]
 movd %mm1, %rdx
 
@@ -114,6 +114,30 @@ movd %mm1, %rdx
 // CHECK:  encoding: [0x0f,0x7e,0xca]
 movd %mm1, %edx
 
+// CHECK: movd %mm1, (%rax)
+// CHECK:  encoding: [0x0f,0x7e,0x08]
+movd %mm1, (%rax)
+
+// CHECK: movd (%rax), %mm1
+// CHECK:  encoding: [0x0f,0x6e,0x08]
+movd (%rax), %mm1
+
+// CHECK: movq %r8, %mm1
+// CHECK:  encoding: [0x49,0x0f,0x6e,0xc8]
+movq %r8, %mm1
+
+// CHECK: movq %rdx, %mm1
+// CHECK:  encoding: [0x48,0x0f,0x6e,0xca]
+movq %rdx, %mm1
+
+// CHECK: movq %mm1, %r8
+// CHECK:  encoding: [0x49,0x0f,0x7e,0xc8]
+movq %mm1, %r8
+
+// CHECK: movq %mm1, %rdx
+// CHECK:  encoding: [0x48,0x0f,0x7e,0xca]
+movq %mm1, %rdx
+
 // rdar://7840289
 // CHECK: pshufb	CPI1_0(%rip), %xmm1
 // CHECK:  encoding: [0x66,0x0f,0x38,0x00,0x0d,A,A,A,A]




More information about the llvm-commits mailing list