[llvm] r268831 - [X86] Teach X86FixupBWInsts to promote MOV8rr/MOV16rr to MOV32rr.

Ahmed Bougacha via llvm-commits <llvm-commits at lists.llvm.org>
Fri May 6 18:11:18 PDT 2016


Author: ab
Date: Fri May  6 20:11:17 2016
New Revision: 268831

URL: http://llvm.org/viewvc/llvm-project?rev=268831&view=rev
Log:
[X86] Teach X86FixupBWInsts to promote MOV8rr/MOV16rr to MOV32rr.

This re-applies r268760, reverted in r268794.
Fixes http://llvm.org/PR27670

The original imp-defs assertion was way overzealous: forward all
implicit operands, except imp-defs of the new super-reg def (r268787
for GR64, but also possible for GR16->GR32), or imp-uses of the new
super-reg use.
While there, mark the source use as Undef, and add an imp-use of the
old source reg: that should cover any case of dead super-regs.

At the stage the pass runs, flags are unlikely to matter anyway;
still, let's be as correct as possible.

Also add MIR tests for the various interesting cases.
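
As a concrete picture of the operand bookkeeping, here is one of those
cases (test_movb_impdef_gr64 in the new fixup-bw-copy.mir below), shown
before and after the pass:

    %al = MOV8rr %dil, implicit-def %rax
    ; becomes:
    %eax = MOV32rr undef %edi, implicit %dil, implicit-def %rax

The imp-def of %rax is forwarded (being wider than the new %eax def, it
is not redundant), the widened source is read as undef %edi, and the
imp-use of the original %dil records that the low byte is still
genuinely read.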

Original commit message:
Codesize is less (16) or equal (8), and we avoid partial
dependencies.
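
To put rough numbers on the size claim (standard x86-64 encodings,
noted here only for illustration):

    %ax  = MOV16rr %di   ; movw %di, %ax   = 66 89 f8  (3 bytes)
    %eax = MOV32rr %edi  ; movl %edi, %eax = 89 f8     (2 bytes)
    %dl  = MOV8rr %cl    ; movb %cl, %dl   = 88 ca     (2 bytes, equal
                         ; to the 2-byte movl %ecx, %edx)

Widening a 16-bit copy drops the 0x66 operand-size prefix; an 8-bit
copy is at worst size-neutral.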

Differential Revision: http://reviews.llvm.org/D19999

Added:
    llvm/trunk/test/CodeGen/X86/fixup-bw-copy.ll
    llvm/trunk/test/CodeGen/X86/fixup-bw-copy.mir
Modified:
    llvm/trunk/lib/Target/X86/X86FixupBWInsts.cpp
    llvm/trunk/test/CodeGen/X86/2011-06-14-PreschedRegalias.ll
    llvm/trunk/test/CodeGen/X86/anyext.ll
    llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll
    llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
    llvm/trunk/test/CodeGen/X86/avx512-select.ll
    llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll
    llvm/trunk/test/CodeGen/X86/cmovcmov.ll
    llvm/trunk/test/CodeGen/X86/float-conv-elim.ll
    llvm/trunk/test/CodeGen/X86/machine-combiner-int.ll
    llvm/trunk/test/CodeGen/X86/opt-ext-uses.ll
    llvm/trunk/test/CodeGen/X86/pr23664.ll
    llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll
    llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll
    llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll
    llvm/trunk/test/CodeGen/X86/xaluo.ll

Modified: llvm/trunk/lib/Target/X86/X86FixupBWInsts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FixupBWInsts.cpp?rev=268831&r1=268830&r2=268831&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86FixupBWInsts.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86FixupBWInsts.cpp Fri May  6 20:11:17 2016
@@ -90,6 +90,11 @@ class FixupBWInstPass : public MachineFu
   /// OK, otherwise return nullptr.
   MachineInstr *tryReplaceLoad(unsigned New32BitOpcode, MachineInstr *MI) const;
 
+  /// Change the MachineInstr \p MI into the equivalent 32-bit copy if it is
+  /// safe to do so.  Return the replacement instruction if OK, otherwise return
+  /// nullptr.
+  MachineInstr *tryReplaceCopy(MachineInstr *MI) const;
+
 public:
   static char ID;
 
@@ -226,6 +231,42 @@ MachineInstr *FixupBWInstPass::tryReplac
   return MIB;
 }
 
+MachineInstr *FixupBWInstPass::tryReplaceCopy(MachineInstr *MI) const {
+  assert(MI->getNumExplicitOperands() == 2);
+  auto &OldDest = MI->getOperand(0);
+  auto &OldSrc = MI->getOperand(1);
+
+  unsigned NewDestReg;
+  if (!getSuperRegDestIfDead(MI, NewDestReg))
+    return nullptr;
+
+  unsigned NewSrcReg = getX86SubSuperRegister(OldSrc.getReg(), 32);
+
+  // This is only correct if we access the same subregister index: otherwise,
+  // we could try to replace "movb %ah, %al" with "movl %eax, %eax".
+  auto *TRI = &TII->getRegisterInfo();
+  if (TRI->getSubRegIndex(NewSrcReg, OldSrc.getReg()) !=
+      TRI->getSubRegIndex(NewDestReg, OldDest.getReg()))
+    return nullptr;
+
+  // Safe to change the instruction.
+  // Don't set src flags, as we don't know if we're also killing the superreg.
+  // However, the superregister might not be defined; make it explicit that
+  // we don't care about the higher bits by reading it as Undef, and adding
+  // an imp-use on the original subregister.
+  MachineInstrBuilder MIB =
+      BuildMI(*MF, MI->getDebugLoc(), TII->get(X86::MOV32rr), NewDestReg)
+          .addReg(NewSrcReg, RegState::Undef)
+          .addReg(OldSrc.getReg(), RegState::Implicit);
+
+  // Drop imp-defs/uses that would be redundant with the new def/use.
+  for (auto &Op : MI->implicit_operands())
+    if (Op.getReg() != (Op.isDef() ? NewDestReg : NewSrcReg))
+      MIB.addOperand(Op);
+
+  return MIB;
+}
+
 void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
                                         MachineBasicBlock &MBB) {
 
@@ -272,6 +313,15 @@ void FixupBWInstPass::processBasicBlock(
       NewMI = tryReplaceLoad(X86::MOVZX32rm16, MI);
       break;
 
+    case X86::MOV8rr:
+    case X86::MOV16rr:
+      // Always try to replace 8/16 bit copies with a 32 bit copy.
+      // Code size is either less (16) or equal (8), and there is sometimes a
+      // perf advantage from eliminating a false dependence on the upper portion
+      // of the register.
+      NewMI = tryReplaceCopy(MI);
+      break;
+
     default:
       // nothing to do here.
       break;
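
To see what the subregister-index check in tryReplaceCopy rejects,
consider the H-reg case (covered by test_movb_hreg in the new
fixup-bw-copy.ll below); a sketch in MIR, with the relevant X86
subregister indices noted:

    %al = MOV8rr %ah   ; %ah is sub_8bit_hi of %eax, %al is sub_8bit
    ; getX86SubSuperRegister(%ah, 32) and the dead super-reg dest are
    ; both %eax, so the widened form would be the no-op
    ; "%eax = MOV32rr %eax", losing the high-byte extract; the index
    ; mismatch makes tryReplaceCopy return nullptr instead.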

Modified: llvm/trunk/test/CodeGen/X86/2011-06-14-PreschedRegalias.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2011-06-14-PreschedRegalias.ll?rev=268831&r1=268830&r2=268831&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2011-06-14-PreschedRegalias.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2011-06-14-PreschedRegalias.ll Fri May  6 20:11:17 2016
@@ -6,7 +6,7 @@
 define i8 @f(i8 %v1, i8 %v2) nounwind {
 entry:
 ; CHECK: callq
-; CHECK: movb %{{.*}}, %al
+; CHECK: movl %{{.*}}, %eax
 ; CHECK: mulb
 ; CHECK: mulb
         %rval = tail call i8 @bar() nounwind

Modified: llvm/trunk/test/CodeGen/X86/anyext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/anyext.ll?rev=268831&r1=268830&r2=268831&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/anyext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/anyext.ll Fri May  6 20:11:17 2016
@@ -39,7 +39,7 @@ define i32 @bar(i32 %p, i16 zeroext %x)
 ; X64-LABEL: bar:
 ; X64:       # BB#0:
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    movw %di, %ax
+; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    divw %si
 ; X64-NEXT:    andl $1, %eax
 ; X64-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll?rev=268831&r1=268830&r2=268831&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll Fri May  6 20:11:17 2016
@@ -461,7 +461,7 @@ define i32 @test12(i32 %a1, i32 %a2, i32
 ; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; KNL_X32-NEXT:    movl %edi, (%esp)
 ; KNL_X32-NEXT:    calll _test11
-; KNL_X32-NEXT:    movb %al, %bl
+; KNL_X32-NEXT:    movl %eax, %ebx
 ; KNL_X32-NEXT:    movzbl %bl, %eax
 ; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; KNL_X32-NEXT:    movl %esi, {{[0-9]+}}(%esp)

Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=268831&r1=268830&r2=268831&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Fri May  6 20:11:17 2016
@@ -81,7 +81,7 @@ define i16 @mand16(i16 %x, i16 %y) {
 ; CHECK-NEXT:    xorl %esi, %eax
 ; CHECK-NEXT:    andl %esi, %edi
 ; CHECK-NEXT:    orl %eax, %edi
-; CHECK-NEXT:    movw %di, %ax
+; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    retq
   %ma = bitcast i16 %x to <16 x i1>
   %mb = bitcast i16 %y to <16 x i1>

Modified: llvm/trunk/test/CodeGen/X86/avx512-select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-select.ll?rev=268831&r1=268830&r2=268831&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-select.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-select.ll Fri May  6 20:11:17 2016
@@ -72,7 +72,7 @@ define i8 @select05(i8 %a.0, i8 %m) {
 ; CHECK-LABEL: select05:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    orl %esi, %edi
-; CHECK-NEXT:    movb %dil, %al
+; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    retq
   %mask = bitcast i8 %m to <8 x i1>
   %a = bitcast i8 %a.0 to <8 x i1>
@@ -102,7 +102,7 @@ define i8 @select06(i8 %a.0, i8 %m) {
 ; CHECK-LABEL: select06:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    andl %esi, %edi
-; CHECK-NEXT:    movb %dil, %al
+; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    retq
   %mask = bitcast i8 %m to <8 x i1>
   %a = bitcast i8 %a.0 to <8 x i1>

Modified: llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll?rev=268831&r1=268830&r2=268831&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll Fri May  6 20:11:17 2016
@@ -36,7 +36,7 @@ define i8 @mand8(i8 %x, i8 %y) {
 ; CHECK-NEXT:    xorl %esi, %eax
 ; CHECK-NEXT:    andl %esi, %edi
 ; CHECK-NEXT:    orl %eax, %edi
-; CHECK-NEXT:    movb %dil, %al
+; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    retq
   %ma = bitcast i8 %x to <8 x i1>
   %mb = bitcast i8 %y to <8 x i1>

Modified: llvm/trunk/test/CodeGen/X86/cmovcmov.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cmovcmov.ll?rev=268831&r1=268830&r2=268831&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/cmovcmov.ll (original)
+++ llvm/trunk/test/CodeGen/X86/cmovcmov.ll Fri May  6 20:11:17 2016
@@ -250,14 +250,14 @@ attributes #0 = { nounwind }
 ; CMOV-DAG: movb $20, %al
 ; CMOV-DAG: movb $20, %dl
 ; CMOV:   jl [[BB0:.LBB[0-9_]+]]
-; CMOV:   movb %cl, %dl
+; CMOV:   movl %ecx, %edx
 ; CMOV: [[BB0]]:
 ; CMOV:   jg [[BB1:.LBB[0-9_]+]]
-; CMOV:   movb %dl, %al
+; CMOV:   movl %edx, %eax
 ; CMOV: [[BB1]]:
 ; CMOV:   testl %edi, %edi
 ; CMOV:   je [[BB2:.LBB[0-9_]+]]
-; CMOV:   movb %dl, %al
+; CMOV:   movl %edx, %eax
 ; CMOV: [[BB2]]:
 ; CMOV:   movb %al, g8(%rip)
 ; CMOV:   retq

Added: llvm/trunk/test/CodeGen/X86/fixup-bw-copy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fixup-bw-copy.ll?rev=268831&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fixup-bw-copy.ll (added)
+++ llvm/trunk/test/CodeGen/X86/fixup-bw-copy.ll Fri May  6 20:11:17 2016
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -fixup-byte-word-insts=1 -mtriple=x86_64-- < %s | FileCheck --check-prefix=X64 --check-prefix=BWON64 %s
+; RUN: llc -verify-machineinstrs -fixup-byte-word-insts=0 -mtriple=x86_64-- < %s | FileCheck --check-prefix=X64 --check-prefix=BWOFF64 %s
+; RUN: llc -verify-machineinstrs -fixup-byte-word-insts=1 -mtriple=i386-- < %s | FileCheck --check-prefix=X32 --check-prefix=BWON32 %s
+; RUN: llc -verify-machineinstrs -fixup-byte-word-insts=0 -mtriple=i386-- < %s | FileCheck --check-prefix=X32 --check-prefix=BWOFF32 %s
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
+
+define i8 @test_movb(i8 %a0) {
+; BWON64-LABEL: test_movb:
+; BWON64:       # BB#0:
+; BWON64-NEXT:    movl %edi, %eax
+; BWON64-NEXT:    retq
+;
+; BWOFF64-LABEL: test_movb:
+; BWOFF64:       # BB#0:
+; BWOFF64-NEXT:    movb %dil, %al
+; BWOFF64-NEXT:    retq
+;
+; X32-LABEL: test_movb:
+; X32:       # BB#0:
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X32-NEXT:    retl
+  ret i8 %a0
+}
+
+define i16 @test_movw(i16 %a0) {
+; BWON64-LABEL: test_movw:
+; BWON64:       # BB#0:
+; BWON64-NEXT:    movl %edi, %eax
+; BWON64-NEXT:    retq
+;
+; BWOFF64-LABEL: test_movw:
+; BWOFF64:       # BB#0:
+; BWOFF64-NEXT:    movw %di, %ax
+; BWOFF64-NEXT:    retq
+;
+; BWON32-LABEL: test_movw:
+; BWON32:       # BB#0:
+; BWON32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; BWON32-NEXT:    retl
+;
+; BWOFF32-LABEL: test_movw:
+; BWOFF32:       # BB#0:
+; BWOFF32-NEXT:    movw {{[0-9]+}}(%esp), %ax
+; BWOFF32-NEXT:    retl
+  ret i16 %a0
+}
+
+; Verify we don't mess with H-reg copies (only generated in 32-bit mode).
+define i8 @test_movb_hreg(i16 %a0) {
+; X64-LABEL: test_movb_hreg:
+; X64:       # BB#0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    shrl $8, %eax
+; X64-NEXT:    addb %dil, %al
+; X64-NEXT:    retq
+;
+; X32-LABEL: test_movb_hreg:
+; X32:       # BB#0:
+; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    addb %al, %ah
+; X32-NEXT:    movb %ah, %al
+; X32-NEXT:    retl
+  %tmp0 = trunc i16 %a0 to i8
+  %tmp1 = lshr i16 %a0, 8
+  %tmp2 = trunc i16 %tmp1 to i8
+  %tmp3 = add i8 %tmp0, %tmp2
+  ret i8 %tmp3
+}

Added: llvm/trunk/test/CodeGen/X86/fixup-bw-copy.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fixup-bw-copy.mir?rev=268831&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fixup-bw-copy.mir (added)
+++ llvm/trunk/test/CodeGen/X86/fixup-bw-copy.mir Fri May  6 20:11:17 2016
@@ -0,0 +1,156 @@
+# RUN: llc -run-pass x86-fixup-bw-insts -mtriple=x86_64-- -o /dev/null %s 2>&1 | FileCheck %s
+
+# Verify that we correctly deal with the flag edge cases when replacing
+# copies by bigger copies, which is a pretty unusual transform.
+
+--- |
+  target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+  define i8 @test_movb_killed(i8 %a0) {
+    ret i8 %a0
+  }
+
+  define i8 @test_movb_impuse(i8 %a0) {
+    ret i8 %a0
+  }
+
+  define i8 @test_movb_impdef_gr64(i8 %a0) {
+    ret i8 %a0
+  }
+
+  define i8 @test_movb_impdef_gr32(i8 %a0) {
+    ret i8 %a0
+  }
+
+  define i8 @test_movb_impdef_gr16(i8 %a0) {
+    ret i8 %a0
+  }
+
+  define i16 @test_movw_impdef_gr32(i16 %a0) {
+    ret i16 %a0
+  }
+
+  define i16 @test_movw_impdef_gr64(i16 %a0) {
+    ret i16 %a0
+  }
+
+...
+
+---
+name:            test_movb_killed
+allVRegsAllocated: true
+isSSA:           false
+tracksRegLiveness: true
+liveins:
+  - { reg: '%edi' }
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: %edi
+
+    ; CHECK: %eax = MOV32rr undef %edi, implicit %dil
+    %al = MOV8rr killed %dil
+    RETQ killed %al
+
+...
+
+---
+name:            test_movb_impuse
+allVRegsAllocated: true
+isSSA:           false
+tracksRegLiveness: true
+liveins:
+  - { reg: '%edi' }
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: %edi
+
+    ; CHECK: %eax = MOV32rr undef %edi, implicit %dil
+    %al = MOV8rr %dil, implicit %edi
+    RETQ killed %al
+
+...
+
+---
+name:            test_movb_impdef_gr64
+allVRegsAllocated: true
+isSSA:           false
+tracksRegLiveness: true
+liveins:
+  - { reg: '%edi' }
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: %edi
+
+    ; CHECK: %eax = MOV32rr undef %edi, implicit %dil, implicit-def %rax
+    %al = MOV8rr %dil, implicit-def %rax
+    RETQ killed %al
+
+...
+
+---
+name:            test_movb_impdef_gr32
+allVRegsAllocated: true
+isSSA:           false
+tracksRegLiveness: true
+liveins:
+  - { reg: '%edi' }
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: %edi
+
+    ; CHECK: %eax = MOV32rr undef %edi, implicit %dil
+    %al = MOV8rr %dil, implicit-def %eax
+    RETQ killed %al
+
+...
+
+---
+name:            test_movb_impdef_gr16
+allVRegsAllocated: true
+isSSA:           false
+tracksRegLiveness: true
+liveins:
+  - { reg: '%edi' }
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: %edi
+
+    ; CHECK: %eax = MOV32rr undef %edi, implicit %dil
+    %al = MOV8rr %dil, implicit-def %ax
+    RETQ killed %al
+
+...
+
+---
+name:            test_movw_impdef_gr32
+allVRegsAllocated: true
+isSSA:           false
+tracksRegLiveness: true
+liveins:
+  - { reg: '%edi' }
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: %edi
+
+    ; CHECK: %eax = MOV32rr undef %edi, implicit %di
+    %ax = MOV16rr %di, implicit-def %eax
+    RETQ killed %ax
+
+...
+
+---
+name:            test_movw_impdef_gr64
+allVRegsAllocated: true
+isSSA:           false
+tracksRegLiveness: true
+liveins:
+  - { reg: '%edi' }
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: %edi
+
+    ; CHECK: %eax = MOV32rr undef %edi, implicit %di, implicit-def %rax
+    %ax = MOV16rr %di, implicit-def %rax
+    RETQ killed %ax
+
+...

Modified: llvm/trunk/test/CodeGen/X86/float-conv-elim.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/float-conv-elim.ll?rev=268831&r1=268830&r2=268831&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/float-conv-elim.ll (original)
+++ llvm/trunk/test/CodeGen/X86/float-conv-elim.ll Fri May  6 20:11:17 2016
@@ -21,7 +21,7 @@ define i32 @foo2(i8 %a) #0 {
 
 ; CHECK-LABEL: bar
 ; CHECK-NOT: cvt
-; CHECK: movb
+; CHECK: movl
 define zeroext i8 @bar(i8 zeroext %a) #0 {
   %conv = uitofp i8 %a to float
   %conv1 = fptoui float %conv to i8

Modified: llvm/trunk/test/CodeGen/X86/machine-combiner-int.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/machine-combiner-int.ll?rev=268831&r1=268830&r2=268831&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/machine-combiner-int.ll (original)
+++ llvm/trunk/test/CodeGen/X86/machine-combiner-int.ll Fri May  6 20:11:17 2016
@@ -60,7 +60,7 @@ define i8 @reassociate_ands_i8(i8 %x0, i
 ; CHECK-NEXT:    subb  %sil, %dil
 ; CHECK-NEXT:    andb  %cl, %dl
 ; CHECK-NEXT:    andb  %dil, %dl
-; CHECK-NEXT:    movb  %dl, %al
+; CHECK-NEXT:    movl  %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = sub i8 %x0, %x1
   %t1 = and i8 %x2, %t0
@@ -107,7 +107,7 @@ define i8 @reassociate_ors_i8(i8 %x0, i8
 ; CHECK-NEXT:    subb  %sil, %dil
 ; CHECK-NEXT:    orb   %cl, %dl
 ; CHECK-NEXT:    orb   %dil, %dl
-; CHECK-NEXT:    movb  %dl, %al
+; CHECK-NEXT:    movl  %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = sub i8 %x0, %x1
   %t1 = or i8 %x2, %t0
@@ -154,7 +154,7 @@ define i8 @reassociate_xors_i8(i8 %x0, i
 ; CHECK-NEXT:    subb  %sil, %dil
 ; CHECK-NEXT:    xorb  %cl, %dl
 ; CHECK-NEXT:    xorb  %dil, %dl
-; CHECK-NEXT:    movb  %dl, %al
+; CHECK-NEXT:    movl  %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = sub i8 %x0, %x1
   %t1 = xor i8 %x2, %t0

Modified: llvm/trunk/test/CodeGen/X86/opt-ext-uses.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/opt-ext-uses.ll?rev=268831&r1=268830&r2=268831&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/opt-ext-uses.ll (original)
+++ llvm/trunk/test/CodeGen/X86/opt-ext-uses.ll Fri May  6 20:11:17 2016
@@ -2,8 +2,8 @@
 
 ; This test should get one and only one register to register mov.
 ; CHECK-LABEL: t:
-; CHECK:     movw
-; CHECK-NOT: movw
+; CHECK:     movl
+; CHECK-NOT: mov
 ; CHECK:     ret
 
 define signext i16 @t()   {

Modified: llvm/trunk/test/CodeGen/X86/pr23664.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr23664.ll?rev=268831&r1=268830&r2=268831&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr23664.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr23664.ll Fri May  6 20:11:17 2016
@@ -9,6 +9,6 @@ define i2 @f(i32 %arg) {
 ; CHECK-LABEL: f:
 ; CHECK:      addb    %dil, %dil
 ; CHECK-NEXT: orb     $1, %dil
-; CHECK-NEXT: movb    %dil, %al
+; CHECK-NEXT: movl    %edi, %eax
 ; CHECK-NEXT: retq
 }

Modified: llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll?rev=268831&r1=268830&r2=268831&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll Fri May  6 20:11:17 2016
@@ -9,68 +9,68 @@
 define i8 @test_bitreverse_i8(i8 %a) nounwind {
 ; SSE-LABEL: test_bitreverse_i8:
 ; SSE:       # BB#0:
-; SSE-NEXT:    movb %dil, %al
+; SSE-NEXT:    movl %edi, %eax
 ; SSE-NEXT:    shlb $7, %al
-; SSE-NEXT:    movb %dil, %cl
+; SSE-NEXT:    movl %edi, %ecx
 ; SSE-NEXT:    shlb $5, %cl
 ; SSE-NEXT:    andb $64, %cl
-; SSE-NEXT:    movb %dil, %dl
+; SSE-NEXT:    movl %edi, %edx
 ; SSE-NEXT:    shlb $3, %dl
 ; SSE-NEXT:    andb $32, %dl
 ; SSE-NEXT:    orb %cl, %dl
-; SSE-NEXT:    movb %dil, %cl
+; SSE-NEXT:    movl %edi, %ecx
 ; SSE-NEXT:    addb %cl, %cl
 ; SSE-NEXT:    andb $16, %cl
 ; SSE-NEXT:    orb %dl, %cl
-; SSE-NEXT:    movb %dil, %dl
+; SSE-NEXT:    movl %edi, %edx
 ; SSE-NEXT:    shrb %dl
 ; SSE-NEXT:    andb $8, %dl
 ; SSE-NEXT:    orb %cl, %dl
-; SSE-NEXT:    movb %dil, %cl
+; SSE-NEXT:    movl %edi, %ecx
 ; SSE-NEXT:    shrb $3, %cl
 ; SSE-NEXT:    andb $4, %cl
 ; SSE-NEXT:    orb %dl, %cl
-; SSE-NEXT:    movb %dil, %dl
+; SSE-NEXT:    movl %edi, %edx
 ; SSE-NEXT:    shrb $5, %dl
 ; SSE-NEXT:    andb $2, %dl
 ; SSE-NEXT:    orb %cl, %dl
 ; SSE-NEXT:    shrb $7, %dil
 ; SSE-NEXT:    orb %dl, %dil
 ; SSE-NEXT:    orb %al, %dil
-; SSE-NEXT:    movb %dil, %al
+; SSE-NEXT:    movl %edi, %eax
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_bitreverse_i8:
 ; AVX:       # BB#0:
-; AVX-NEXT:    movb %dil, %al
+; AVX-NEXT:    movl %edi, %eax
 ; AVX-NEXT:    shlb $7, %al
-; AVX-NEXT:    movb %dil, %cl
+; AVX-NEXT:    movl %edi, %ecx
 ; AVX-NEXT:    shlb $5, %cl
 ; AVX-NEXT:    andb $64, %cl
-; AVX-NEXT:    movb %dil, %dl
+; AVX-NEXT:    movl %edi, %edx
 ; AVX-NEXT:    shlb $3, %dl
 ; AVX-NEXT:    andb $32, %dl
 ; AVX-NEXT:    orb %cl, %dl
-; AVX-NEXT:    movb %dil, %cl
+; AVX-NEXT:    movl %edi, %ecx
 ; AVX-NEXT:    addb %cl, %cl
 ; AVX-NEXT:    andb $16, %cl
 ; AVX-NEXT:    orb %dl, %cl
-; AVX-NEXT:    movb %dil, %dl
+; AVX-NEXT:    movl %edi, %edx
 ; AVX-NEXT:    shrb %dl
 ; AVX-NEXT:    andb $8, %dl
 ; AVX-NEXT:    orb %cl, %dl
-; AVX-NEXT:    movb %dil, %cl
+; AVX-NEXT:    movl %edi, %ecx
 ; AVX-NEXT:    shrb $3, %cl
 ; AVX-NEXT:    andb $4, %cl
 ; AVX-NEXT:    orb %dl, %cl
-; AVX-NEXT:    movb %dil, %dl
+; AVX-NEXT:    movl %edi, %edx
 ; AVX-NEXT:    shrb $5, %dl
 ; AVX-NEXT:    andb $2, %dl
 ; AVX-NEXT:    orb %cl, %dl
 ; AVX-NEXT:    shrb $7, %dil
 ; AVX-NEXT:    orb %dl, %dil
 ; AVX-NEXT:    orb %al, %dil
-; AVX-NEXT:    movb %dil, %al
+; AVX-NEXT:    movl %edi, %eax
 ; AVX-NEXT:    retq
 ;
 ; XOP-LABEL: test_bitreverse_i8:

Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll?rev=268831&r1=268830&r2=268831&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll Fri May  6 20:11:17 2016
@@ -336,7 +336,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -346,7 +346,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %edx
 ; AVX512BW-NEXT:    shrl $8, %edx
 ; AVX512BW-NEXT:    addb %dl, %cl
-; AVX512BW-NEXT:    movb %cl, %dl
+; AVX512BW-NEXT:    movl %ecx, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %cl
 ; AVX512BW-NEXT:    addb %dl, %cl
@@ -358,7 +358,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -369,7 +369,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -380,7 +380,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -391,7 +391,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -402,7 +402,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -413,7 +413,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -424,7 +424,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -435,7 +435,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -446,7 +446,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -457,7 +457,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -468,7 +468,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -479,7 +479,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -490,7 +490,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -501,7 +501,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -513,7 +513,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -523,7 +523,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %edx
 ; AVX512BW-NEXT:    shrl $8, %edx
 ; AVX512BW-NEXT:    addb %dl, %cl
-; AVX512BW-NEXT:    movb %cl, %dl
+; AVX512BW-NEXT:    movl %ecx, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %cl
 ; AVX512BW-NEXT:    addb %dl, %cl
@@ -535,7 +535,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -546,7 +546,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -557,7 +557,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -568,7 +568,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -579,7 +579,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -590,7 +590,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -601,7 +601,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -612,7 +612,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -623,7 +623,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -634,7 +634,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -645,7 +645,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -656,7 +656,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -667,7 +667,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -678,7 +678,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -691,7 +691,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -701,7 +701,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %edx
 ; AVX512BW-NEXT:    shrl $8, %edx
 ; AVX512BW-NEXT:    addb %dl, %cl
-; AVX512BW-NEXT:    movb %cl, %dl
+; AVX512BW-NEXT:    movl %ecx, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %cl
 ; AVX512BW-NEXT:    addb %dl, %cl
@@ -713,7 +713,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -724,7 +724,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -735,7 +735,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -746,7 +746,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -757,7 +757,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -768,7 +768,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -779,7 +779,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -790,7 +790,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -801,7 +801,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -812,7 +812,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -823,7 +823,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -834,7 +834,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -845,7 +845,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -856,7 +856,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -867,7 +867,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -877,7 +877,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %edx
 ; AVX512BW-NEXT:    shrl $8, %edx
 ; AVX512BW-NEXT:    addb %dl, %cl
-; AVX512BW-NEXT:    movb %cl, %dl
+; AVX512BW-NEXT:    movl %ecx, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %cl
 ; AVX512BW-NEXT:    addb %dl, %cl
@@ -889,7 +889,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -900,7 +900,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -911,7 +911,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -922,7 +922,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -933,7 +933,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -944,7 +944,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -955,7 +955,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -966,7 +966,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -977,7 +977,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -988,7 +988,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -999,7 +999,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -1010,7 +1010,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -1021,7 +1021,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -1032,7 +1032,7 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %eax, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -1489,7 +1489,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %edx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -1502,7 +1502,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %esi, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %sil, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -1516,7 +1516,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1529,7 +1529,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1542,7 +1542,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1555,7 +1555,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1568,7 +1568,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1581,7 +1581,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1594,7 +1594,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1607,7 +1607,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1620,7 +1620,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1633,7 +1633,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1646,7 +1646,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1659,7 +1659,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1672,7 +1672,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1685,7 +1685,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1699,7 +1699,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1711,7 +1711,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %edx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -1725,7 +1725,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1738,7 +1738,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1751,7 +1751,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1764,7 +1764,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1777,7 +1777,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1790,7 +1790,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1803,7 +1803,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1816,7 +1816,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1829,7 +1829,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1842,7 +1842,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1855,7 +1855,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1868,7 +1868,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1881,7 +1881,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1894,7 +1894,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1909,7 +1909,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1921,7 +1921,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %edx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -1935,7 +1935,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1948,7 +1948,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1961,7 +1961,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1974,7 +1974,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -1987,7 +1987,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -2000,7 +2000,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -2013,7 +2013,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -2026,7 +2026,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -2039,7 +2039,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -2052,7 +2052,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -2065,7 +2065,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -2078,7 +2078,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -2091,7 +2091,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -2104,7 +2104,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -2117,7 +2117,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -2129,7 +2129,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %edx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %dl, %al
-; AVX512BW-NEXT:    movb %al, %cl
+; AVX512BW-NEXT:    movl %eax, %ecx
 ; AVX512BW-NEXT:    shrb $7, %cl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -2143,7 +2143,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -2156,7 +2156,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -2169,7 +2169,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -2182,7 +2182,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -2195,7 +2195,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -2208,7 +2208,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -2221,7 +2221,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -2234,7 +2234,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -2247,7 +2247,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -2260,7 +2260,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -2273,7 +2273,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -2286,7 +2286,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -2299,7 +2299,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al
@@ -2312,7 +2312,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    imull $-109, %ecx, %eax
 ; AVX512BW-NEXT:    shrl $8, %eax
 ; AVX512BW-NEXT:    addb %cl, %al
-; AVX512BW-NEXT:    movb %al, %dl
+; AVX512BW-NEXT:    movl %eax, %edx
 ; AVX512BW-NEXT:    shrb $7, %dl
 ; AVX512BW-NEXT:    sarb $2, %al
 ; AVX512BW-NEXT:    addb %dl, %al

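(A note on the pattern repeated above: each lane needs the post-add intermediate twice, once to extract its sign bit via shrb $7 and once for the sarb $2, hence the scratch copy, and it is that GR8-to-GR8 copy the pass now widens. A rough encoding comparison, assuming the usual x86-64 encoding rules:

    movb %al, %dl    # 88 c2: 2 bytes; writes only %dl, leaving a false
                     # dependency on the previous contents of %rdx
    movl %eax, %edx  # 89 c2: 2 bytes; writes all of %edx and zero-extends
                     # into %rdx, so no partial-register dependency

The byte and dword forms are the same size here, and only the low byte is consumed by the shrb/sarb/addb that follow, so the promotion costs nothing.)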
Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll?rev=268831&r1=268830&r2=268831&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll Fri May  6 20:11:17 2016
@@ -1323,7 +1323,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $1, %xmm1, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %ecx
 ; AVX512BW-NEXT:    shrl $8, %ecx
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %cl, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %cl, %al
@@ -1335,7 +1335,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $0, %xmm1, %esi
 ; AVX512BW-NEXT:    imull $37, %esi, %edi
 ; AVX512BW-NEXT:    shrl $8, %edi
-; AVX512BW-NEXT:    movb %sil, %al
+; AVX512BW-NEXT:    movl %esi, %eax
 ; AVX512BW-NEXT:    subb %dil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %dil, %al
@@ -1348,7 +1348,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $2, %xmm1, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1360,7 +1360,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $3, %xmm1, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1372,7 +1372,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $4, %xmm1, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1384,7 +1384,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $5, %xmm1, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1396,7 +1396,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $6, %xmm1, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1408,7 +1408,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $7, %xmm1, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1420,7 +1420,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $8, %xmm1, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1432,7 +1432,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $9, %xmm1, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1444,7 +1444,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $10, %xmm1, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1456,7 +1456,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $11, %xmm1, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1468,7 +1468,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $12, %xmm1, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1480,7 +1480,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $13, %xmm1, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1492,7 +1492,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $14, %xmm1, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1504,7 +1504,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $15, %xmm1, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1517,7 +1517,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $1, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1528,7 +1528,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $0, %xmm2, %esi
 ; AVX512BW-NEXT:    imull $37, %esi, %edi
 ; AVX512BW-NEXT:    shrl $8, %edi
-; AVX512BW-NEXT:    movb %sil, %al
+; AVX512BW-NEXT:    movl %esi, %eax
 ; AVX512BW-NEXT:    subb %dil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %dil, %al
@@ -1541,7 +1541,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $2, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1553,7 +1553,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $3, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1565,7 +1565,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $4, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1577,7 +1577,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $5, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1589,7 +1589,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $6, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1601,7 +1601,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $7, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1613,7 +1613,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $8, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1625,7 +1625,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $9, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1637,7 +1637,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $10, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1649,7 +1649,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $11, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1661,7 +1661,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $12, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1673,7 +1673,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $13, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1685,7 +1685,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $14, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1697,7 +1697,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $15, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1711,7 +1711,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $1, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1722,7 +1722,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $0, %xmm2, %esi
 ; AVX512BW-NEXT:    imull $37, %esi, %edi
 ; AVX512BW-NEXT:    shrl $8, %edi
-; AVX512BW-NEXT:    movb %sil, %al
+; AVX512BW-NEXT:    movl %esi, %eax
 ; AVX512BW-NEXT:    subb %dil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %dil, %al
@@ -1735,7 +1735,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $2, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1747,7 +1747,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $3, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1759,7 +1759,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $4, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1771,7 +1771,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $5, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1783,7 +1783,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $6, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1795,7 +1795,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $7, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1807,7 +1807,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $8, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1819,7 +1819,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $9, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1831,7 +1831,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $10, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1843,7 +1843,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $11, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1855,7 +1855,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $12, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1867,7 +1867,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $13, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1879,7 +1879,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $14, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1891,7 +1891,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $15, %xmm2, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1903,7 +1903,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $1, %xmm0, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1914,7 +1914,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $0, %xmm0, %esi
 ; AVX512BW-NEXT:    imull $37, %esi, %edi
 ; AVX512BW-NEXT:    shrl $8, %edi
-; AVX512BW-NEXT:    movb %sil, %al
+; AVX512BW-NEXT:    movl %esi, %eax
 ; AVX512BW-NEXT:    subb %dil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %dil, %al
@@ -1927,7 +1927,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $2, %xmm0, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1939,7 +1939,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $3, %xmm0, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1951,7 +1951,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $4, %xmm0, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1963,7 +1963,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $5, %xmm0, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1975,7 +1975,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $6, %xmm0, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1987,7 +1987,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $7, %xmm0, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -1999,7 +1999,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $8, %xmm0, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -2011,7 +2011,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $9, %xmm0, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -2023,7 +2023,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $10, %xmm0, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -2035,7 +2035,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $11, %xmm0, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -2047,7 +2047,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $12, %xmm0, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -2059,7 +2059,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $13, %xmm0, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -2071,7 +2071,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $14, %xmm0, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al
@@ -2083,7 +2083,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpextrb $15, %xmm0, %edx
 ; AVX512BW-NEXT:    imull $37, %edx, %esi
 ; AVX512BW-NEXT:    shrl $8, %esi
-; AVX512BW-NEXT:    movb %dl, %al
+; AVX512BW-NEXT:    movl %edx, %eax
 ; AVX512BW-NEXT:    subb %sil, %al
 ; AVX512BW-NEXT:    shrb %al
 ; AVX512BW-NEXT:    addb %sil, %al

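(The unsigned lanes follow the same shape, so every changed line is again an 8-bit copy widened at no size cost. The word-sized case, which this file doesn't exercise, is where the promotion actually shrinks the code; a sketch under the same encoding assumptions as above:

    movw %ax, %dx    # 66 89 c2: 3 bytes (operand-size prefix)
    movl %eax, %edx  # 89 c2:    2 bytes

dropping the 0x66 prefix saves a byte on top of avoiding the partial write.)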
Modified: llvm/trunk/test/CodeGen/X86/xaluo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xaluo.ll?rev=268831&r1=268830&r2=268831&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xaluo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xaluo.ll Fri May  6 20:11:17 2016
@@ -295,7 +295,7 @@ entry:
 define zeroext i1 @smulo.i8(i8 %v1, i8 %v2, i8* %res) {
 entry:
 ; CHECK-LABEL:   smulo.i8
-; CHECK:         movb %dil, %al
+; CHECK:         movl %edi, %eax
 ; CHECK-NEXT:    imulb %sil
 ; CHECK-NEXT:    seto %cl
   %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
@@ -345,7 +345,7 @@ entry:
 define zeroext i1 @umulo.i8(i8 %v1, i8 %v2, i8* %res) {
 entry:
 ; CHECK-LABEL:   umulo.i8
-; CHECK:         movb %dil, %al
+; CHECK:         movl %edi, %eax
 ; CHECK-NEXT:    mulb %sil
 ; CHECK-NEXT:    seto %cl
   %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)

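(Here the copy exists because mulb and imulb read one operand implicitly from %al, so the incoming i8 argument in %dil must be moved there first; the pass widens that copy exactly as in the vector tests. A minimal reproducer in the spirit of the new fixup-bw-copy.ll test could look like this; the function name and RUN line below are illustrative, not taken from the commit:

    ; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
    define i8 @ret_arg_i8(i8 %a0) {
    ; CHECK: movl %edi, %eax
      ret i8 %a0
    }

Returning the i8 argument forces a %dil-to-%al copy, which should come out as the 32-bit movl once the fixup pass has run.)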